Additional optimizations for Phase One P45+ and P65+

2017-08-30 18:55:30 +02:00 · 2017-08-30 18:55:30 +02:00 · fd1909e4da
commit fd1909e4da
parent 61a69ba193
1 changed file with 207 additions and 196 deletions
--- a/rtengine/dcraw.cc
+++ b/rtengine/dcraw.cc
@@ -1501,19 +1501,21 @@ void CLASS phase_one_flat_field (int is_float, int nc)

 void CLASS phase_one_correct()
 {
-  BENCHFUN
+BENCHFUN
    unsigned entries, tag, data, save, col, row, type;
    int len, i, j, k, cip, val[4], dev[4], sum, max;
    int head[9], diff, mindiff=INT_MAX, off_412=0;
-  static const signed char dir[12][2] =
-    { {-1,-1}, {-1,1}, {1,-1}, {1,1}, {-2,0}, {0,-2}, {0,2}, {2,0},
-      {-2,-2}, {-2,2}, {2,-2}, {2,2} };
+    static const signed char dir[12][2] = { {-1,-1}, {-1,1}, {1,-1}, {1,1}, {-2,0}, {0,-2}, {0,2}, {2,0}, {-2,-2}, {-2,2}, {2,-2}, {2,2} };
    float poly[8], num, cfrac, frac, mult[2], *yval[2];
    ushort *xval[2];
    int qmult_applied = 0, qlin_applied = 0;

-  if (half_size || !meta_length) return;
-  if (verbose) fprintf (stderr,_("Phase One correction...\n"));
+    if (half_size || !meta_length) {
+        return;
+    }
+    if (verbose) {
+        fprintf (stderr,_("Phase One correction...\n"));
+    }
    fseek (ifp, meta_offset, SEEK_SET);
    order = get2();
    fseek (ifp, 6, SEEK_CUR);
@@ -1526,32 +1528,41 @@ void CLASS phase_one_correct()
        save = ftell(ifp);
        fseek (ifp, meta_offset+data, SEEK_SET);
        if (tag == 0x419) {				/* Polynomial curve */
-      for (get4(), i=0; i < 8; i++)
+            for (get4(), i=0; i < 8; i++) {
                poly[i] = getreal(11);
+            }
            poly[3] += (ph1.tag_210 - poly[7]) * poly[6] + 1;
            for (i=0; i < 0x10000; i++) {
                num = (poly[5]*i + poly[3])*i + poly[1];
                curve[i] = LIM(num,0,65535);
-      } goto apply;				/* apply to right half */
+            }
+            goto apply;				/* apply to right half */
        } else if (tag == 0x41a) {			/* Polynomial curve */
-      for (i=0; i < 4; i++)
+            for (i=0; i < 4; i++) {
                poly[i] = getreal(11);
+            }
            for (i=0; i < 0x10000; i++) {
-	for (num=0, j=4; j--; )
+                for (num=0, j=4; j--;) {
                    num = num * i + poly[j];
+                }
            curve[i] = LIM(num+i,0,65535);
-      } apply:					/* apply to whole image */
-      for (row=0; row < raw_height; row++)
-	for (col = (tag & 1)*ph1.split_col; col < raw_width; col++)
+            }
+            apply:					/* apply to whole image */
+            #pragma omp parallel for schedule(dynamic,16)
+            for (int row=0; row < raw_height; row++) {
+                for (int col = (tag & 1)*ph1.split_col; col < raw_width; col++) {
                    RAW(row,col) = curve[RAW(row,col)];
+                }
+            }
        } else if (tag == 0x400) {			/* Sensor defects */
            while ((len -= 8) >= 0) {
 	            col  = get2();
 	            row  = get2();
-	type = get2(); get2();
+	            type = get2();
+	            get2();
 	            if (col >= raw_width) continue;
-	if (type == 131 || type == 137)		/* Bad column */
-	  for (row=0; row < raw_height; row++)
+	            if (type == 131 || type == 137) {		/* Bad column */
+                    for (row=0; row < raw_height; row++) {
                        if (FC(row-top_margin,col-left_margin) == 1) {
                            for (sum=i=0; i < 4; i++)
                                sum += val[i] = raw (row+dir[i][0], col+dir[i][1]);
@@ -1563,10 +1574,10 @@ void CLASS phase_one_correct()
                        } else {
                            for (sum=0, i=8; i < 12; i++)
                                sum += raw (row+dir[i][0], col+dir[i][1]);
-	      RAW(row,col) = 0.5 + sum * 0.0732233 +
-		(raw(row,col-2) + raw(row,col+2)) * 0.3535534;
+                            RAW(row,col) = 0.5 + sum * 0.0732233 + (raw(row,col-2) + raw(row,col+2)) * 0.3535534;
                        }
-	else if (type == 129) {			/* Bad pixel */
+                    }
+	            } else if (type == 129) {			/* Bad pixel */
                    if (row >= raw_height) continue;
                    j = (FC(row-top_margin,col-left_margin) != 1) * 4;
                    for (sum=0, i=j; i < j+8; i++)
@@ -1612,10 +1623,9 @@ void CLASS phase_one_correct()
 	                cx[17] = cf[17] = ((unsigned) ref[15] * 65535) / lc[qr][qc][15];
 	                cx[18] = cf[18] = 65535;
 	                cubic_spline(cx, cf, 19);
-	  for (row = (qr ? ph1.split_row : 0);
-	       row < (qr ? raw_height : ph1.split_row); row++)
-	    for (col = (qc ? ph1.split_col : 0);
-		 col < (qc ? raw_width : ph1.split_col); col++)
+	                #pragma omp parallel for schedule(dynamic,16)
+                    for (int row = (qr ? ph1.split_row : 0); row < (qr ? raw_height : ph1.split_row); row++)
+                        for (int col = (qc ? ph1.split_col : 0); col < (qc ? raw_width : ph1.split_col); col++)
                            RAW(row,col) = curve[RAW(row,col)];
 	            }
            }
@@ -1630,11 +1640,13 @@ void CLASS phase_one_correct()
            qmult[1][0] = 1.0 + getreal(11);
            get4(); get4(); get4();
            qmult[1][1] = 1.0 + getreal(11);
-      for (row=0; row < raw_height; row++)
-	for (col=0; col < raw_width; col++) {
-	  i = qmult[row >= ph1.split_row][col >= ph1.split_col] * RAW(row,col);
+            #pragma omp parallel for schedule(dynamic,16)
+            for (int row=0; row < raw_height; row++) {
+                for (int col=0; col < raw_width; col++) {
+                    int i = qmult[row >= ph1.split_row][col >= ph1.split_col] * RAW(row,col);
                    RAW(row,col) = LIM(i,0,65535);
                }
+            }
            qmult_applied = 1;
        } else if (tag == 0x431 && !qmult_applied) { /* Quadrant combined */
            ushort lc[2][2][7], ref[7];
@@ -1655,10 +1667,8 @@ void CLASS phase_one_correct()
 	                cx[0] = cf[0] = 0;
 	                cx[8] = cf[8] = 65535;
 	                cubic_spline(cx, cf, 9);
-	  for (row = (qr ? ph1.split_row : 0);
-	       row < (qr ? raw_height : ph1.split_row); row++)
-	    for (col = (qc ? ph1.split_col : 0);
-		 col < (qc ? raw_width : ph1.split_col); col++)
+	                for (row = (qr ? ph1.split_row : 0); row < (qr ? raw_height : ph1.split_row); row++)
+	                    for (col = (qc ? ph1.split_col : 0); col < (qc ? raw_width : ph1.split_col); col++)
 	                        RAW(row,col) = curve[RAW(row,col)];
                }
            }
@@ -1669,7 +1679,8 @@ void CLASS phase_one_correct()
    }
    if (off_412) {
        fseek (ifp, off_412, SEEK_SET);
-    for (i=0; i < 9; i++) head[i] = get4() & 0x7fff;
+        for (i=0; i < 9; i++)
+            head[i] = get4() & 0x7fff;
        yval[0] = (float *) calloc (head[1]*head[3] + head[2]*head[4], 6);
        merror (yval[0], "phase_one_correct()");
        yval[1] = (float  *) (yval[0] + head[1]*head[3]);
@@ -1689,9 +1700,9 @@ void CLASS phase_one_correct()
 	            num = RAW(row,col) * 0.5;
 	            for (i=cip; i < cip+2; i++) {
 	                for (k=j=0; j < head[1]; j++)
-	    if (num < xval[0][k = head[1]*i+j]) break;
-	  frac = (j == 0 || j == head[1]) ? 0 :
-		(xval[0][k] - num) / (xval[0][k] - xval[0][k-1]);
+	                    if (num < xval[0][k = head[1]*i+j])
+	                        break;
+	                frac = (j == 0 || j == head[1]) ? 0 : (xval[0][k] - num) / (xval[0][k] - xval[0][k-1]);
 	                mult[i-cip] = yval[0][k-1] * frac + yval[0][k] * (1-frac);
 	            }
 	            i = ((mult[0] * (1-cfrac) + mult[1] * cfrac) * row + num) * 2;