[Jdlraw-discussion] jdlRaw/Sources jdlDcRaw.cpp, 1.16, 1.17 jdlDcRaw.h, 1.10, 1.11 jdlDcRaw.i, 1.4,

SourceForge Headquarters 1320 Columbia Street Suite 310 San Diego, CA 92101 +1 (858) 422-6466

Update of /cvsroot/jdlraw/jdlRaw/Sources
In directory sfp-cvsdas-3.v30.ch3.sourceforge.com:/tmp/cvs-serv20264/Sources

Modified Files:
	jdlDcRaw.cpp jdlDcRaw.h jdlDcRaw.i jdlMain.cpp jdlMain.h 
	jdlSettings.cpp 
Log Message:

*) Added OpenMP (OMP) parallelization to the VNG demosaicing.
   The end result is not mathematically *exactly* equal to the non-OMP case.
   However, visually, I can't see any relevant deviation.
   (By the way, the reason is clear: in the OMP case the photo is treated
   as if it were divided into N separate photos, each of which gets VNG
   applied. This gives differences on the boundaries compared to applying
   VNG to one single photo. But I believe it is correct.)
   I added a user option to switch OMP on/off.

Index: jdlMain.cpp
===================================================================
RCS file: /cvsroot/jdlraw/jdlRaw/Sources/jdlMain.cpp,v
retrieving revision 1.25
retrieving revision 1.26
diff -C2 -d -r1.25 -r1.26
*** jdlMain.cpp	24 Apr 2010 17:30:51 -0000	1.25
--- jdlMain.cpp	1 May 2010 07:38:25 -0000	1.26
***************
*** 1764,1767 ****
--- 1764,1773 ----
  }

+ void jdlMain::CB_InterpolationOMP(const QVariant) {
+   m_TheProcessor->m_TriggerUpdateExifInfoAtEnd = 0;
+   m_TheProcessor->m_TriggerZoomFitAtEnd        = 0;
+   m_TheProcessor->RunFixed(jdlProcessorPhase_Demosaic);
+ }
+ 
  ////////////////////////////////////////////////////////////////////////////////
  //
***************
*** 1859,1862 ****
--- 1865,1869 ----
    M_Dispatch(Interpolation)
    M_Dispatch(FourColorRGB)
+   M_Dispatch(InterpolationOMP)
    M_Dispatch(DCBIterations)
    M_Dispatch(DCBEnhance)

Index: jdlDcRaw.i
===================================================================
RCS file: /cvsroot/jdlraw/jdlRaw/Sources/jdlDcRaw.i,v
retrieving revision 1.4
retrieving revision 1.5
diff -C2 -d -r1.4 -r1.5
*** jdlDcRaw.i	24 Apr 2010 17:30:51 -0000	1.4
--- jdlDcRaw.i	1 May 2010 07:38:25 -0000	1.5
***************
*** 334,337 ****
--- 334,346 ----
   _("Mix the 2 green channels")},

+ {"Fixed_Interpolation",
+  "InterpolationOMP",
+  jdlGT_Check,
+  2,
+  1,
+  1,
+  _("Interpolation OMP"),
+  _("OMP parallelization used on interpolation")},
+  
  #endif

Index: jdlDcRaw.h
===================================================================
RCS file: /cvsroot/jdlraw/jdlRaw/Sources/jdlDcRaw.h,v
retrieving revision 1.10
retrieving revision 1.11
diff -C2 -d -r1.10 -r1.11
*** jdlDcRaw.h	24 Apr 2010 17:30:51 -0000	1.10
--- jdlDcRaw.h	1 May 2010 07:38:25 -0000	1.11
***************
*** 176,179 ****
--- 176,182 ----
  short       m_UserSetting_FourColorRGB;

+ // Whether or not to allow OMP on interpolation
+ short       m_UserSetting_InterpolationOMP;
+ 
  // Balance the greens before the interpolation
  short       m_UserSetting_BalanceGreens;

Index: jdlMain.h
===================================================================
RCS file: /cvsroot/jdlraw/jdlRaw/Sources/jdlMain.h,v
retrieving revision 1.10
retrieving revision 1.11
diff -C2 -d -r1.10 -r1.11
*** jdlMain.h	24 Apr 2010 17:30:51 -0000	1.10
--- jdlMain.h	1 May 2010 07:38:25 -0000	1.11
***************
*** 169,172 ****
--- 169,173 ----

  void  CB_Interpolation(const QVariant);
+ void  CB_InterpolationOMP(const QVariant);
  void  CB_DCBIterations(const QVariant);
  void  CB_DCBEnhance(const QVariant);

Index: jdlSettings.cpp
===================================================================
RCS file: /cvsroot/jdlraw/jdlRaw/Sources/jdlSettings.cpp,v
retrieving revision 1.15
retrieving revision 1.16
diff -C2 -d -r1.15 -r1.16
*** jdlSettings.cpp	24 Apr 2010 17:30:51 -0000	1.15
--- jdlSettings.cpp	1 May 2010 07:38:25 -0000	1.16
***************
*** 1026,1029 ****
--- 1026,1032 ----
    TheDcRaw->m_UserSetting_FourColorRGB = GetInt("FourColorRGB");

+   // Use OMP on interpolation
+   TheDcRaw->m_UserSetting_InterpolationOMP = GetInt("InterpolationOMP");
+ 
    // DCB settings
    TheDcRaw->m_UserSetting_DCBIterations = GetInt("DCBIterations");

Index: jdlDcRaw.cpp
===================================================================
RCS file: /cvsroot/jdlraw/jdlRaw/Sources/jdlDcRaw.cpp,v
retrieving revision 1.16
retrieving revision 1.17
diff -C2 -d -r1.16 -r1.17
*** jdlDcRaw.cpp	27 Apr 2010 17:30:35 -0000	1.16
--- jdlDcRaw.cpp	1 May 2010 07:38:24 -0000	1.17
***************
*** 37,40 ****
--- 37,41 ----
  #include "jdlConstants.h"

+ #include <omp.h> 
  // Macro fix for explicit fread returnvalue check.
  #define jdlfread(ptr,size,n,stream)     \
***************
*** 59,62 ****
--- 60,66 ----
  }

+ // To assess impact on speed and correctness we make an extra
+ #define DCRAW_ENABLE_OMP
+ 
  // The class.
  #define CLASS DcRaw::
***************
*** 3771,3776 ****
    // very inequal in workload.
    // The chunk size is introduced as to avoid too small fractions.
!   const int Chunk = m_OutHeight/16;
!   #pragma omp parallel for default(shared) schedule(dynamic,Chunk)
    for (uint16_t Row=0; Row < m_OutHeight; Row++) {

--- 3775,3781 ----
    // very inequal in workload.
    // The chunk size is introduced as to avoid too small fractions.
!   #ifdef DCRAW_ENABLE_OMP
!   #pragma omp parallel for default(shared) schedule(dynamic,m_OutHeight/16)
!   #endif
    for (uint16_t Row=0; Row < m_OutHeight; Row++) {

***************
*** 3845,3850 ****
      // The chunk size is introduced as to avoid too small fractions.
      const int c_DetailBorder = 2;
!     const int Chunk = m_OutHeight/16;
!     #pragma omp parallel for default(shared) schedule(dynamic,Chunk)
      for (uint16_t Row = 0; Row < m_OutHeight; Row++) {

--- 3850,3856 ----
      // The chunk size is introduced as to avoid too small fractions.
      const int c_DetailBorder = 2;
!     #ifdef DCRAW_ENABLE_OMP
!     #pragma omp parallel for default(shared) schedule(dynamic,m_OutHeight/16)
!     #endif
      for (uint16_t Row = 0; Row < m_OutHeight; Row++) {

***************
*** 3912,3916 ****
              // And now we correct it back for the increased exposure.
              // (but clipped stays clipped !)
!             ClippedPixel[Color] = (uint16_t)(ClippedPixel[Color]* m_MinPreMulti);
            }

--- 3918,3922 ----
              // And now we correct it back for the increased exposure.
              // (but clipped stays clipped !)
!             ClippedPixel[Color]= (uint16_t)(ClippedPixel[Color]* m_MinPreMulti);
            }

***************
*** 3996,4000 ****
    	  } else {
                ClippedSaturation = 
!                 1.0 - (double)ClippedPixel[MinChannel] / ClippedPixel[MaxChannel];
              }
              double ClippedHue;
--- 4002,4006 ----
    	  } else {
                ClippedSaturation = 
!                 1.0-(double)ClippedPixel[MinChannel] / ClippedPixel[MaxChannel];
              }
              double ClippedHue;
***************
*** 4066,4071 ****
    // The chunk size is introduced as to avoid too small fractions.
    const int c_DetailBorder = 2;
!   const int Chunk = m_Height/16;
!   #pragma omp parallel for default(shared) schedule(dynamic,Chunk)
    for (uint16_t Row=1; Row<m_Height-1; Row++) {

--- 4072,4078 ----
    // The chunk size is introduced as to avoid too small fractions.
    const int c_DetailBorder = 2;
!   #ifdef DCRAW_ENABLE_OMP
!   #pragma omp parallel for default(shared) schedule(dynamic,m_Height/16)
!   #endif
    for (uint16_t Row=1; Row<m_Height-1; Row++) {

***************
*** 4110,4115 ****
    // very inequal in workload.
    // The chunk size is introduced as to avoid too small fractions.
!   // Have it from previous loop : const int Chunk = m_Height/16;
!   #pragma omp parallel for default(shared) schedule(dynamic,Chunk)
    for (uint16_t Row=1; Row<m_Height-1; Row++) {

--- 4117,4123 ----
    // very inequal in workload.
    // The chunk size is introduced as to avoid too small fractions.
!   #ifdef DCRAW_ENABLE_OMP
!   #pragma omp parallel for default(shared) schedule(dynamic,m_Height/16)
!   #endif
    for (uint16_t Row=1; Row<m_Height-1; Row++) {

***************
*** 4179,4184 ****
        // The chunk size is introduced as to avoid too small fractions.
        const int c_DetailBorder = 2;
!       const int Chunk = m_Height/16;
!       #pragma omp parallel for default(shared) schedule(dynamic,Chunk)
        for (uint16_t Row = FC(1,0) >> 1; Row < m_Height; Row+=2) {

--- 4187,4193 ----
        // The chunk size is introduced as to avoid too small fractions.
        const int c_DetailBorder = 2;
!       #ifdef DCRAW_ENABLE_OMP
!       #pragma omp parallel for default(shared) schedule(dynamic,m_Height/16)
!       #endif
        for (uint16_t Row = FC(1,0) >> 1; Row < m_Height; Row+=2) {

***************
*** 4231,4265 ****
  void CLASS BorderInterpolate (uint16_t Border) {

-   const uint16_t c_DetailBorder = Border+2; // Probably more than enough margin.
- 
-   // We allocate with dynamic schedule as the "Detail" view can
-   // stop very early many of the rows/cols. So the chunks are
-   // very inequal in workload.
-   // The chunk size is introduced as to avoid too small fractions.
-   const int Chunk = m_Height/16;
-   #pragma omp parallel for default(shared) schedule(dynamic,Chunk)
    for (uint16_t Row=0; Row < m_Height; Row++) {
- 
-     if (m_Stop) {
-       // OpenMP cannot exit, but continue will go fast now :)
-       continue;
-     }
- 
-     if (m_UserSetting_Detail &&
-          (Row + c_DetailBorder < m_UserSetting_DetailRowStart ||
-           Row > m_UserSetting_DetailRowEnd + c_DetailBorder) ) {
-       continue;
-     }
- 
      uint32_t Offset = Row * m_Width;
- 
      for (uint16_t Col=0; Col < m_Width; Col++) {
- 
-       if (m_UserSetting_Detail &&
-            (Col + c_DetailBorder < m_UserSetting_DetailColStart ||
-             Col > m_UserSetting_DetailColEnd + c_DetailBorder) ) {
-         continue;
-       }
- 
        if (Col==Border && Row >= Border && Row < (m_Height-Border)) {
          // Jump to the right.
--- 4240,4246 ----
***************
*** 4344,4355 ****

    // Real start of the linear interpolate.
!   // XXX JDLA Check once more OPENMP !!!
!   // I believe the last assignment Pix[ip[0]] is OK, but check once more.
    // We allocate with dynamic schedule as the "Detail" view can
    // stop very early many of the rows/cols. So the chunks are
    // very inequal in workload.
    // The chunk size is introduced as to avoid too small fractions.
!   const int Chunk = m_Height/16;
!   #pragma omp parallel for default(shared) schedule(dynamic,Chunk)
    for (uint16_t Row=1; Row < m_Height-1; Row++) {

--- 4325,4339 ----

    // Real start of the linear interpolate.
!   // I believe a simple OPENMP loop is fine.
!   // I had some concern about the last assignment Pix[ip[0]]
!   // But I believe it is ok as the for (Row=1 ..) loop should
!   // be runnable the other way around as well and hence parallelization is OK.
    // We allocate with dynamic schedule as the "Detail" view can
    // stop very early many of the rows/cols. So the chunks are
    // very inequal in workload.
    // The chunk size is introduced as to avoid too small fractions.
!   #ifdef DCRAW_ENABLE_OMP
!   #pragma omp parallel for default(shared) schedule(dynamic,m_Height/16)
!   #endif
    for (uint16_t Row=1; Row < m_Height-1; Row++) {

***************
*** 4436,4443 ****
    }, 
    chood[] = { -1,-1, -1,0, -1,+1, 0,+1, +1,+1, +1,0, +1,-1, 0,-1 };
!   uint16_t (*brow[5])[4], *pix;
!   int prow=7, pcol=1, *ip, *code[16][16], gval[8], gmin, gmax, sum[4];
    int row, col, x, y, x1, x2, y1, y2, t, weight, grads, color, diag;
!   int g, diff, thold, num, c;

    LinearInterpolate();
--- 4420,4426 ----
    }, 
    chood[] = { -1,-1, -1,0, -1,+1, 0,+1, +1,+1, +1,0, +1,-1, 0,-1 };
!   int prow=7, pcol=1, *ip, *code[16][16];
    int row, col, x, y, x1, x2, y1, y2, t, weight, grads, color, diag;
!   int g;

    LinearInterpolate();
***************
*** 4483,4565 ****
      }
    }
-   brow[4] = (uint16_t (*)[4]) CALLOC (m_Width*3, sizeof **brow);
-   merror (brow[4], "VNGInterpolate()");
-   for (row=0; row < 3; row++)
-     brow[row] = brow[4] + row*m_Width;

!   // Here starts the real interpolation.
!   // XXX JDLA TODO Investigate if OpenMP parallelization is possible.
!   for (row=2; row < m_Height-2; row++) {	/* Do VNG interpolation */
!     
!     if (m_Stop) {
!       FREE (brow[4]);
!       FREE (code[0][0]);
!       return;
!     }

!     if (m_UserSetting_Detail &&
!          (row +c_DetailBorder < m_UserSetting_DetailRowStart ||
!           row > m_UserSetting_DetailRowEnd + c_DetailBorder) ) {
!       continue;
      }

!     for (col=2; col < m_Width-2; col++) {

!       if (m_UserSetting_Detail &&
!            (col  + c_DetailBorder < m_UserSetting_DetailColStart ||
!             col > m_UserSetting_DetailColEnd + c_DetailBorder) ) {
!         continue;
        }

!       pix = m_Image[row*m_Width+col];
!       ip = code[row & prow][col & pcol];
!       memset (gval, 0, sizeof gval);
!       while ((g = ip[0]) != INT_MAX) {		/* Calculate gradients */
! 	diff = ABS(pix[g] - pix[ip[1]]) << ip[2];
! 	gval[ip[3]] += diff;
! 	ip += 5;
! 	if ((g = ip[-1]) == -1) continue;
! 	gval[g] += diff;
! 	while ((g = *ip++) != -1)
! 	  gval[g] += diff;
!       }
!       ip++;
!       gmin = gmax = gval[0];			/* Choose a threshold */
!       for (g=1; g < 8; g++) {
! 	if (gmin > gval[g]) gmin = gval[g];
! 	if (gmax < gval[g]) gmax = gval[g];
        }
!       if (gmax == 0) {
! 	memcpy (brow[2][col], pix, sizeof *m_Image);
! 	continue;
        }
!       thold = gmin + (gmax >> 1);
!       memset (sum, 0, sizeof sum);
!       color = fc(row,col);
!       for (num=g=0; g < 8; g++,ip+=2) {		/* Average the neighbors */
! 	if (gval[g] <= thold) {
! 	  for (c=0; c < m_Colors; c++)
! 	    if (c == color && ip[1])
! 	      sum[c] += (pix[c] + pix[ip[1]]) >> 1;
! 	    else
! 	      sum[c] += pix[ip[0] + c];
! 	  num++;
! 	}
        }
!       for (c=0; c < m_Colors; c++) {		/* Save to buffer */
! 	t = pix[color];
! 	if (c != color)
! 	  t += (sum[c] - sum[color]) / num;
! 	brow[2][col][c] = CLIP(t);
        }
      }
!     if (row > 3)				/* Write buffer to image */
!       memcpy (m_Image[(row-2)*m_Width+2], brow[0]+2, (m_Width-4)*sizeof *m_Image);
!     for (g=0; g < 4; g++)
!       brow[(g-1) & 3] = brow[g];
!   }
!   memcpy (m_Image[(row-2)*m_Width+2], brow[0]+2, (m_Width-4)*sizeof *m_Image);
!   memcpy (m_Image[(row-1)*m_Width+2], brow[1]+2, (m_Width-4)*sizeof *m_Image);
!   FREE (brow[4]);
    FREE (code[0][0]);
  }
--- 4466,4611 ----
      }
    }

!   // OMP on/off via user
!   #ifdef _OPENMP
!   omp_set_num_threads(m_UserSetting_InterpolationOMP ? omp_get_max_threads():1);
!   #endif

!   // Some helping variables that are needed in OMP case.
!   // But they are not sitting in the way for the non OMP case.
!   int PreviousRow = -1;
!   int RowInChunk  = -1;
! 
!   #ifdef DCRAW_ENABLE_OMP
!   #pragma omp parallel default(shared) 
!   #endif
!   {
!     uint16_t (*brow[5])[4];
!     brow[4] = (uint16_t (*)[4]) CALLOC (m_Width*3, sizeof **brow);
!     merror (brow[4], "VNGInterpolate()");
!     for (short TmpRow=0; TmpRow < 3; TmpRow++) {
!       brow[TmpRow] = brow[4] + TmpRow*m_Width;
      }

!     // Here starts the real interpolation.
!     // XXX JDLA I'm not fully sure the parallelization is theoretically OK.
!     // It should be if the outer loop is direction independent,
!     // which seems like a fair assumption ?
!     // Remark the lastprivate on row as loopvariable used at the end.
!     #ifdef DCRAW_ENABLE_OMP
!     #pragma omp for lastprivate(row) firstprivate(PreviousRow,RowInChunk) \
!      schedule(dynamic,m_Height/16)
!     #endif
!     for (row=2; row < m_Height-2; row++) {  /* Do VNG interpolation */

!       if (abs(row-PreviousRow)>1) {
!         // This is the beginning of a new chunk.
!         if (PreviousRow != -1) {
!           // Still to finalize the previous chunk
!           memcpy (m_Image[(PreviousRow-1)*m_Width+2], 
!                   brow[0]+2, 
!                   (m_Width-4)*sizeof *m_Image);
!           memcpy (m_Image[PreviousRow*m_Width+2], 
!                   brow[1]+2, 
!                   (m_Width-4)*sizeof *m_Image);
!         }
!         RowInChunk = 0;
        }

!       PreviousRow = row;
!       RowInChunk++;
!  
!       if (m_Stop) {
!         // We avoid stopping (OpenMP) but do a real quick loop ...
!         continue;
        }
!   
!       if (m_UserSetting_Detail &&
!            (row +c_DetailBorder < m_UserSetting_DetailRowStart ||
!             row > m_UserSetting_DetailRowEnd + c_DetailBorder) ) {
!         continue;
        }
!   
!       for (uint16_t col=2; col < m_Width-2; col++) {
!   
!         if (m_UserSetting_Detail &&
!              (col  + c_DetailBorder < m_UserSetting_DetailColStart ||
!               col > m_UserSetting_DetailColEnd + c_DetailBorder) ) {
!           continue;
!         }
!   
!         uint16_t *pix = m_Image[row*m_Width+col];
!         int *ip = code[row & prow][col & pcol];
!         int gval[8];
!         memset (gval, 0, sizeof gval);
!         int g;
!         while ((g = ip[0]) != INT_MAX) {	/* Calculate gradients */
!   	int diff = ABS(pix[g] - pix[ip[1]]) << ip[2];
!   	gval[ip[3]] += diff;
!   	ip += 5;
!   	if ((g = ip[-1]) == -1) continue;
!   	gval[g] += diff;
!   	while ((g = *ip++) != -1)
!   	  gval[g] += diff;
!         }
!         ip++;
!         int gmin = gval[0];			/* Choose a threshold */
!         int gmax = gval[0];			/* Choose a threshold */
!         for (g=1; g < 8; g++) {
!   	if (gmin > gval[g]) gmin = gval[g];
!   	if (gmax < gval[g]) gmax = gval[g];
!         }
!         if (gmax == 0) {
!   	memcpy (brow[2][col], pix, sizeof *m_Image);
!   	continue;
!         }
!         int thold = gmin + (gmax >> 1);
!         int sum[4];
!         memset (sum, 0, sizeof sum);
!         int color = fc(row,col);
!         int num;
!         for (num=g=0; g < 8; g++,ip+=2) {	/* Average the neighbors */
!     	if (gval[g] <= thold) {
!     	  for (short c=0; c < m_Colors; c++)
!     	    if (c == color && ip[1])
!     	      sum[c] += (pix[c] + pix[ip[1]]) >> 1;
!     	    else
!     	      sum[c] += pix[ip[0] + c];
!     	  num++;
!     	  }
!         }
!         for (short c=0; c < m_Colors; c++) {		/* Save to buffer */
!   	  int t = pix[color];
!   	  if (c != color)
!   	    t += (sum[c] - sum[color]) / num;
!   	  brow[2][col][c] = CLIP(t);
!         }
        }
!       // WAS if (row > 3) {		/* Write buffer to image */
!       if (RowInChunk>2) {
!         memcpy(m_Image[(row-2)*m_Width+2],
!                brow[0]+2,
!                (m_Width-4)*sizeof *m_Image);
        }
+       // Visually : shift brow one row.
+       for (int g=0; g < 4; g++) brow[(g-1) & 3] = brow[g];
      }
! 
!     #ifdef _OPENMP
!     int Tid = omp_get_thread_num();
!     #else
!     int Tid = 0;
!     #endif
!     if (Tid == 0) {
!       memcpy (m_Image[(row-2)*m_Width+2], 
!               brow[0]+2, 
!               (m_Width-4)*sizeof *m_Image);
!       memcpy (m_Image[(row-1)*m_Width+2], 
!               brow[1]+2, 
!               (m_Width-4)*sizeof *m_Image);
!     }
! 
!     FREE (brow[4]);
!   } // End OMP parallel section
    FREE (code[0][0]);
  }

[Jdlraw-discussion] jdlRaw/Sources jdlDcRaw.cpp, 1.16, 1.17 jdlDcRaw.h, 1.10, 1.11 jdlDcRaw.i, 1.4,

[Jdlraw-discussion] jdlRaw/Sources jdlDcRaw.cpp, 1.16, 1.17 jdlDcRaw.h, 1.10, 1.11 jdlDcRaw.i, 1.4, 1.5 jdlMain.cpp, 1.25, 1.26 jdlMain.h, 1.10, 1.11 jdlSettings.cpp, 1.15, 1.16