[Jdlraw-discussion] jdlRaw/Sources jdlDcRaw.cpp, 1.16, 1.17 jdlDcRaw.h, 1.10, 1.11 jdlDcRaw.i, 1.4,
Status: Beta
Brought to you by:
jdla
|
From: Jos De L. <jd...@us...> - 2010-05-01 07:38:32
|
Update of /cvsroot/jdlraw/jdlRaw/Sources In directory sfp-cvsdas-3.v30.ch3.sourceforge.com:/tmp/cvs-serv20264/Sources Modified Files: jdlDcRaw.cpp jdlDcRaw.h jdlDcRaw.i jdlMain.cpp jdlMain.h jdlSettings.cpp Log Message: *) Added OMP to the VNG demosaicing. The end result is mathematically not *exactly* equal to the non-OMP case. However, visually, I can't see any relevant deviation. (By the way, the reason is clear: in the OMP case the photo is treated as if it were divided into N separate photos, each with VNG applied to it, which gives differences at the boundaries compared with applying VNG to one whole photo. But I believe it is correct.) I added a user option to switch OMP on/off. Index: jdlMain.cpp =================================================================== RCS file: /cvsroot/jdlraw/jdlRaw/Sources/jdlMain.cpp,v retrieving revision 1.25 retrieving revision 1.26 diff -C2 -d -r1.25 -r1.26 *** jdlMain.cpp 24 Apr 2010 17:30:51 -0000 1.25 --- jdlMain.cpp 1 May 2010 07:38:25 -0000 1.26 *************** *** 1764,1767 **** --- 1764,1773 ---- } + void jdlMain::CB_InterpolationOMP(const QVariant) { + m_TheProcessor->m_TriggerUpdateExifInfoAtEnd = 0; + m_TheProcessor->m_TriggerZoomFitAtEnd = 0; + m_TheProcessor->RunFixed(jdlProcessorPhase_Demosaic); + } + //////////////////////////////////////////////////////////////////////////////// // *************** *** 1859,1862 **** --- 1865,1869 ---- M_Dispatch(Interpolation) M_Dispatch(FourColorRGB) + M_Dispatch(InterpolationOMP) M_Dispatch(DCBIterations) M_Dispatch(DCBEnhance) Index: jdlDcRaw.i =================================================================== RCS file: /cvsroot/jdlraw/jdlRaw/Sources/jdlDcRaw.i,v retrieving revision 1.4 retrieving revision 1.5 diff -C2 -d -r1.4 -r1.5 *** jdlDcRaw.i 24 Apr 2010 17:30:51 -0000 1.4 --- jdlDcRaw.i 1 May 2010 07:38:25 -0000 1.5 *************** *** 334,337 **** --- 334,346 ---- _("Mix the 2 green channels")}, + {"Fixed_Interpolation", + "InterpolationOMP", + jdlGT_Check, + 2, + 1, + 1, + _("Interpolation OMP"), + 
_("OMP parallelization used on interpolation")}, + #endif Index: jdlDcRaw.h =================================================================== RCS file: /cvsroot/jdlraw/jdlRaw/Sources/jdlDcRaw.h,v retrieving revision 1.10 retrieving revision 1.11 diff -C2 -d -r1.10 -r1.11 *** jdlDcRaw.h 24 Apr 2010 17:30:51 -0000 1.10 --- jdlDcRaw.h 1 May 2010 07:38:25 -0000 1.11 *************** *** 176,179 **** --- 176,182 ---- short m_UserSetting_FourColorRGB; + // Whether or not to allow OMP on interpolation + short m_UserSetting_InterpolationOMP; + // Balance the greens before the interpolation short m_UserSetting_BalanceGreens; Index: jdlMain.h =================================================================== RCS file: /cvsroot/jdlraw/jdlRaw/Sources/jdlMain.h,v retrieving revision 1.10 retrieving revision 1.11 diff -C2 -d -r1.10 -r1.11 *** jdlMain.h 24 Apr 2010 17:30:51 -0000 1.10 --- jdlMain.h 1 May 2010 07:38:25 -0000 1.11 *************** *** 169,172 **** --- 169,173 ---- void CB_Interpolation(const QVariant); + void CB_InterpolationOMP(const QVariant); void CB_DCBIterations(const QVariant); void CB_DCBEnhance(const QVariant); Index: jdlSettings.cpp =================================================================== RCS file: /cvsroot/jdlraw/jdlRaw/Sources/jdlSettings.cpp,v retrieving revision 1.15 retrieving revision 1.16 diff -C2 -d -r1.15 -r1.16 *** jdlSettings.cpp 24 Apr 2010 17:30:51 -0000 1.15 --- jdlSettings.cpp 1 May 2010 07:38:25 -0000 1.16 *************** *** 1026,1029 **** --- 1026,1032 ---- TheDcRaw->m_UserSetting_FourColorRGB = GetInt("FourColorRGB"); + // Use OMP on interpolation + TheDcRaw->m_UserSetting_InterpolationOMP = GetInt("InterpolationOMP"); + // DCB settings TheDcRaw->m_UserSetting_DCBIterations = GetInt("DCBIterations"); Index: jdlDcRaw.cpp =================================================================== RCS file: /cvsroot/jdlraw/jdlRaw/Sources/jdlDcRaw.cpp,v retrieving revision 1.16 retrieving revision 1.17 diff -C2 -d -r1.16 -r1.17 *** 
jdlDcRaw.cpp 27 Apr 2010 17:30:35 -0000 1.16 --- jdlDcRaw.cpp 1 May 2010 07:38:24 -0000 1.17 *************** *** 37,40 **** --- 37,41 ---- #include "jdlConstants.h" + #include <omp.h> // Macro fix for explicit fread returnvalue check. #define jdlfread(ptr,size,n,stream) \ *************** *** 59,62 **** --- 60,66 ---- } + // To assess impact on speed and correctness we make an extra + #define DCRAW_ENABLE_OMP + // The class. #define CLASS DcRaw:: *************** *** 3771,3776 **** // very inequal in workload. // The chunk size is introduced as to avoid too small fractions. ! const int Chunk = m_OutHeight/16; ! #pragma omp parallel for default(shared) schedule(dynamic,Chunk) for (uint16_t Row=0; Row < m_OutHeight; Row++) { --- 3775,3781 ---- // very inequal in workload. // The chunk size is introduced as to avoid too small fractions. ! #ifdef DCRAW_ENABLE_OMP ! #pragma omp parallel for default(shared) schedule(dynamic,m_OutHeight/16) ! #endif for (uint16_t Row=0; Row < m_OutHeight; Row++) { *************** *** 3845,3850 **** // The chunk size is introduced as to avoid too small fractions. const int c_DetailBorder = 2; ! const int Chunk = m_OutHeight/16; ! #pragma omp parallel for default(shared) schedule(dynamic,Chunk) for (uint16_t Row = 0; Row < m_OutHeight; Row++) { --- 3850,3856 ---- // The chunk size is introduced as to avoid too small fractions. const int c_DetailBorder = 2; ! #ifdef DCRAW_ENABLE_OMP ! #pragma omp parallel for default(shared) schedule(dynamic,m_OutHeight/16) ! #endif for (uint16_t Row = 0; Row < m_OutHeight; Row++) { *************** *** 3912,3916 **** // And now we correct it back for the increased exposure. // (but clipped stays clipped !) ! ClippedPixel[Color] = (uint16_t)(ClippedPixel[Color]* m_MinPreMulti); } --- 3918,3922 ---- // And now we correct it back for the increased exposure. // (but clipped stays clipped !) ! 
ClippedPixel[Color]= (uint16_t)(ClippedPixel[Color]* m_MinPreMulti); } *************** *** 3996,4000 **** } else { ClippedSaturation = ! 1.0 - (double)ClippedPixel[MinChannel] / ClippedPixel[MaxChannel]; } double ClippedHue; --- 4002,4006 ---- } else { ClippedSaturation = ! 1.0-(double)ClippedPixel[MinChannel] / ClippedPixel[MaxChannel]; } double ClippedHue; *************** *** 4066,4071 **** // The chunk size is introduced as to avoid too small fractions. const int c_DetailBorder = 2; ! const int Chunk = m_Height/16; ! #pragma omp parallel for default(shared) schedule(dynamic,Chunk) for (uint16_t Row=1; Row<m_Height-1; Row++) { --- 4072,4078 ---- // The chunk size is introduced as to avoid too small fractions. const int c_DetailBorder = 2; ! #ifdef DCRAW_ENABLE_OMP ! #pragma omp parallel for default(shared) schedule(dynamic,m_Height/16) ! #endif for (uint16_t Row=1; Row<m_Height-1; Row++) { *************** *** 4110,4115 **** // very inequal in workload. // The chunk size is introduced as to avoid too small fractions. ! // Have it from previous loop : const int Chunk = m_Height/16; ! #pragma omp parallel for default(shared) schedule(dynamic,Chunk) for (uint16_t Row=1; Row<m_Height-1; Row++) { --- 4117,4123 ---- // very inequal in workload. // The chunk size is introduced as to avoid too small fractions. ! #ifdef DCRAW_ENABLE_OMP ! #pragma omp parallel for default(shared) schedule(dynamic,m_Height/16) ! #endif for (uint16_t Row=1; Row<m_Height-1; Row++) { *************** *** 4179,4184 **** // The chunk size is introduced as to avoid too small fractions. const int c_DetailBorder = 2; ! const int Chunk = m_Height/16; ! #pragma omp parallel for default(shared) schedule(dynamic,Chunk) for (uint16_t Row = FC(1,0) >> 1; Row < m_Height; Row+=2) { --- 4187,4193 ---- // The chunk size is introduced as to avoid too small fractions. const int c_DetailBorder = 2; ! #ifdef DCRAW_ENABLE_OMP ! #pragma omp parallel for default(shared) schedule(dynamic,m_Height/16) ! 
#endif for (uint16_t Row = FC(1,0) >> 1; Row < m_Height; Row+=2) { *************** *** 4231,4265 **** void CLASS BorderInterpolate (uint16_t Border) { - const uint16_t c_DetailBorder = Border+2; // Probably more than enough margin. - - // We allocate with dynamic schedule as the "Detail" view can - // stop very early many of the rows/cols. So the chunks are - // very inequal in workload. - // The chunk size is introduced as to avoid too small fractions. - const int Chunk = m_Height/16; - #pragma omp parallel for default(shared) schedule(dynamic,Chunk) for (uint16_t Row=0; Row < m_Height; Row++) { - - if (m_Stop) { - // OpenMP cannot exit, but continue will go fast now :) - continue; - } - - if (m_UserSetting_Detail && - (Row + c_DetailBorder < m_UserSetting_DetailRowStart || - Row > m_UserSetting_DetailRowEnd + c_DetailBorder) ) { - continue; - } - uint32_t Offset = Row * m_Width; - for (uint16_t Col=0; Col < m_Width; Col++) { - - if (m_UserSetting_Detail && - (Col + c_DetailBorder < m_UserSetting_DetailColStart || - Col > m_UserSetting_DetailColEnd + c_DetailBorder) ) { - continue; - } - if (Col==Border && Row >= Border && Row < (m_Height-Border)) { // Jump to the right. --- 4240,4246 ---- *************** *** 4344,4355 **** // Real start of the linear interpolate. ! // XXX JDLA Check once more OPENMP !!! ! // I believe the last assignment Pix[ip[0]] is OK, but check once more. // We allocate with dynamic schedule as the "Detail" view can // stop very early many of the rows/cols. So the chunks are // very inequal in workload. // The chunk size is introduced as to avoid too small fractions. ! const int Chunk = m_Height/16; ! #pragma omp parallel for default(shared) schedule(dynamic,Chunk) for (uint16_t Row=1; Row < m_Height-1; Row++) { --- 4325,4339 ---- // Real start of the linear interpolate. ! // I believe a simple OPENMP loop is fine. ! // I had some concern about the last assignment Pix[ip[0]] ! // But I believe it is ok as the for (Row=1 ..) loop should ! 
// be runnable the other way around as well and hence parallelization is OK. // We allocate with dynamic schedule as the "Detail" view can // stop very early many of the rows/cols. So the chunks are // very inequal in workload. // The chunk size is introduced as to avoid too small fractions. ! #ifdef DCRAW_ENABLE_OMP ! #pragma omp parallel for default(shared) schedule(dynamic,m_Height/16) ! #endif for (uint16_t Row=1; Row < m_Height-1; Row++) { *************** *** 4436,4443 **** }, chood[] = { -1,-1, -1,0, -1,+1, 0,+1, +1,+1, +1,0, +1,-1, 0,-1 }; ! uint16_t (*brow[5])[4], *pix; ! int prow=7, pcol=1, *ip, *code[16][16], gval[8], gmin, gmax, sum[4]; int row, col, x, y, x1, x2, y1, y2, t, weight, grads, color, diag; ! int g, diff, thold, num, c; LinearInterpolate(); --- 4420,4426 ---- }, chood[] = { -1,-1, -1,0, -1,+1, 0,+1, +1,+1, +1,0, +1,-1, 0,-1 }; ! int prow=7, pcol=1, *ip, *code[16][16]; int row, col, x, y, x1, x2, y1, y2, t, weight, grads, color, diag; ! int g; LinearInterpolate(); *************** *** 4483,4565 **** } } - brow[4] = (uint16_t (*)[4]) CALLOC (m_Width*3, sizeof **brow); - merror (brow[4], "VNGInterpolate()"); - for (row=0; row < 3; row++) - brow[row] = brow[4] + row*m_Width; ! // Here starts the real interpolation. ! // XXX JDLA TODO Investigate if OpenMP parallelization is possible. ! for (row=2; row < m_Height-2; row++) { /* Do VNG interpolation */ ! ! if (m_Stop) { ! FREE (brow[4]); ! FREE (code[0][0]); ! return; ! } ! if (m_UserSetting_Detail && ! (row +c_DetailBorder < m_UserSetting_DetailRowStart || ! row > m_UserSetting_DetailRowEnd + c_DetailBorder) ) { ! continue; } ! for (col=2; col < m_Width-2; col++) { ! if (m_UserSetting_Detail && ! (col + c_DetailBorder < m_UserSetting_DetailColStart || ! col > m_UserSetting_DetailColEnd + c_DetailBorder) ) { ! continue; } ! pix = m_Image[row*m_Width+col]; ! ip = code[row & prow][col & pcol]; ! memset (gval, 0, sizeof gval); ! while ((g = ip[0]) != INT_MAX) { /* Calculate gradients */ ! 
diff = ABS(pix[g] - pix[ip[1]]) << ip[2]; ! gval[ip[3]] += diff; ! ip += 5; ! if ((g = ip[-1]) == -1) continue; ! gval[g] += diff; ! while ((g = *ip++) != -1) ! gval[g] += diff; ! } ! ip++; ! gmin = gmax = gval[0]; /* Choose a threshold */ ! for (g=1; g < 8; g++) { ! if (gmin > gval[g]) gmin = gval[g]; ! if (gmax < gval[g]) gmax = gval[g]; } ! if (gmax == 0) { ! memcpy (brow[2][col], pix, sizeof *m_Image); ! continue; } ! thold = gmin + (gmax >> 1); ! memset (sum, 0, sizeof sum); ! color = fc(row,col); ! for (num=g=0; g < 8; g++,ip+=2) { /* Average the neighbors */ ! if (gval[g] <= thold) { ! for (c=0; c < m_Colors; c++) ! if (c == color && ip[1]) ! sum[c] += (pix[c] + pix[ip[1]]) >> 1; ! else ! sum[c] += pix[ip[0] + c]; ! num++; ! } } ! for (c=0; c < m_Colors; c++) { /* Save to buffer */ ! t = pix[color]; ! if (c != color) ! t += (sum[c] - sum[color]) / num; ! brow[2][col][c] = CLIP(t); } } ! if (row > 3) /* Write buffer to image */ ! memcpy (m_Image[(row-2)*m_Width+2], brow[0]+2, (m_Width-4)*sizeof *m_Image); ! for (g=0; g < 4; g++) ! brow[(g-1) & 3] = brow[g]; ! } ! memcpy (m_Image[(row-2)*m_Width+2], brow[0]+2, (m_Width-4)*sizeof *m_Image); ! memcpy (m_Image[(row-1)*m_Width+2], brow[1]+2, (m_Width-4)*sizeof *m_Image); ! FREE (brow[4]); FREE (code[0][0]); } --- 4466,4611 ---- } } ! // OMP on/off via user ! #ifdef _OPENMP ! omp_set_num_threads(m_UserSetting_InterpolationOMP ? omp_get_max_threads():1); ! #endif ! // Some helping variables that are needed in OMP case. ! // But they are not sitting in the way for the non OMP case. ! int PreviousRow = -1; ! int RowInChunk = -1; ! ! #ifdef DCRAW_ENABLE_OMP ! #pragma omp parallel default(shared) ! #endif ! { ! uint16_t (*brow[5])[4]; ! brow[4] = (uint16_t (*)[4]) CALLOC (m_Width*3, sizeof **brow); ! merror (brow[4], "VNGInterpolate()"); ! for (short TmpRow=0; TmpRow < 3; TmpRow++) { ! brow[TmpRow] = brow[4] + TmpRow*m_Width; } ! // Here starts the real interpolation. ! 
// XXX JDLA I'm not fully sure the parallelization is theoretically OK. ! // It should be if the outer loop is direction independent, ! // which seems like a fair assumption ? ! // Remark the lastprivate on row as loopvariable used at the end. ! #ifdef DCRAW_ENABLE_OMP ! #pragma omp for lastprivate(row) firstprivate(PreviousRow,RowInChunk) \ ! schedule(dynamic,m_Height/16) ! #endif ! for (row=2; row < m_Height-2; row++) { /* Do VNG interpolation */ ! if (abs(row-PreviousRow)>1) { ! // This is the beginning of a new chunk. ! if (PreviousRow != -1) { ! // Still to finalize the previous chunk ! memcpy (m_Image[(PreviousRow-1)*m_Width+2], ! brow[0]+2, ! (m_Width-4)*sizeof *m_Image); ! memcpy (m_Image[PreviousRow*m_Width+2], ! brow[1]+2, ! (m_Width-4)*sizeof *m_Image); ! } ! RowInChunk = 0; } ! PreviousRow = row; ! RowInChunk++; ! ! if (m_Stop) { ! // We avoid stopping (OpenMP) but do a real quick loop ... ! continue; } ! ! if (m_UserSetting_Detail && ! (row +c_DetailBorder < m_UserSetting_DetailRowStart || ! row > m_UserSetting_DetailRowEnd + c_DetailBorder) ) { ! continue; } ! ! for (uint16_t col=2; col < m_Width-2; col++) { ! ! if (m_UserSetting_Detail && ! (col + c_DetailBorder < m_UserSetting_DetailColStart || ! col > m_UserSetting_DetailColEnd + c_DetailBorder) ) { ! continue; ! } ! ! uint16_t *pix = m_Image[row*m_Width+col]; ! int *ip = code[row & prow][col & pcol]; ! int gval[8]; ! memset (gval, 0, sizeof gval); ! int g; ! while ((g = ip[0]) != INT_MAX) { /* Calculate gradients */ ! int diff = ABS(pix[g] - pix[ip[1]]) << ip[2]; ! gval[ip[3]] += diff; ! ip += 5; ! if ((g = ip[-1]) == -1) continue; ! gval[g] += diff; ! while ((g = *ip++) != -1) ! gval[g] += diff; ! } ! ip++; ! int gmin = gval[0]; /* Choose a threshold */ ! int gmax = gval[0]; /* Choose a threshold */ ! for (g=1; g < 8; g++) { ! if (gmin > gval[g]) gmin = gval[g]; ! if (gmax < gval[g]) gmax = gval[g]; ! } ! if (gmax == 0) { ! memcpy (brow[2][col], pix, sizeof *m_Image); ! continue; ! } ! 
int thold = gmin + (gmax >> 1); ! int sum[4]; ! memset (sum, 0, sizeof sum); ! int color = fc(row,col); ! int num; ! for (num=g=0; g < 8; g++,ip+=2) { /* Average the neighbors */ ! if (gval[g] <= thold) { ! for (short c=0; c < m_Colors; c++) ! if (c == color && ip[1]) ! sum[c] += (pix[c] + pix[ip[1]]) >> 1; ! else ! sum[c] += pix[ip[0] + c]; ! num++; ! } ! } ! for (short c=0; c < m_Colors; c++) { /* Save to buffer */ ! int t = pix[color]; ! if (c != color) ! t += (sum[c] - sum[color]) / num; ! brow[2][col][c] = CLIP(t); ! } } ! // WAS if (row > 3) { /* Write buffer to image */ ! if (RowInChunk>2) { ! memcpy(m_Image[(row-2)*m_Width+2], ! brow[0]+2, ! (m_Width-4)*sizeof *m_Image); } + // Visually : shift brow one row. + for (int g=0; g < 4; g++) brow[(g-1) & 3] = brow[g]; } ! ! #ifdef _OPENMP ! int Tid = omp_get_thread_num(); ! #else ! int Tid = 0; ! #endif ! if (Tid == 0) { ! memcpy (m_Image[(row-2)*m_Width+2], ! brow[0]+2, ! (m_Width-4)*sizeof *m_Image); ! memcpy (m_Image[(row-1)*m_Width+2], ! brow[1]+2, ! (m_Width-4)*sizeof *m_Image); ! } ! ! FREE (brow[4]); ! } // End OMP parallel section FREE (code[0][0]); } |