|
From: Gordon K. <kin...@us...> - 2004-03-24 06:59:24
|
Update of /cvsroot/teem/teem/src/coil In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv15148 Modified Files: coil.h coreCoil.c enumsCoil.c methodsCoil.c realmethods.c scalarCoil.c tensorCoil.c Log Message: better load balancing, better conductance, just friggen better Index: coil.h =================================================================== RCS file: /cvsroot/teem/teem/src/coil/coil.h,v retrieving revision 1.1 retrieving revision 1.2 diff -C2 -d -r1.1 -r1.2 *** coil.h 22 Mar 2004 21:40:11 -0000 1.1 --- coil.h 24 Mar 2004 06:48:50 -0000 1.2 *************** *** 72,77 **** coilMethodTypeUnknown, /* 0 */ coilMethodTypeTesting, /* 1: basically a no-op */ ! coilMethodTypeIsotropic, /* 2 */ ! coilMethodTypeAnisotropic, /* 3 */ coilMethodTypeModifiedCurvature, /* 4 */ coilMethodTypeCurvatureFlow, /* 5 */ --- 72,77 ---- coilMethodTypeUnknown, /* 0 */ coilMethodTypeTesting, /* 1: basically a no-op */ ! coilMethodTypeHomogeneous, /* 2 */ ! coilMethodTypePeronaMalik, /* 3 */ coilMethodTypeModifiedCurvature, /* 4 */ coilMethodTypeCurvatureFlow, /* 5 */ *************** *** 124,128 **** or something else (baseDim=1) */ /* all the available methods */ ! void (*filter[COIL_METHOD_TYPE_MAX+1])(coil_t *delta, coil_t *iv3, double spacing[3], double parm[COIL_PARMS_NUM]); --- 124,128 ---- or something else (baseDim=1) */ /* all the available methods */ ! void (*filter[COIL_METHOD_TYPE_MAX+1])(coil_t *delta, coil_t **iv3, double spacing[3], double parm[COIL_PARMS_NUM]); *************** *** 140,149 **** struct coilContext_t *cctx; /* parent's context */ airThread *thread; /* my thread */ ! int threadIdx, /* which thread am I */ ! startZ, endZ; /* my index range (inclusive) of slices */ ! coil_t *iv3; /* cache of local values, ordering same ! as in gage: any multiple values per ! voxel are the slowest axis, not the ! fastest */ void *returnPtr; /* for airThreadJoin */ } coilTask; --- 140,152 ---- struct coilContext_t *cctx; /* parent's context */ airThread *thread; /* my thread */ ! int threadIdx; /* which thread am I */ ! coil_t *_iv3, /* underlying value cache */ ! **iv3; /* short array of pointers into 2-D value ! caches, in which the order is based on ! the volume order: ! values, then Y, then Z */ ! /* how to fill iv3 */ ! void (*iv3Fill)(coil_t **iv3, coil_t *here, int radius, int valLen, ! int x0, int y0, int z0, int sizeX, int sizeY, int sizeZ); void *returnPtr; /* for airThreadJoin */ } coilTask; *************** *** 174,185 **** filtering result, and (2nd) the update values from the current iteration */ ! int finished; /* used to signal all threads to return */ coilTask **task; /* dynamically allocated array of tasks */ ! airThreadBarrier *filterBarrier, /* for when filtering is done, and the ! "update" values have been set */ ! *updateBarrier, /* after the update values have been ! applied to current values */ ! *finishBarrier; /* so that thread 0 can see if filtering should go onward, and set "finished" */ } coilContext; --- 177,195 ---- filtering result, and (2nd) the update values from the current iteration */ ! int finished, /* used to signal all threads to return */ ! nextSlice, /* global indicator of next slice needing ! to be processed, either in filter or ! in update stage. Stage is done when ! nextSlice == size[2] */ ! todoFilter, todoUpdate; /* flags to signal which is scheduled to ! come next, used as part of doling out ! slices to workers */ ! airThreadMutex *nextSliceMutex; /* mutex around nextSlice (and effectively, ! also the "todo" flags above) */ coilTask **task; /* dynamically allocated array of tasks */ ! airThreadBarrier *filterBarrier, /* so that thread 0 can see if filtering should go onward, and set "finished" */ + *updateBarrier; /* after the update values have been + applied to current values */ } coilContext; Index: tensorCoil.c =================================================================== RCS file: /cvsroot/teem/teem/src/coil/tensorCoil.c,v retrieving revision 1.1 retrieving revision 1.2 diff -C2 -d -r1.1 -r1.2 *** tensorCoil.c 22 Mar 2004 21:40:11 -0000 1.1 --- tensorCoil.c 24 Mar 2004 06:48:50 -0000 1.2 *************** *** 21,25 **** void ! _coilKind7TensorFilterTesting(coil_t *delta, coil_t *iv3, double spacing[3], double parm[COIL_PARMS_NUM]) { --- 21,25 ---- void ! _coilKind7TensorFilterTesting(coil_t *delta, coil_t **iv3, double spacing[3], double parm[COIL_PARMS_NUM]) { *************** *** 34,40 **** void ! _coilKind7TensorFilterIsotropic(coil_t *delta, coil_t *iv3, ! double spacing[3], ! double parm[COIL_PARMS_NUM]) { } --- 34,40 ---- void ! _coilKind7TensorFilterHomogeneous(coil_t *delta, coil_t **iv3, ! double spacing[3], ! double parm[COIL_PARMS_NUM]) { } *************** *** 58,62 **** {NULL, _coilKind7TensorFilterTesting, ! _coilKind7TensorFilterIsotropic, NULL, NULL, --- 58,62 ---- {NULL, _coilKind7TensorFilterTesting, ! _coilKind7TensorFilterHomogeneous, NULL, NULL, Index: scalarCoil.c =================================================================== RCS file: /cvsroot/teem/teem/src/coil/scalarCoil.c,v retrieving revision 1.1 retrieving revision 1.2 diff -C2 -d -r1.1 -r1.2 *** scalarCoil.c 22 Mar 2004 21:40:11 -0000 1.1 --- scalarCoil.c 24 Mar 2004 06:48:50 -0000 1.2 *************** *** 20,48 **** #include "coil.h" - /* - ** x ----> X - ** \ 0 1 2 - ** | \ 3 4 5 - ** | Y 6 7 8 - ** | - ** | 9 10 11 - ** Z 12 13 14 - ** 15 16 17 - ** - ** 18 19 20 - ** 21 22 23 - ** 24 25 26 - */ - coil_t ! _coilLaplacian3(coil_t *iv3, double spacing[3]) { ! return ((iv3[12] - 2*iv3[13] + iv3[14])/(spacing[0]*spacing[0]) ! + (iv3[10] - 2*iv3[13] + iv3[16])/(spacing[1]*spacing[1]) ! + (iv3[ 4] - 2*iv3[13] + iv3[22])/(spacing[2]*spacing[2])); } void ! _coilKindScalarFilterTesting(coil_t *delta, coil_t *iv3, double spacing[3], double parm[COIL_PARMS_NUM]) { --- 20,33 ---- #include "coil.h" coil_t ! _coilLaplacian3(coil_t **iv3, double spacing[3]) { ! return ( (iv3[0][4] - 2*iv3[1][4] + iv3[2][4])/(spacing[0]*spacing[0]) ! + (iv3[1][3] - 2*iv3[1][4] + iv3[1][5])/(spacing[1]*spacing[1]) ! + (iv3[1][1] - 2*iv3[1][4] + iv3[1][7])/(spacing[2]*spacing[2])); } void ! _coilKindScalarFilterTesting(coil_t *delta, coil_t **iv3, double spacing[3], double parm[COIL_PARMS_NUM]) { *************** *** 51,61 **** void ! _coilKindScalarFilterIsotropic(coil_t *delta, coil_t *iv3, ! double spacing[3], ! double parm[COIL_PARMS_NUM]) { ! coil_t lapl; ! lapl = _coilLaplacian3(iv3, spacing); ! delta[0] = parm[0]*lapl; } --- 36,117 ---- void ! _coilKindScalarFilterHomogeneous(coil_t *delta, coil_t **iv3, ! double spacing[3], ! double parm[COIL_PARMS_NUM]) { ! ! delta[0] = parm[0]*_coilLaplacian3(iv3, spacing); ! } ! /* ! ** x ----> X ! ** \ [0][0] [1][0] [2][0] ! ** | \ [0][1] [1][1] [2][1] ! ** | Y [0][2] [1][2] [2][2] ! ** | ! ** | [0][3] [1][3] [2][3] ! ** Z [0][4] [1][4] [2][4] ! ** [0][5] [1][5] [2][5] ! ** ! ** [0][6] [1][6] [2][6] ! ** [0][7] [1][7] [2][7] ! ** [0][8] [1][8] [2][8] ! */ ! ! void ! _coilKindScalarFilterPeronaMalik(coil_t *delta, coil_t **i, ! double spacing[3], ! double parm[COIL_PARMS_NUM]) { ! coil_t forwX[3], backX[3], forwY[3], backY[3], forwZ[3], backZ[3], ! KK, rspX, rspY, rspZ; ! ! /* reciprocals of spacings in X, Y, and Z */ ! rspX = 1.0/spacing[0]; ! rspY = 1.0/spacing[1]; ! rspZ = 1.0/spacing[2]; ! ! /* gradients at forward and backward X */ ! forwX[0] = rspX*(i[2][4] - i[1][4]); ! forwX[1] = rspY*(i[1][5] + i[2][5] - i[1][3] - i[2][3])/2; ! forwX[2] = rspZ*(i[1][7] + i[2][7] - i[1][1] - i[2][1])/2; ! backX[0] = rspX*(i[1][4] - i[0][4]); ! backX[1] = rspY*(i[0][5] + i[1][5] - i[0][3] - i[1][3])/2; ! backX[2] = rspZ*(i[0][7] + i[1][7] - i[0][1] - i[1][1])/2; ! ! /* gradients at forward and backward Y */ ! forwY[0] = rspX*(i[2][4] + i[2][5] - i[0][4] - i[0][5])/2; ! forwY[1] = rspY*(i[1][5] - i[1][4]); ! forwY[2] = rspZ*(i[1][7] + i[1][8] - i[1][1] - i[1][2])/2; ! backY[0] = rspX*(i[2][3] + i[2][4] - i[0][3] - i[0][4])/2; ! backY[1] = rspY*(i[1][4] - i[1][3]); ! backY[2] = rspZ*(i[1][6] + i[1][7] - i[1][0] - i[1][1])/2; ! ! /* gradients at forward and backward Z */ ! forwZ[0] = rspX*(i[2][4] + i[2][7] - i[0][4] - i[0][7])/2; ! forwZ[1] = rspY*(i[1][5] + i[1][8] - i[1][3] - i[1][6])/2; ! forwZ[2] = rspZ*(i[1][7] - i[1][4]); ! backZ[0] = rspX*(i[2][1] + i[2][4] - i[0][1] - i[0][4])/2; ! backZ[1] = rspY*(i[1][2] + i[1][5] - i[1][0] - i[1][3])/2; ! backZ[2] = rspZ*(i[1][4] - i[1][1]); ! ! /* compute fluxes */ ! KK = parm[1]*parm[1]; ! /* ! forwX[0] *= 1.0/(1.0 + ELL_3V_DOT(forwX, forwX)/KK); ! forwY[1] *= 1.0/(1.0 + ELL_3V_DOT(forwY, forwY)/KK); ! forwZ[2] *= 1.0/(1.0 + ELL_3V_DOT(forwZ, forwZ)/KK); ! backX[0] *= 1.0/(1.0 + ELL_3V_DOT(backX, backX)/KK); ! backY[1] *= 1.0/(1.0 + ELL_3V_DOT(backY, backY)/KK); ! backZ[2] *= 1.0/(1.0 + ELL_3V_DOT(backZ, backZ)/KK); ! */ ! forwX[0] *= exp(-0.5*ELL_3V_DOT(forwX, forwX)/KK); ! forwY[1] *= exp(-0.5*ELL_3V_DOT(forwY, forwY)/KK); ! forwZ[2] *= exp(-0.5*ELL_3V_DOT(forwZ, forwZ)/KK); ! backX[0] *= exp(-0.5*ELL_3V_DOT(backX, backX)/KK); ! backY[1] *= exp(-0.5*ELL_3V_DOT(backY, backY)/KK); ! backZ[2] *= exp(-0.5*ELL_3V_DOT(backZ, backZ)/KK); ! ! delta[0] = parm[0]*(rspX*(forwX[0] - backX[0]) ! + rspY*(forwY[1] - backY[1]) ! + rspZ*(forwZ[2] - backZ[2])); } *************** *** 72,77 **** {NULL, _coilKindScalarFilterTesting, ! _coilKindScalarFilterIsotropic, ! NULL, NULL, NULL}, --- 128,133 ---- {NULL, _coilKindScalarFilterTesting, ! _coilKindScalarFilterHomogeneous, ! _coilKindScalarFilterPeronaMalik, NULL, NULL}, Index: realmethods.c =================================================================== RCS file: /cvsroot/teem/teem/src/coil/realmethods.c,v retrieving revision 1.2 retrieving revision 1.3 diff -C2 -d -r1.2 -r1.3 *** realmethods.c 23 Mar 2004 01:18:40 -0000 1.2 --- realmethods.c 24 Mar 2004 06:48:50 -0000 1.3 *************** *** 29,41 **** coilMethodTesting = &_coilMethodTesting; - const coilMethod ! _coilMethodIsotropic = { ! "isotropic", ! coilMethodTypeIsotropic, 1 }; const coilMethod* ! coilMethodIsotropic = &_coilMethodIsotropic; const coilMethod* --- 29,49 ---- coilMethodTesting = &_coilMethodTesting; const coilMethod ! _coilMethodHomogeneous = { ! "homogeneous", ! coilMethodTypeHomogeneous, 1 }; const coilMethod* ! coilMethodHomogeneous = &_coilMethodHomogeneous; ! ! const coilMethod ! _coilMethodPeronaMalik = { ! "perona-malik", ! coilMethodTypePeronaMalik, ! 2 ! }; ! const coilMethod* ! coilMethodPeronaMalik = &_coilMethodPeronaMalik; const coilMethod* *************** *** 43,48 **** NULL, &_coilMethodTesting, ! &_coilMethodIsotropic, ! NULL, NULL, NULL --- 51,56 ---- NULL, &_coilMethodTesting, ! &_coilMethodHomogeneous, ! &_coilMethodPeronaMalik, NULL, NULL Index: methodsCoil.c =================================================================== RCS file: /cvsroot/teem/teem/src/coil/methodsCoil.c,v retrieving revision 1.2 retrieving revision 1.3 diff -C2 -d -r1.2 -r1.3 *** methodsCoil.c 23 Mar 2004 01:18:40 -0000 1.2 --- methodsCoil.c 24 Mar 2004 06:48:50 -0000 1.3 *************** *** 57,61 **** cctx->finished = AIR_FALSE; cctx->task = NULL; ! cctx->finishBarrier = NULL; cctx->filterBarrier = NULL; cctx->updateBarrier = NULL; --- 57,61 ---- cctx->finished = AIR_FALSE; cctx->task = NULL; ! cctx->nextSliceMutex = NULL; cctx->filterBarrier = NULL; cctx->updateBarrier = NULL; *************** *** 207,210 **** --- 207,211 ---- if (cctx) { + /* thread machinery destroyed with coilFinish() */ cctx->nvol = nrrdNuke(cctx->nvol); cctx = airFree(cctx); Index: enumsCoil.c =================================================================== RCS file: /cvsroot/teem/teem/src/coil/enumsCoil.c,v retrieving revision 1.1 retrieving revision 1.2 diff -C2 -d -r1.1 -r1.2 *** enumsCoil.c 22 Mar 2004 21:40:11 -0000 1.1 --- enumsCoil.c 24 Mar 2004 06:48:50 -0000 1.2 *************** *** 24,29 **** "(unknown_method)", "testing", ! "isotropic", ! "anisotropic", "modified curvature", "curvature flow" --- 24,29 ---- "(unknown_method)", "testing", ! "homogeneous", ! "perona-malik", "modified curvature", "curvature flow" *************** *** 34,39 **** "unknown_method", "nothing, actually, just here for testing", ! "isotropic diffusion (Gaussian blurring)", ! "anisotropic diffusion (Perona-Malik)", "modified curvature diffusion", "curvature flow" --- 34,39 ---- "unknown_method", "nothing, actually, just here for testing", ! "homogenous isotropic diffusion (Gaussian blurring)", ! "Perona-Malik", "modified curvature diffusion", "curvature flow" *************** *** 43,48 **** _coilMethodTypeStrEqv[][AIR_STRLEN_SMALL] = { "test", "testing", ! "iso", "isotropic", ! "aniso", "anisotropic", "mcde", "flow", --- 43,48 ---- _coilMethodTypeStrEqv[][AIR_STRLEN_SMALL] = { "test", "testing", ! "homog", "homogeneous", ! "pm", "perona-malik", "mcde", "flow", *************** *** 53,58 **** _coilMethodTypeValEqv[] = { coilMethodTypeTesting, coilMethodTypeTesting, ! coilMethodTypeIsotropic, coilMethodTypeIsotropic, ! coilMethodTypeAnisotropic, coilMethodTypeAnisotropic, coilMethodTypeModifiedCurvature, coilMethodTypeCurvatureFlow --- 53,58 ---- _coilMethodTypeValEqv[] = { coilMethodTypeTesting, coilMethodTypeTesting, ! coilMethodTypeHomogeneous, coilMethodTypeHomogeneous, ! coilMethodTypePeronaMalik, coilMethodTypePeronaMalik, coilMethodTypeModifiedCurvature, coilMethodTypeCurvatureFlow Index: coreCoil.c =================================================================== RCS file: /cvsroot/teem/teem/src/coil/coreCoil.c,v retrieving revision 1.2 retrieving revision 1.3 diff -C2 -d -r1.2 -r1.3 *** coreCoil.c 23 Mar 2004 01:18:40 -0000 1.2 --- coreCoil.c 24 Mar 2004 06:48:50 -0000 1.3 *************** *** 20,58 **** #include "coil.h" /* ** ** iv3 is: diam x diam x diam x valLen */ void ! _coilIv3Fill(coil_t *iv3, coil_t *here, int radius, int valLen, ! int x0, int y0, int z0, int sizeX, int sizeY, int sizeZ) { int diam, vi, /* value index */ xni, yni, zni, /* neighborhood (iv3) indices */ xvi, yvi, zvi; /* volume indices */ - /* this should be parameterized on both radius and valLen */ - /* this should shuffle values within iv3 when possible */ diam = 1 + 2*radius; ! for (zni=0; zni<diam; zni++) { ! zvi = AIR_CLAMP(0, zni-radius+z0, sizeZ-1) - z0; ! for (yni=0; yni<diam; yni++) { ! yvi = AIR_CLAMP(0, yni-radius+y0, sizeY-1) - y0; ! for (xni=0; xni<diam; xni++) { ! xvi = AIR_CLAMP(0, xni-radius+x0, sizeX-1) - x0; ! for (vi=0; vi<valLen; vi++) { ! iv3[xni + diam*(yni + diam*(zni + diam*vi))] = ! here[vi + valLen*(0 + 2*(xvi + sizeX*(yvi + sizeY*zvi)))]; ! } ! } } } ! return; } void _coilProcess(coilTask *task, int doFilter) { ! int xi, yi, zi, sizeX, sizeY, sizeZ, valLen, radius; coil_t *here; ! void (*filter)(coil_t *delta, coil_t *iv3, double spacing[3], double parm[COIL_PARMS_NUM]); --- 20,128 ---- #include "coil.h" + #define _COIL_IV3_FILL(radius, diam, valLen) \ + if (0 && x0) { \ + /* cycle through slices */ \ + tmp = iv3[0]; \ + for (xni=0; xni<diam-1; xni++) { \ + iv3[xni] = iv3[xni+1]; \ + } \ + iv3[diam-1] = tmp; \ + /* refill only newest one */ \ + xni = diam-1; \ + xvi = AIR_CLAMP(0, xni-radius+x0, sizeX-1) - x0; \ + for (zni=0; zni<diam; zni++) { \ + zvi = AIR_CLAMP(0, zni-radius+z0, sizeZ-1) - z0; \ + for (yni=0; yni<diam; yni++) { \ + yvi = AIR_CLAMP(0, yni-radius+y0, sizeY-1) - y0; \ + for (vi=0; vi<valLen; vi++) { \ + iv3[xni][vi + valLen*(yni + diam*zni)] = \ + here[vi + valLen*(0 + 2*(xvi + sizeX*(yvi + sizeY*zvi)))]; \ + } \ + } \ + } \ + } else { \ + /* have to re-fill entire thing */ \ + for (zni=0; zni<diam; zni++) { \ + zvi = AIR_CLAMP(0, zni-radius+z0, sizeZ-1) - z0; \ + for (yni=0; yni<diam; yni++) { \ + yvi = AIR_CLAMP(0, yni-radius+y0, sizeY-1) - y0; \ + for (xni=0; xni<diam; xni++) { \ + xvi = AIR_CLAMP(0, xni-radius+x0, sizeX-1) - x0; \ + for (vi=0; vi<valLen; vi++) { \ + iv3[xni][vi + valLen*(yni + diam*zni)] = \ + here[vi + valLen*(0 + 2*(xvi + sizeX*(yvi + sizeY*zvi)))]; \ + } \ + } \ + } \ + } \ + } + /* ** ** iv3 is: diam x diam x diam x valLen + ** + ** this should be parameterized on both radius and valLen */ void ! _coilIv3Fill_R_L(coil_t **iv3, coil_t *here, int radius, int valLen, ! int x0, int y0, int z0, int sizeX, int sizeY, int sizeZ) { int diam, vi, /* value index */ xni, yni, zni, /* neighborhood (iv3) indices */ xvi, yvi, zvi; /* volume indices */ + coil_t *tmp; diam = 1 + 2*radius; ! _COIL_IV3_FILL(radius, diam, valLen); ! return; ! } ! ! void ! _coilIv3Fill_1_1(coil_t **iv3, coil_t *here, int radius, int valLen, ! int x0, int y0, int z0, int sizeX, int sizeY, int sizeZ) { ! int vi, /* value index */ ! xni, yni, zni, /* neighborhood (iv3) indices */ ! xvi, yvi, zvi; /* volume indices */ ! coil_t *tmp; ! ! _COIL_IV3_FILL(1, 3, 1); ! return; ! } ! ! int ! _coilThisZGet(coilTask *task, int doFilter) { ! int thisZ, *thisFlag, *thatFlag; ! ! if (doFilter) { ! thisFlag = &(task->cctx->todoFilter); ! thatFlag = &(task->cctx->todoUpdate); ! } else { ! thisFlag = &(task->cctx->todoUpdate); ! thatFlag = &(task->cctx->todoFilter); ! } ! ! airThreadMutexLock(task->cctx->nextSliceMutex); ! if (task->cctx->nextSlice == task->cctx->size[2] ! && *thisFlag) { ! /* we're the first thread to start this phase */ ! task->cctx->nextSlice = 0; ! *thisFlag = AIR_FALSE; ! } ! thisZ = task->cctx->nextSlice; ! if (task->cctx->nextSlice < task->cctx->size[2]) { ! task->cctx->nextSlice++; ! if (task->cctx->nextSlice == task->cctx->size[2]) { ! /* we just grabbed the last slice of this phase */ ! *thatFlag = AIR_TRUE; } } ! airThreadMutexUnlock(task->cctx->nextSliceMutex); ! return thisZ; } void _coilProcess(coilTask *task, int doFilter) { ! int xi, yi, sizeX, sizeY, thisZ, sizeZ, valLen, radius; coil_t *here; ! void (*filter)(coil_t *delta, coil_t **iv3, double spacing[3], double parm[COIL_PARMS_NUM]); *************** *** 64,76 **** radius = task->cctx->radius; filter = task->cctx->kind->filter[task->cctx->method->type]; - here = (coil_t*)(task->cctx->nvol->data); - here += 2*valLen*sizeX*sizeY*task->startZ; if (doFilter) { ! for (zi=task->startZ; zi<=task->endZ; zi++) { for (yi=0; yi<sizeY; yi++) { for (xi=0; xi<sizeX; xi++) { ! _coilIv3Fill(task->iv3, here + 0*valLen, radius, valLen, ! xi, yi, zi, sizeX, sizeY, sizeZ); ! filter(here + 1*valLen, task->iv3, task->cctx->spacing, task->cctx->parm); here += 2*valLen; } --- 134,150 ---- radius = task->cctx->radius; filter = task->cctx->kind->filter[task->cctx->method->type]; if (doFilter) { ! while (1) { ! thisZ = _coilThisZGet(task, doFilter); ! if (thisZ == sizeZ) { ! break; ! } ! here = (coil_t*)(task->cctx->nvol->data) + 2*valLen*sizeX*sizeY*thisZ; for (yi=0; yi<sizeY; yi++) { for (xi=0; xi<sizeX; xi++) { ! task->iv3Fill(task->iv3, here + 0*valLen, radius, valLen, ! xi, yi, thisZ, sizeX, sizeY, sizeZ); ! filter(here + 1*valLen, task->iv3, ! task->cctx->spacing, task->cctx->parm); here += 2*valLen; } *************** *** 78,82 **** } } else { ! for (zi=task->startZ; zi<=task->endZ; zi++) { for (yi=0; yi<sizeY; yi++) { for (xi=0; xi<sizeX; xi++) { --- 152,161 ---- } } else { ! while (1) { ! thisZ = _coilThisZGet(task, doFilter); ! if (thisZ == sizeZ) { ! break; ! } ! here = (coil_t*)(task->cctx->nvol->data) + 2*valLen*sizeX*sizeY*thisZ; for (yi=0; yi<sizeY; yi++) { for (xi=0; xi<sizeX; xi++) { *************** *** 91,97 **** coilTask * ! _coilTaskNew(coilContext *cctx, int threadIdx, int sizeZ) { coilTask *task; ! int len, diam; len = cctx->kind->valLen; --- 170,176 ---- coilTask * ! _coilTaskNew(coilContext *cctx, int threadIdx) { coilTask *task; ! int len, diam, xi; len = cctx->kind->valLen; *************** *** 101,108 **** task->cctx = cctx; task->thread = airThreadNew(); - task->startZ = threadIdx*sizeZ/cctx->numThreads; - task->endZ = (threadIdx+1)*sizeZ/cctx->numThreads - 1; task->threadIdx = threadIdx; ! task->iv3 = (coil_t*)calloc(len*diam*diam*diam, sizeof(coil_t)); task->returnPtr = NULL; } --- 180,194 ---- task->cctx = cctx; task->thread = airThreadNew(); task->threadIdx = threadIdx; ! task->_iv3 = (coil_t*)calloc(len*diam*diam*diam, sizeof(coil_t)); ! task->iv3 = (coil_t**)calloc(diam, sizeof(coil_t*)); ! for (xi=0; xi<diam; xi++) { ! task->iv3[xi] = task->_iv3 + xi*len*diam*diam; ! } ! if (1 == cctx->radius && 1 == cctx->kind->valLen) { ! task->iv3Fill = _coilIv3Fill_1_1; ! } else { ! task->iv3Fill = _coilIv3Fill_R_L; ! } task->returnPtr = NULL; } *************** *** 115,118 **** --- 201,205 ---- if (task) { task->thread = airThreadNix(task->thread); + task->_iv3 = airFree(task->_iv3); task->iv3 = airFree(task->iv3); free(task); *************** *** 130,161 **** while (1) { /* wait until parent has set cctx->finished */ ! if (task->cctx->verbose) { fprintf(stderr, "%s(%d): waiting to check finished\n", me, task->threadIdx); } ! airThreadBarrierWait(task->cctx->finishBarrier); if (task->cctx->finished) { ! if (task->cctx->verbose) { fprintf(stderr, "%s(%d): done!\n", me, task->threadIdx); } break; } ! /* else there's work */ /* first: filter */ ! if (task->cctx->verbose) { fprintf(stderr, "%s(%d): filtering ... \n", me, task->threadIdx); } _coilProcess(task, AIR_TRUE); - airThreadBarrierWait(task->cctx->filterBarrier); /* second: update */ ! if (task->cctx->verbose) { fprintf(stderr, "%s(%d): updating ... \n", me, task->threadIdx); } _coilProcess(task, AIR_FALSE); ! airThreadBarrierWait(task->cctx->updateBarrier); } --- 217,248 ---- while (1) { /* wait until parent has set cctx->finished */ ! if (task->cctx->verbose > 1) { fprintf(stderr, "%s(%d): waiting to check finished\n", me, task->threadIdx); } ! airThreadBarrierWait(task->cctx->filterBarrier); if (task->cctx->finished) { ! if (task->cctx->verbose > 1) { fprintf(stderr, "%s(%d): done!\n", me, task->threadIdx); } break; } ! /* else there's work to do ... */ /* first: filter */ ! if (task->cctx->verbose > 1) { fprintf(stderr, "%s(%d): filtering ... \n", me, task->threadIdx); } _coilProcess(task, AIR_TRUE); /* second: update */ ! airThreadBarrierWait(task->cctx->updateBarrier); ! if (task->cctx->verbose > 1) { fprintf(stderr, "%s(%d): updating ... \n", me, task->threadIdx); } _coilProcess(task, AIR_FALSE); ! } *************** *** 182,186 **** cctx->task[0] = NULL; for (tidx=0; tidx<cctx->numThreads; tidx++) { ! cctx->task[tidx] = _coilTaskNew(cctx, tidx, cctx->size[2]); if (!(cctx->task[tidx])) { sprintf(err, "%s: couldn't allocate task %d", me, tidx); --- 269,273 ---- cctx->task[0] = NULL; for (tidx=0; tidx<cctx->numThreads; tidx++) { ! cctx->task[tidx] = _coilTaskNew(cctx, tidx); if (!(cctx->task[tidx])) { sprintf(err, "%s: couldn't allocate task %d", me, tidx); *************** *** 191,213 **** cctx->finished = AIR_FALSE; if (cctx->numThreads > 1) { ! cctx->finishBarrier = airThreadBarrierNew(cctx->numThreads); cctx->filterBarrier = airThreadBarrierNew(cctx->numThreads); cctx->updateBarrier = airThreadBarrierNew(cctx->numThreads); } - /* start threads 1 and up running (they won't get far) */ - if (cctx->verbose) { - fprintf(stderr, "%s: parent doing slices Z=[%d,%d]\n", me, - cctx->task[0]->startZ, cctx->task[0]->endZ); - } - for (tidx=1; tidx<cctx->numThreads; tidx++) { - if (cctx->verbose) { - fprintf(stderr, "%s: spawning thread %d (Z=[%d,%d])\n", me, tidx, - cctx->task[tidx]->startZ, cctx->task[tidx]->endZ); - } - airThreadStart(cctx->task[tidx]->thread, _coilWorker, - (void *)(cctx->task[tidx])); - } - /* initialize the values in cctx->nvol */ val = (coil_t*)(cctx->nvol->data); --- 278,286 ---- cctx->finished = AIR_FALSE; if (cctx->numThreads > 1) { ! cctx->nextSliceMutex = airThreadMutexNew(); cctx->filterBarrier = airThreadBarrierNew(cctx->numThreads); cctx->updateBarrier = airThreadBarrierNew(cctx->numThreads); } /* initialize the values in cctx->nvol */ val = (coil_t*)(cctx->nvol->data); *************** *** 226,243 **** } ! return 0; ! } ! ! int ! _coilPause(int huge) { ! int i, j, num; ! ! num = 0; ! for (i=1; i<huge; i++) { ! for (j=0; j<huge; j++) { ! num *= i; } } ! return num; } --- 299,317 ---- } ! /* start threads 1 and up running; they'll all hit filterBarrier */ ! for (tidx=1; tidx<cctx->numThreads; tidx++) { ! if (cctx->verbose > 1) { ! fprintf(stderr, "%s: spawning thread %d\n", me, tidx); } + airThreadStart(cctx->task[tidx]->thread, _coilWorker, + (void *)(cctx->task[tidx])); } ! ! /* set things as though we've just finished an update phase */ ! cctx->nextSlice = cctx->size[2]; ! cctx->todoFilter = AIR_TRUE; ! cctx->todoUpdate = AIR_FALSE; ! ! return 0; } *************** *** 253,256 **** --- 327,331 ---- char me[]="coilIterate", err[AIR_STRLEN_MED]; int iter; + double time0, time1; if (!cctx) { *************** *** 259,288 **** } for (iter=0; iter<numIterations; iter++) { if (cctx->verbose) { ! fprintf(stderr, "%s: starting iter %d\n", me, iter); } cctx->finished = AIR_FALSE; if (cctx->numThreads > 1) { ! airThreadBarrierWait(cctx->finishBarrier); } /* first: filter */ ! if (cctx->verbose) { fprintf(stderr, "%s: filtering ... \n", me); } _coilProcess(cctx->task[0], AIR_TRUE); - if (cctx->numThreads > 1) { - airThreadBarrierWait(cctx->filterBarrier); - } /* second: update */ ! if (cctx->verbose) { fprintf(stderr, "%s: updating ... \n", me); } - _coilProcess(cctx->task[0], AIR_FALSE); if (cctx->numThreads > 1) { airThreadBarrierWait(cctx->updateBarrier); } } return 0; --- 334,368 ---- } + time0 = airTime(); for (iter=0; iter<numIterations; iter++) { if (cctx->verbose) { ! fprintf(stderr, "%s: starting iter %d (of %d)\n", me, iter, ! numIterations); } cctx->finished = AIR_FALSE; if (cctx->numThreads > 1) { ! airThreadBarrierWait(cctx->filterBarrier); } /* first: filter */ ! if (cctx->verbose > 1) { fprintf(stderr, "%s: filtering ... \n", me); } _coilProcess(cctx->task[0], AIR_TRUE); /* second: update */ ! if (cctx->verbose > 1) { fprintf(stderr, "%s: updating ... \n", me); } if (cctx->numThreads > 1) { airThreadBarrierWait(cctx->updateBarrier); } + _coilProcess(cctx->task[0], AIR_FALSE); + + } + time1 = airTime(); + if (cctx->verbose) { + fprintf(stderr, "%s: elapsed time = %g (%g/iter)\n", me, + time1 - time0, (time1 - time0)/numIterations); } return 0; *************** *** 299,308 **** } ! if (cctx->verbose) { fprintf(stderr, "%s: finishing workers\n", me); } cctx->finished = AIR_TRUE; if (cctx->numThreads > 1) { ! airThreadBarrierWait(cctx->finishBarrier); } for (tidx=1; tidx<cctx->numThreads; tidx++) { --- 379,388 ---- } ! if (cctx->verbose > 1) { fprintf(stderr, "%s: finishing workers\n", me); } cctx->finished = AIR_TRUE; if (cctx->numThreads > 1) { ! airThreadBarrierWait(cctx->filterBarrier); } for (tidx=1; tidx<cctx->numThreads; tidx++) { *************** *** 315,318 **** --- 395,404 ---- cctx->task = airFree(cctx->task); + if (cctx->numThreads > 1) { + cctx->nextSliceMutex = airThreadMutexNix(cctx->nextSliceMutex); + cctx->filterBarrier = airThreadBarrierNix(cctx->filterBarrier); + cctx->updateBarrier = airThreadBarrierNix(cctx->updateBarrier); + } + return 0; } |