00001
00009 #include "oda.h"
00010 #include "parUtils.h"
00011 #include "seqUtils.h"
00012 #include "colors.h"
00013 #include "nodeAndRanks.h"
00014 #include "testUtils.h"
00015
00016 #ifdef __DEBUG__
00017 #ifndef __DEBUG_DA_NLIST__
00018 #define __DEBUG_DA_NLIST__
00019 #endif
00020 #endif
00021
00022 #ifdef __DEBUG_DA__
00023 #ifndef __DEBUG_DA_NLIST__
00024 #define __DEBUG_DA_NLIST__
00025 #endif
00026 #endif
00027
00028 #ifdef __DEBUG_DA_NLIST__
00029 #ifndef __MEASURE_BUILD_NLIST__
00030 #define __MEASURE_BUILD_NLIST__
00031 #endif
00032 #endif
00033
00034
00035
00036 namespace ot {
00037
00038 #define CHECK_FAST_MAX_LOWER_BOUND(arr,key,fastIdx,fastResult) { \
00039 unsigned int tmpMlbIdx;\
00040 bool tmpMlbResult;\
00041 tmpMlbResult = seq::maxLowerBound<ot::TreeNode> (arr, key, tmpMlbIdx,NULL,NULL);\
00042 assert(tmpMlbResult == fastResult);\
00043 assert(tmpMlbIdx == fastIdx);\
00044 }
00045
00046
00047 void DA::buildNodeList(std::vector<ot::TreeNode> &in) {
00048 #ifdef __PROF_WITH_BARRIER__
00049 MPI_Barrier(m_mpiCommActive);
00050 #endif
00051 PROF_BUILD_NLIST_BEGIN
00052
00053
00054
00055
00056
00057
00058 unsigned int nelem = m_uiPreGhostElementSize + m_uiElementSize;
00059
00060 std::vector<unsigned int> nlist;
00061
00062
00063
00064
00065
00066
00067 for(unsigned int numFullLoopCtr = 0; numFullLoopCtr < 2; numFullLoopCtr++) {
00068
00069
00070 std::vector<ot::TreeNode> primaryKeys;
00071 std::vector<ot::TreeNode> secondaryKeys;
00072 std::vector<ot::TreeNode> extraAtEnd;
00073
00074 #ifdef __DEBUG_DA_NLIST__
00075 MPI_Barrier(m_mpiCommActive);
00076 std::vector<ot:: TreeNode > checkSecondRing;
00077 std::vector<ot::TreeNode> chkMissedPrimary;
00078 #endif
00079
00080 unsigned int iLoopSt,iLoopEnd;
00081 if( numFullLoopCtr == 0) {
00082
00083 iLoopSt = 0;
00084 iLoopEnd = m_uiPreGhostElementSize;
00085 }else {
00086
00087
00088
00089 iLoopSt = m_uiPreGhostElementSize;
00090 iLoopEnd = nelem;
00091 }
00092
00093 #ifdef __DEBUG_DA_NLIST__
00094 MPI_Barrier(m_mpiCommActive);
00095 if(!m_iRankActive) {
00096 std::cout<<"numFullLoopCtr: "<<numFullLoopCtr<<std::endl;
00097 }
00098 std::cout<<m_iRankActive<<": preElemSz: "<<m_uiPreGhostElementSize
00099 <<" elemBeg: "<<m_uiElementBegin<<" elemEnd: "<<m_uiElementEnd
00100 <<" postGhostBegin: "<<m_uiPostGhostBegin
00101 <<" locBufferSz: "<<m_uiLocalBufferSize
00102 <<" nelem: "<<nelem
00103 <<" iLoopSt: "<<iLoopSt
00104 <<" iLoopEnd: "<<iLoopEnd<<std::endl;
00105 assert(m_uiElementBegin < m_uiPostGhostBegin);
00106 std::cout<<m_iRankActive<<" my First Octant(elem/Bnd): "<<in[m_uiElementBegin]<<std::endl;
00107 MPI_Barrier(m_mpiCommActive);
00108 #endif
00109
00110
00111
00112
00113
00114
00115 if( numFullLoopCtr == 0) {
00116 nlist.resize(8*iLoopEnd);
00117 m_ucpLutMasks.resize(2*iLoopEnd);
00118 }
00119
00120
00121 for (unsigned int i = iLoopSt; i < iLoopEnd; i++) {
00122
00123 m_ucpLutMasks[2*i + 1] = 0;
00124
00125 std::vector<ot::TreeNode> nodeLocations(8);
00126 std::vector<ot::TreeNode> parNodeLocations(8);
00127
00128 unsigned int d = in[i].getLevel();
00129 unsigned int x = in[i].getX();
00130 unsigned int y = in[i].getY();
00131 unsigned int z = in[i].getZ();
00132
00133
00134 unsigned int parX = ( ( x >> ( m_uiMaxDepth - d + 1 ) ) << ( m_uiMaxDepth - d + 1 ) );
00135 unsigned int parY = ( ( y >> ( m_uiMaxDepth - d + 1 ) ) << ( m_uiMaxDepth - d + 1 ) );
00136 unsigned int parZ = ( ( z >> ( m_uiMaxDepth - d + 1 ) ) << ( m_uiMaxDepth - d + 1 ) );
00137
00138 unsigned int sz = 1u << (m_uiMaxDepth - d);
00139 unsigned int len_par = (unsigned int)(1u<<( m_uiMaxDepth - d +1 ) );
00140
00141 unsigned int a = x % len_par;
00142 unsigned int b = y % len_par;
00143 unsigned int c = z % len_par;
00144
00145 a /= sz;
00146 b /= sz;
00147 c /= sz;
00148
00149 unsigned int ch_num = (4*c + 2*b + a);
00150
00151 #ifdef __DEBUG_DA_NLIST__
00152 if ( !ch_num || (ch_num==7) ) {
00153 if ( !(in[i].getFlag() & ot::TreeNode::NODE) ) {
00154 std::cerr << RED"Nodes are marked wrongly "NRM << std::endl;
00155 assert(false);
00156 }
00157 }
00158 #endif
00159
00160 bool found[8];
00161
00162 for (unsigned int k = 0; k < 8; k++) {
00163 nlist[8*i + k] = m_uiLocalBufferSize;
00164 found[k] = false;
00165 }
00166
00167
00168 if(numFullLoopCtr == 1) {
00169 unsigned int idnx, idny, idnz;
00170
00171
00172
00173
00174
00175
00176 bool foundNegX=false, foundNegY=false, foundNegZ=false;
00177
00178
00179
00180 if (x) {
00181 ot::TreeNode knx( x-1, y, z, m_uiMaxDepth, m_uiDimension, m_uiMaxDepth);
00182 foundNegX = seq::maxLowerBound<ot::TreeNode> (in, knx, idnx,NULL,&i);
00183 #ifdef __DEBUG_DA_NLIST__
00184 CHECK_FAST_MAX_LOWER_BOUND(in,knx,idnx,foundNegX)
00185 #endif
00186
00187 if ( foundNegX && (!( (in[idnx].isAncestor(knx) ) || (in[idnx] == knx) )) ) {
00188 foundNegX=false;
00189 }
00190 if ( foundNegX && (m_ucpLutMasks[2*idnx+1] == ot::DA_FLAGS::FOREIGN) ) {
00191 foundNegX=false;
00192 }
00193 }
00194
00195
00196 if (y) {
00197 ot::TreeNode kny( x, y-1, z, m_uiMaxDepth, m_uiDimension, m_uiMaxDepth);
00198 foundNegY = seq::maxLowerBound<ot::TreeNode> (in, kny, idny,NULL,&i) ;
00199 #ifdef __DEBUG_DA_NLIST__
00200 CHECK_FAST_MAX_LOWER_BOUND(in,kny,idny,foundNegY)
00201 #endif
00202 if ( foundNegY && (!( (in[idny].isAncestor(kny) ) || (in[idny] == kny) )) ) {
00203 foundNegY=false;
00204 }
00205 if ( foundNegY && (m_ucpLutMasks[2*idny+1] == ot::DA_FLAGS::FOREIGN) ) {
00206 foundNegY=false;
00207 }
00208 }
00209
00210
00211 if (z) {
00212 ot::TreeNode knz( x, y, z-1, m_uiMaxDepth, m_uiDimension, m_uiMaxDepth);
00213 foundNegZ = seq::maxLowerBound<ot::TreeNode> (in, knz, idnz,NULL,&i) ;
00214 #ifdef __DEBUG_DA_NLIST__
00215 CHECK_FAST_MAX_LOWER_BOUND(in,knz,idnz,foundNegZ)
00216 #endif
00217 if ( foundNegZ && (!( (in[idnz].isAncestor(knz) ) || (in[idnz] == knz) )) ) {
00218 foundNegZ=false;
00219 }
00220 if ( foundNegZ && (m_ucpLutMasks[2*idnz+1] == ot::DA_FLAGS::FOREIGN) ) {
00221 foundNegZ=false;
00222 }
00223 }
00224
00225
00226
00227
00228
00229
00230
00231
00232
00233
00234
00235
00236
00237
00238
00239
00240 #define NEG_SEARCH_BLOCK1(idx,n1l,n1r,n2l,n2r,n3l,n3r,n4l,n4r) {\
00241 if(nlist[8*idx+n1r] < m_uiLocalBufferSize) {\
00242 nlist[8*i+n1l] = nlist[8*idx+n1r];\
00243 found[n1l] = true;\
00244 }\
00245 if(nlist[8*idx+n2r] < m_uiLocalBufferSize) {\
00246 nlist[8*i+n2l] = nlist[8*idx+n2r];\
00247 found[n2l] = true;\
00248 }\
00249 if(nlist[8*idx+n3r] < m_uiLocalBufferSize) {\
00250 nlist[8*i+n3l] = nlist[8*idx+n3r];\
00251 found[n3l] = true;\
00252 }\
00253 if(nlist[8*idx+n4r] < m_uiLocalBufferSize) {\
00254 nlist[8*i+n4l] = nlist[8*idx+n4r];\
00255 found[n4l] = true;\
00256 }\
00257 }
00258
00259 #define NEG_SEARCH_BLOCK2(idx,n1l,n1r,n2l,n2r,n3l,n3r,n4l,n4r) {\
00260 unsigned char negMask;\
00261 if ( in[i].getLevel() != in[idx].getLevel()) {\
00262 negMask = 0;\
00263 } else {\
00264 negMask = m_ucpLutMasks[2*idx+1];\
00265 }\
00266 if (negMask != ot::DA_FLAGS::FOREIGN) {\
00267 if ( negMask & (1 << n1r)) {\
00268 if(nlist[8*idx+n1l] < m_uiLocalBufferSize) {\
00269 nlist[8*i+n1l] = nlist[8*idx+n1l];\
00270 found[n1l] = true;\
00271 }\
00272 } else {\
00273 if(nlist[8*idx+n1r] < m_uiLocalBufferSize) {\
00274 nlist[8*i+n1l] = nlist[8*idx+n1r];\
00275 found[n1l] = true;\
00276 }\
00277 }\
00278 if ( negMask & (1 << n2r)) {\
00279 if(nlist[8*idx+n2l] < m_uiLocalBufferSize) {\
00280 nlist[8*i+n2l] = nlist[8*idx+n2l];\
00281 found[n2l] = true;\
00282 }\
00283 } else {\
00284 if(nlist[8*idx+n2r] < m_uiLocalBufferSize) {\
00285 nlist[8*i+n2l] = nlist[8*idx+n2r];\
00286 found[n2l] = true;\
00287 }\
00288 }\
00289 if ( negMask & (1 << n3r)) {\
00290 if(nlist[8*idx+n3l] < m_uiLocalBufferSize) {\
00291 nlist[8*i+n3l] = nlist[8*idx+n3l];\
00292 found[n3l] = true;\
00293 }\
00294 } else {\
00295 if(nlist[8*idx+n3r] < m_uiLocalBufferSize) {\
00296 nlist[8*i+n3l] = nlist[8*idx+n3r];\
00297 found[n3l] = true;\
00298 }\
00299 }\
00300 if ( negMask & (1 << n4r)) {\
00301 if(nlist[8*idx+n4l] < m_uiLocalBufferSize) {\
00302 nlist[8*i+n4l] = nlist[8*idx+n4l];\
00303 found[n4l] = true;\
00304 }\
00305 } else {\
00306 if(nlist[8*idx+n4r] < m_uiLocalBufferSize) {\
00307 nlist[8*i+n4l] = nlist[8*idx+n4r];\
00308 found[n4l] = true;\
00309 }\
00310 }\
00311 }\
00312 }
00313
00314 #define NEG_SEARCH_BLOCK1X NEG_SEARCH_BLOCK1(idnx,0,1,2,3,4,5,6,7)
00315 #define NEG_SEARCH_BLOCK2X NEG_SEARCH_BLOCK2(idnx,0,1,2,3,4,5,6,7)
00316
00317 #define NEG_SEARCH_BLOCK1Y NEG_SEARCH_BLOCK1(idny,0,2,1,3,4,6,5,7)
00318 #define NEG_SEARCH_BLOCK2Y NEG_SEARCH_BLOCK2(idny,0,2,1,3,4,6,5,7)
00319
00320 #define NEG_SEARCH_BLOCK1Z NEG_SEARCH_BLOCK1(idnz,0,4,1,5,2,6,3,7)
00321 #define NEG_SEARCH_BLOCK2Z NEG_SEARCH_BLOCK2(idnz,0,4,1,5,2,6,3,7)
00322
00323 switch (ch_num) {
00324 case 0: {
00325
00326 if (foundNegX) {
00327 NEG_SEARCH_BLOCK1X
00328 }
00329 if (foundNegY) {
00330 NEG_SEARCH_BLOCK1Y
00331 }
00332 if (foundNegZ) {
00333 NEG_SEARCH_BLOCK1Z
00334 }
00335 break;
00336 }
00337 case 1: {
00338
00339 if (foundNegX) {
00340 NEG_SEARCH_BLOCK2X
00341 }
00342 if (foundNegY) {
00343 NEG_SEARCH_BLOCK1Y
00344 }
00345 if (foundNegZ) {
00346 NEG_SEARCH_BLOCK1Z
00347 }
00348 break;
00349 }
00350 case 2: {
00351
00352 if (foundNegX) {
00353 NEG_SEARCH_BLOCK1X
00354 }
00355 if (foundNegY) {
00356 NEG_SEARCH_BLOCK2Y
00357 }
00358 if (foundNegZ) {
00359 NEG_SEARCH_BLOCK1Z
00360 }
00361 break;
00362 }
00363 case 3: {
00364
00365 if (foundNegX) {
00366 NEG_SEARCH_BLOCK2X
00367 }
00368 if (foundNegY) {
00369 NEG_SEARCH_BLOCK2Y
00370 }
00371 if (foundNegZ) {
00372 NEG_SEARCH_BLOCK1Z
00373 }
00374 break;
00375 }
00376 case 4: {
00377
00378 if (foundNegX) {
00379 NEG_SEARCH_BLOCK1X
00380 }
00381 if (foundNegY) {
00382 NEG_SEARCH_BLOCK1Y
00383 }
00384 if (foundNegZ) {
00385 NEG_SEARCH_BLOCK2Z
00386 }
00387 break;
00388 }
00389 case 5: {
00390
00391 if (foundNegX) {
00392 NEG_SEARCH_BLOCK2X
00393 }
00394 if (foundNegY) {
00395 NEG_SEARCH_BLOCK1Y
00396 }
00397 if (foundNegZ) {
00398 NEG_SEARCH_BLOCK2Z
00399 }
00400 break;
00401 }
00402 case 6: {
00403
00404 if (foundNegX) {
00405 NEG_SEARCH_BLOCK1X
00406 }
00407 if (foundNegY) {
00408 NEG_SEARCH_BLOCK2Y
00409 }
00410 if (foundNegZ) {
00411 NEG_SEARCH_BLOCK2Z
00412 }
00413 break;
00414 }
00415 case 7: {
00416
00417 if (foundNegX) {
00418 NEG_SEARCH_BLOCK2X
00419 }
00420 if (foundNegY) {
00421 NEG_SEARCH_BLOCK2Y
00422 }
00423 if (foundNegZ) {
00424 NEG_SEARCH_BLOCK2Z
00425 }
00426 break;
00427 }
00428 default: {
00429 std::cerr << "Wrong Child Number " << ch_num << std::endl;
00430 assert(false);
00431 break;
00432 }
00433 }
00434 }
00435
00436 #undef NEG_SEARCH_BLOCK1X
00437 #undef NEG_SEARCH_BLOCK2X
00438 #undef NEG_SEARCH_BLOCK1Y
00439 #undef NEG_SEARCH_BLOCK2Y
00440 #undef NEG_SEARCH_BLOCK1Z
00441 #undef NEG_SEARCH_BLOCK2Z
00442 #undef NEG_SEARCH_BLOCK1
00443 #undef NEG_SEARCH_BLOCK2
00444
00445
00446 #ifdef __DEBUG_DA_NLIST__
00447 #define DEBUG_CHECK_FAST_MAX_LOWER_BOUND(arr,key,fastIdx,fastResult) CHECK_FAST_MAX_LOWER_BOUND(arr,key,fastIdx,fastResult)
00448
00449 #define POS_SEARCH_DEBUG_BLOCK1(debugV1,debugV2) {\
00450 if ( (ch_num == debugV1) || (ch_num == debugV2) ) {\
00451 std::cout << "Failing for index " << i << " with child num " << ch_num << std::endl;\
00452 assert(false);\
00453 }\
00454 }
00455
00456 #define POS_SEARCH_DEBUG_BLOCK2 {\
00457 assert(sKey > in[m_uiPostGhostBegin - 1]);\
00458 chkMissedPrimary.push_back(sKey);\
00459 }
00460
00461 #define POS_SEARCH_DEBUG_BLOCK3 {\
00462 assert( (idx+k) < m_uiLocalBufferSize );\
00463 assert( in[idx+k].getParent() == newKey );\
00464 assert( !(in[idx+k].getFlag() & ot::TreeNode::NODE) );\
00465 assert( ((idx+k) < m_uiElementBegin) || (((idx+k) >= m_uiPostGhostBegin)) );\
00466 }
00467
00468 #else
00469 #define DEBUG_CHECK_FAST_MAX_LOWER_BOUND(arr,key,fastIdx,fastResult)
00470 #define POS_SEARCH_DEBUG_BLOCK1(debugV1,debugV2)
00471 #define POS_SEARCH_DEBUG_BLOCK2
00472 #define POS_SEARCH_DEBUG_BLOCK3
00473 #endif
00474
00475 #define POS_SECONDARY_SEARCH_BLOCK(debugV1,debugV2) {\
00476 \
00477 POS_SEARCH_DEBUG_BLOCK1(debugV1,debugV2)\
00478 \
00479 sKey = parNodeLocations[j];\
00480 lastLevel = d-1;\
00481 foundKey = seq::maxLowerBound<ot::TreeNode>(in, sKey, idx,&i,NULL);\
00482 DEBUG_CHECK_FAST_MAX_LOWER_BOUND(in, sKey,idx, foundKey) \
00483 if ( foundKey && !((in[idx].getAnchor()==sKey.getAnchor()) && (in[idx].getFlag()&ot::TreeNode::NODE))) {\
00484 foundKey=false;\
00485 }\
00486 if ( foundKey ) {\
00487 nlist[8*i+j] = idx;\
00488 } else {\
00489 \
00490 findGhost = true;\
00491 }\
00492 }
00493
00494 #define POS_SEARCH_BLOCK(debugV1,debugV2) {\
00495 \
00496 sKey = nodeLocations[j];\
00497 lastLevel = d;\
00498 foundKey = seq::maxLowerBound<ot::TreeNode>(in, sKey, idx,&i,NULL);\
00499 DEBUG_CHECK_FAST_MAX_LOWER_BOUND(in, sKey,idx, foundKey) \
00500 if(foundKey && !( (in[idx].isAncestor(sKey) ) || (in[idx]==sKey))) {\
00501 foundKey=false;\
00502 }\
00503 if(foundKey) {\
00504 \
00505 if((in[idx].getAnchor()==sKey.getAnchor())&&(in[idx].getFlag() & ot::TreeNode::NODE)) {\
00506 \
00507 nlist[8*i+j] = idx;\
00508 } else {\
00509 POS_SECONDARY_SEARCH_BLOCK(debugV1,debugV2)\
00510 }\
00511 } else {\
00512 if(i >= m_uiElementBegin) {\
00513 \
00514 \
00515 \
00516 \
00517 POS_SEARCH_DEBUG_BLOCK2\
00518 \
00519 \
00520 POS_SECONDARY_SEARCH_BLOCK(debugV1,debugV2)\
00521 } else {\
00522 \
00523 \
00524 findGhost=true;\
00525 }\
00526 }\
00527 if ( findGhost ) {\
00528 findGhost=false;\
00529 \
00530 \
00531 ot::TreeNode newKey(sKey.getX(),sKey.getY(),sKey.getZ(),\
00532 lastLevel,m_uiDimension,m_uiMaxDepth);\
00533 unsigned int k=1;\
00534 while ( (idx+k) < in.size() ) {\
00535 if ( in[idx+k].getParent() == newKey ) {\
00536 if ( in[idx+k].getFlag() & ot::TreeNode::NODE ) {\
00537 k++;\
00538 continue;\
00539 } else {\
00540 \
00541 findGhost=true;\
00542 break;\
00543 }\
00544 }\
00545 if ( in[idx+k] > newKey.getDLD() ) {\
00546 findGhost = false;\
00547 break;\
00548 } else {\
00549 k++;\
00550 }\
00551 }\
00552 if (findGhost) {\
00553 nlist[8*i+j] = idx+k;\
00554 POS_SEARCH_DEBUG_BLOCK3\
00555 } else {\
00556 nlist[8*i+j] = m_uiLocalBufferSize;\
00557 }\
00558 }\
00559 }
00560
00561
00562
00563 for (unsigned int j = 0; j < 8; j++) {
00564 if(found[j]) {
00565 continue;
00566 }
00567 bool foundKey=false;
00568 bool findGhost=false;
00569 unsigned int idx;
00570 unsigned int lastLevel;
00571 ot::TreeNode sKey(m_uiDimension, m_uiMaxDepth);
00572
00573 switch (j) {
00574 case 0: {
00575 nodeLocations[j] =
00576 ot::TreeNode(x, y, z, m_uiMaxDepth, m_uiDimension, m_uiMaxDepth);
00577 parNodeLocations[j] =
00578 ot::TreeNode(parX, parY, parZ, m_uiMaxDepth, m_uiDimension, m_uiMaxDepth);
00579
00580 if ( !(in[i].getFlag() & ot::TreeNode::NODE ) ) {
00581
00582 #ifdef __DEBUG_DA_NLIST__
00583 if ( (ch_num == 0) || (ch_num == 7) ) {
00584 std::cout << "Failing for index " << i << " with child num " << ch_num << std::endl;
00585 assert(false);
00586 }
00587 #endif
00588
00589 sKey = parNodeLocations[j];
00590 lastLevel = d-1;
00591 foundKey = seq::maxLowerBound<ot::TreeNode> (in, sKey, idx,NULL,&i);
00592 #ifdef __DEBUG_DA_NLIST__
00593 CHECK_FAST_MAX_LOWER_BOUND(in, sKey,idx, foundKey)
00594 #endif
00595 if ( foundKey &&
00596 (!( (in[idx].getAnchor() == sKey.getAnchor()) && (in[idx].getFlag() & ot::TreeNode::NODE) )) ) {
00597 foundKey=false;
00598 }
00599 if ( !foundKey ) {
00600 #ifdef __DEBUG_DA_NLIST__
00601
00602 if(i >= m_uiElementBegin) {
00603 std::cout<<m_iRankActive<<" i = "<<i<<" preGhostElemEnd "<<m_uiPreGhostElementSize
00604 <<" elemBeg: "<<m_uiElementBegin<<" elemEnd: "<<m_uiElementEnd<<std::endl;
00605 }
00606 assert (i < m_uiElementBegin);
00607 #endif
00608
00609 nlist[8*i+j] = i;
00610 } else {
00611 nlist[8*i+j] = idx;
00612 }
00613 } else {
00614 nlist[8*i+j] = i;
00615 }
00616 break;
00617 }
00618 case 1: {
00619 nodeLocations[j] =
00620 ot::TreeNode(x+sz, y, z, m_uiMaxDepth, m_uiDimension, m_uiMaxDepth);
00621 parNodeLocations[j] =
00622 ot::TreeNode(parX+(sz<<1u), parY, parZ, m_uiMaxDepth, m_uiDimension, m_uiMaxDepth);
00623 POS_SEARCH_BLOCK(1,6)
00624 break;
00625 }
00626 case 2: {
00627 nodeLocations[j] =
00628 ot::TreeNode(x, y+sz, z, m_uiMaxDepth, m_uiDimension, m_uiMaxDepth);
00629 parNodeLocations[j] =
00630 ot::TreeNode(parX, parY+(sz<<1u), parZ, m_uiMaxDepth, m_uiDimension, m_uiMaxDepth);
00631 POS_SEARCH_BLOCK(2,5)
00632 break;
00633 }
00634 case 3: {
00635 nodeLocations[j] =
00636 ot::TreeNode(x+sz, y+sz, z, m_uiMaxDepth, m_uiDimension, m_uiMaxDepth);
00637 parNodeLocations[j] =
00638 ot::TreeNode(parX+(sz<<1u), parY+(sz<<1u), parZ, m_uiMaxDepth, m_uiDimension, m_uiMaxDepth);
00639 POS_SEARCH_BLOCK(3,4)
00640 break;
00641 }
00642 case 4: {
00643 nodeLocations[j] =
00644 ot::TreeNode(x, y, z+sz, m_uiMaxDepth, m_uiDimension, m_uiMaxDepth);
00645 parNodeLocations[j] =
00646 ot::TreeNode(parX, parY, parZ+(sz<<1u), m_uiMaxDepth, m_uiDimension, m_uiMaxDepth);
00647 POS_SEARCH_BLOCK(4,3)
00648 break;
00649 }
00650 case 5: {
00651 nodeLocations[j] =
00652 ot::TreeNode(x+sz, y, z+sz, m_uiMaxDepth, m_uiDimension, m_uiMaxDepth);
00653 parNodeLocations[j] =
00654 ot::TreeNode(parX+(sz<<1u), parY, parZ+(sz<<1u), m_uiMaxDepth, m_uiDimension, m_uiMaxDepth);
00655 POS_SEARCH_BLOCK(5,2)
00656 break;
00657 }
00658 case 6: {
00659 nodeLocations[j] =
00660 ot::TreeNode(x, y+sz, z+sz, m_uiMaxDepth, m_uiDimension, m_uiMaxDepth);
00661 parNodeLocations[j] =
00662 ot::TreeNode(parX, parY+(sz<<1u), parZ+(sz<<1u), m_uiMaxDepth, m_uiDimension, m_uiMaxDepth);
00663 POS_SEARCH_BLOCK(6,1)
00664 break;
00665 }
00666 case 7: {
00667 nodeLocations[j] =
00668 ot::TreeNode(x+sz, y+sz, z+sz, m_uiMaxDepth, m_uiDimension, m_uiMaxDepth);
00669 parNodeLocations[j] =
00670 ot::TreeNode(parX+(sz<<1u), parY+(sz<<1u), parZ+(sz<<1u), m_uiMaxDepth, m_uiDimension, m_uiMaxDepth);
00671 POS_SEARCH_BLOCK(7,0)
00672 break;
00673 }
00674 default: {
00675 std::cerr << RED<<"Wrong node number in " << __func__ <<NRM<< std::endl;
00676 assert(false);
00677 break;
00678 }
00679 }
00680 }
00681
00682 #undef POS_SEARCH_BLOCK
00683 #undef POS_SECONDARY_SEARCH_BLOCK
00684 #undef POS_SEARCH_DEBUG_BLOCK1
00685 #undef POS_SEARCH_DEBUG_BLOCK2
00686 #undef POS_SEARCH_DEBUG_BLOCK3
00687
00688
00689
00690
00691
00692
00693
00694 #ifdef __DEBUG_DA_NLIST__
00695 if ( (i >= m_uiElementBegin) && (i < m_uiElementEnd) &&
00696 ( (nlist[8*i] < m_uiElementBegin) || (nlist[8*i] >= m_uiPostGhostBegin) ) ) {
00697 std::cout << "At index " << i << " anchor is at " << nlist[8*i] <<
00698 " where elemBegin is at " << m_uiElementBegin << " and postGhBegin is "
00699 << m_uiPostGhostBegin << std::endl;
00700 std::cout << "RANK is " << m_iRankActive << std::endl;
00701 std::cout << "NList is ";
00702 for (unsigned int j=0; j<8; j++) {
00703 std::cout << nlist[8*i+j] << ", ";
00704 }
00705 std::cout << std::endl;
00706 std::cout<<m_iRankActive<<" failingOct: "<<in[i]<<std::endl<<" it's parent: "<<in[i].getParent()<<std::endl;
00707 if( nlist[8*i] < in.size() ) {
00708 std::cout<<m_iRankActive<<" failingOct's anchor is actually mapped to: "<<in[nlist[8*i]]<<std::endl;
00709 }
00710 assert(false);
00711 }
00712 #endif
00713
00714 #ifdef __DEBUG_DA_NLIST__
00715
00716 if( (i >= m_uiElementBegin) && (i < m_uiPostGhostBegin) ) {
00717 for(unsigned int j=0; j< 8; j++) {
00718 if(nlist[8*i + j] < m_uiElementBegin) {
00719 std::cout<<m_iRankActive<<" Trying to send yourself as a Post Ghost ELEMENT for i = "
00720 <<i<<" j = "<<j<<std::endl;
00721 assert(false);
00722 }
00723 if( (nlist[8*i+j] >= m_uiPostGhostBegin) && (nlist[8*i+j] < m_uiLocalBufferSize) ) {
00724 TreeNode tmpToSend = in[nlist[8*i+j]];
00725 tmpToSend.setWeight(i);
00726 checkSecondRing.push_back(tmpToSend);
00727 }
00728 }
00729 }
00730 #endif
00731
00732
00733 unsigned char _mask=0;
00734 unsigned int numOutOfBounds = 0;
00735 for ( unsigned int j=0; j<8; j++) {
00736 if ( ( nlist[8*i+j] < m_uiElementBegin ) || (nlist[8*i+j] >= m_uiPostGhostBegin) ) {
00737 numOutOfBounds++;
00738 }
00739
00740 if(nlist[8*i+j] >= m_uiLocalBufferSize) {
00741
00742 continue;
00743 }
00744
00745
00746 unsigned int _x,_y,_z, _d;
00747 _x = in[nlist[8*i+j]].getX();
00748 _y = in[nlist[8*i+j]].getY();
00749 _z = in[nlist[8*i+j]].getZ();
00750 _d = in[nlist[8*i+j]].getLevel();
00751 if ( !(in[nlist[8*i+j]].getFlag() & ot::TreeNode::NODE ) ) {
00752
00753 _x = ( ( _x >> ( m_uiMaxDepth - _d + 1 ) ) << ( m_uiMaxDepth - _d + 1 ) );
00754 _y = ( ( _y >> ( m_uiMaxDepth - _d + 1 ) ) << ( m_uiMaxDepth - _d + 1 ) );
00755 _z = ( ( _z >> ( m_uiMaxDepth - _d + 1 ) ) << ( m_uiMaxDepth - _d + 1 ) );
00756 }
00757
00758 switch (j) {
00759 case 0:
00760 if ( !(in[i].getFlag() & ot::TreeNode::NODE ) ) {
00761 _mask |= (1 << j);
00762 }
00763 break;
00764 case 1:
00765
00766
00767 if ( ( (x+sz) != _x ) || ( y != _y ) || ( z != _z ) ) {
00768 _mask |= (1 << j);
00769 }
00770 break;
00771 case 2:
00772
00773
00774 if ( ( x != _x ) || ( (y+sz) != _y ) || ( z != _z ) ) {
00775 _mask |= (1 << j);
00776 }
00777 break;
00778 case 3:
00779
00780
00781 if ( ( (x+sz) != _x ) || ( (y+sz) != _y ) || ( z != _z ) ) {
00782 _mask |= (1 << j);
00783 }
00784 break;
00785 case 4:
00786
00787
00788 if ( ( x != _x ) || ( y != _y ) || ( (z+sz) != _z ) ) {
00789 _mask |= (1 << j);
00790 }
00791 break;
00792 case 5:
00793
00794
00795 if ( ( (x+sz) != _x ) || ( y != _y ) || ( (z+sz) != _z ) ) {
00796 _mask |= (1 << j);
00797 }
00798 break;
00799 case 6:
00800
00801
00802 if ( ( x != _x ) || ( (y+sz) != _y ) || ( (z+sz) != _z ) ) {
00803 _mask |= (1 << j);
00804 }
00805 break;
00806 case 7:
00807 if ( ( (x+sz) != _x ) || ( (y+sz) != _y ) || ( (z+sz) != _z ) ) {
00808 _mask |= (1 << j);
00809 }
00810 break;
00811 }
00812 }
00813
00814
00815 if (numOutOfBounds == 8) {
00816
00817 _mask = ot::DA_FLAGS::FOREIGN;
00818 } else if (numOutOfBounds) {
00819
00820 in[i].orFlag(ot::DA_FLAGS::DEP_ELEM);
00821 }
00822
00823
00824
00825
00826
00827
00828
00829
00830
00831
00832
00833
00834
00835 for (int j=0; j<8; j++) {
00836 if (nlist[8*i+j] >= m_uiLocalBufferSize) {
00837 if (_mask != ot::DA_FLAGS::FOREIGN) {
00838 #ifdef __DEBUG_DA_NLIST__
00839 assert(j);
00840 #endif
00841
00842 nodeLocations[j].setWeight(m_iRankActive);
00843 parNodeLocations[j].setWeight(m_iRankActive);
00844 primaryKeys.push_back(nodeLocations[j]);
00845 secondaryKeys.push_back(parNodeLocations[j]);
00846
00847 nlist[8*i+j] = m_uiLocalBufferSize +
00848 static_cast<unsigned int>(extraAtEnd.size());
00849 nodeLocations[j].setWeight(i);
00850 extraAtEnd.push_back(nodeLocations[j]);
00851 }
00852 }
00853 }
00854
00855
00856 m_ucpLutMasks[2*i+1] = _mask;
00857 }
00858
00859 #ifdef __DEBUG_DA_NLIST__
00860 MPI_Barrier(m_mpiCommActive);
00861 if(!m_iRankActive) {
00862 std::cout<<std::endl;
00863 std::cout<<"Finished Elemental Loop for Set# "<<numFullLoopCtr<<std::endl;
00864 std::cout<<std::endl;
00865 }
00866 MPI_Barrier(m_mpiCommActive);
00867 #endif
00868
00869 #ifdef __DEBUG_DA_NLIST__
00870 MPI_Barrier(m_mpiCommActive);
00871 seq::makeVectorUnique<ot::TreeNode>(chkMissedPrimary, false);
00872 int* chkMissedPrimarySendCounts = new int[m_iNpesActive];
00873 for(int i = 0; i < m_iNpesActive; i++) {
00874 chkMissedPrimarySendCounts[i] = 0;
00875 }
00876
00877 for(unsigned int i = 0; i < chkMissedPrimary.size(); i++) {
00878
00879 unsigned int idx;
00880 bool found = seq::maxLowerBound<TreeNode >(m_tnMinAllBlocks, chkMissedPrimary[i], idx, NULL, NULL);
00881 assert(found);
00882
00883 assert(idx > m_iRankActive);
00884 chkMissedPrimarySendCounts[idx]++;
00885 }
00886
00887 int* chkMissedPrimaryRecvCounts = new int[m_iNpesActive];
00888 par::Mpi_Alltoall<int>(chkMissedPrimarySendCounts, chkMissedPrimaryRecvCounts, 1, m_mpiCommActive);
00889
00890 int* chkMissedPrimarySendOffsets = new int[m_iNpesActive];
00891 int* chkMissedPrimaryRecvOffsets = new int[m_iNpesActive];
00892 chkMissedPrimarySendOffsets[0] = 0;
00893 chkMissedPrimaryRecvOffsets[0] = 0;
00894 for(int i = 1; i < m_iNpesActive; i++) {
00895 chkMissedPrimarySendOffsets[i] = chkMissedPrimarySendOffsets[i-1] + chkMissedPrimarySendCounts[i-1] ;
00896 chkMissedPrimaryRecvOffsets[i] = chkMissedPrimaryRecvOffsets[i-1] + chkMissedPrimaryRecvCounts[i-1] ;
00897 }
00898 unsigned int chkMissedPrimaryRecvSz = chkMissedPrimaryRecvOffsets[m_iNpesActive - 1] +
00899 chkMissedPrimaryRecvCounts[m_iNpesActive - 1];
00900 std::vector<ot::TreeNode> chkMissedPrimaryRecvBuffer(chkMissedPrimaryRecvSz);
00901 ot::TreeNode* chkMissedPrimarySendPtr = NULL;
00902 ot::TreeNode* chkMissedPrimaryRecvPtr = NULL;
00903 if(!(chkMissedPrimary.empty())) {
00904 chkMissedPrimarySendPtr = (&(*(chkMissedPrimary.begin())));
00905 }
00906 if(!(chkMissedPrimaryRecvBuffer.empty())) {
00907 chkMissedPrimaryRecvPtr = (&(*(chkMissedPrimaryRecvBuffer.begin())));
00908 }
00909 par::Mpi_Alltoallv_sparse<ot::TreeNode>( chkMissedPrimarySendPtr, chkMissedPrimarySendCounts,
00910 chkMissedPrimarySendOffsets, chkMissedPrimaryRecvPtr,
00911 chkMissedPrimaryRecvCounts, chkMissedPrimaryRecvOffsets, m_mpiCommActive);
00912
00913 for(unsigned int i = 0; i < chkMissedPrimaryRecvBuffer.size(); i++) {
00914 unsigned int idx;
00915 bool found = seq::maxLowerBound<TreeNode >(in, chkMissedPrimaryRecvBuffer[i], idx, NULL, NULL);
00916 assert(found);
00917 assert( (idx >= m_uiElementBegin) && (idx < m_uiPostGhostBegin) );
00918
00919 assert(in[idx].getAnchor() == chkMissedPrimaryRecvBuffer[i].getAnchor());
00920 assert( !(in[idx].getFlag() & ot::TreeNode::NODE) );
00921 }
00922
00923 delete [] chkMissedPrimarySendCounts;
00924 delete [] chkMissedPrimaryRecvCounts;
00925 delete [] chkMissedPrimarySendOffsets;
00926 delete [] chkMissedPrimaryRecvOffsets;
00927 chkMissedPrimaryRecvBuffer.clear();
00928 MPI_Barrier(m_mpiCommActive);
00929 if(!m_iRankActive) {
00930 std::cout<<std::endl;
00931 std::cout<<"Passed Test for Missed Primary "<<numFullLoopCtr<<std::endl;
00932 std::cout<<std::endl;
00933 }
00934 MPI_Barrier(m_mpiCommActive);
00935 #endif
00936
00937
00938
00939
00940
00941
00942 std::vector<unsigned int> ScndScatterMap;
00943
00944 std::vector<unsigned int> ScndSendProcs;
00945 std::vector<unsigned int> ScndSendCounts;
00946
00947 std::vector<unsigned int> ScndRecvProcs;
00948 std::vector<unsigned int> ScndRecvCounts;
00949
00950
00951
00952
00953
00954
00955
00956
00957
00958
00959
00960
00961
00962
00963
00964
00965
00966
00967
00968
00969
00970
00971
00972
00973
00974
00975
00976
00977
00978
00979
00980
00981
00982
00983
00984
00985
00986
00987
00988 ot::TreeNode rootNode(m_uiDimension,m_uiMaxDepth);
00989
00990 assert(!in.empty());
00991 assert(m_uiElementBegin < in.size());
00992 assert(m_uiPostGhostBegin >= 1);
00993 assert((m_uiPostGhostBegin-1) < in.size());
00994
00995 std::vector<ot::TreeNode> failedPrimaryKeys = primaryKeys;
00996 std::vector<ot::TreeNode> failedSecondaryKeys = secondaryKeys;
00997
00998
00999 seq::makeVectorUnique<ot::TreeNode>(failedPrimaryKeys, false);
01000 seq::makeVectorUnique<ot::TreeNode>(failedSecondaryKeys, false);
01001
01002 #ifdef __DEBUG_DA_NLIST__
01003 MPI_Barrier(m_mpiCommActive);
01004
01005 if(!m_iRankActive) {
01006 std::cout<<std::endl;
01007 std::cout<<"Made Failed Keys Unique. Finding Partition Next."<<std::endl;
01008 std::cout<<std::endl;
01009 }
01010 MPI_Barrier(m_mpiCommActive);
01011 #endif
01012
01013 unsigned int PrimaryKeysSz = static_cast<unsigned int>(failedPrimaryKeys.size());
01014 unsigned int SecondaryKeysSz = static_cast<unsigned int>(failedSecondaryKeys.size());
01015
01016
01017 unsigned int *partPrimary = NULL;
01018 if(PrimaryKeysSz) {
01019 partPrimary = new unsigned int[PrimaryKeysSz];
01020 }
01021 unsigned int *partSecondary = NULL;
01022 if(SecondaryKeysSz) {
01023 partSecondary = new unsigned int[SecondaryKeysSz];
01024 }
01025
01026 for (unsigned int i=0; i<PrimaryKeysSz; i++) {
01027 unsigned int idx;
01028
01029 bool found = seq::maxLowerBound<TreeNode >(m_tnMinAllBlocks, failedPrimaryKeys[i], idx, NULL, NULL);
01030 assert(found);
01031 partPrimary[i] = idx;
01032 }
01033
01034 for (unsigned int i=0; i<SecondaryKeysSz; i++) {
01035 unsigned int idx;
01036
01037 bool found = seq::maxLowerBound<TreeNode >(m_tnMinAllBlocks, failedSecondaryKeys[i], idx, NULL, NULL);
01038 assert(found);
01039 partSecondary[i] = idx;
01040 }
01041
01042 int *numKeysSendP = new int[m_iNpesActive];
01043 int *numKeysSendS = new int[m_iNpesActive];
01044 int *numKeysRecvP = new int[m_iNpesActive];
01045 int *numKeysRecvS = new int[m_iNpesActive];
01046
01047 for (int i=0; i<m_iNpesActive; i++) {
01048 numKeysSendP[i] = 0;
01049 numKeysSendS[i] = 0;
01050 }
01051
01052 for (unsigned int i=0; i<PrimaryKeysSz; i++) {
01053 numKeysSendP[partPrimary[i]]++;
01054 }
01055 for (unsigned int i=0; i<SecondaryKeysSz; i++) {
01056 numKeysSendS[partSecondary[i]]++;
01057 }
01058
01059 #ifdef __DEBUG_DA_NLIST__
01060 MPI_Barrier(m_mpiCommActive);
01061 if(!m_iRankActive) {
01062 std::cout<<std::endl;
01063 std::cout<<"First ALL2ALL for Second Ring..."<<std::endl;
01064 std::cout<<std::endl;
01065 }
01066 MPI_Barrier(m_mpiCommActive);
01067 #endif
01068
01069
01070
01071 PROF_BUILD_NLIST_COMM_BEGIN
01072
01073 par::Mpi_Alltoall<int>(numKeysSendP, numKeysRecvP, 1, m_mpiCommActive);
01074 par::Mpi_Alltoall<int>(numKeysSendS, numKeysRecvS, 1, m_mpiCommActive);
01075
01076 PROF_BUILD_NLIST_COMM_END
01077
01078
01079 int *sendOffsetsP = new int[m_iNpesActive]; sendOffsetsP[0] = 0;
01080 int *recvOffsetsP = new int[m_iNpesActive]; recvOffsetsP[0] = 0;
01081 int *numKeysTmpP = new int[m_iNpesActive]; numKeysTmpP[0] = 0;
01082
01083 int *sendOffsetsS = new int[m_iNpesActive]; sendOffsetsS[0] = 0;
01084 int *recvOffsetsS = new int[m_iNpesActive]; recvOffsetsS[0] = 0;
01085 int *numKeysTmpS = new int[m_iNpesActive]; numKeysTmpS[0] = 0;
01086
01087
01088 for (int i = 1; i < m_iNpesActive; i++) {
01089 sendOffsetsP[i] = sendOffsetsP[i-1] + numKeysSendP[i-1];
01090 recvOffsetsP[i] = recvOffsetsP[i-1] + numKeysRecvP[i-1];
01091 numKeysTmpP[i] = 0;
01092
01093 sendOffsetsS[i] = sendOffsetsS[i-1] + numKeysSendS[i-1];
01094 recvOffsetsS[i] = recvOffsetsS[i-1] + numKeysRecvS[i-1];
01095 numKeysTmpS[i] = 0;
01096 }
01097
01098
01099 std::vector<ot::TreeNode> sendKp (PrimaryKeysSz);
01100 std::vector<ot::TreeNode> recvKp (recvOffsetsP[m_iNpesActive-1] + numKeysRecvP[m_iNpesActive-1]);
01101
01102 std::vector<ot::TreeNode> sendKs (SecondaryKeysSz);
01103 std::vector<ot::TreeNode> recvKs (recvOffsetsS[m_iNpesActive-1] + numKeysRecvS[m_iNpesActive-1]);
01104
01105 for (unsigned int i=0; i< PrimaryKeysSz; i++) {
01106 unsigned int ni = numKeysTmpP[partPrimary[i]];
01107 numKeysTmpP[partPrimary[i]]++;
01108
01109 sendKp[sendOffsetsP[partPrimary[i]] + ni] = failedPrimaryKeys[i];
01110 }
01111
01112 for (unsigned int i=0; i< SecondaryKeysSz; i++) {
01113 unsigned int ni = numKeysTmpS[partSecondary[i]];
01114 numKeysTmpS[partSecondary[i]]++;
01115
01116 sendKs[sendOffsetsS[partSecondary[i]] + ni] = failedSecondaryKeys[i];
01117 }
01118
01119 failedPrimaryKeys.clear();
01120 failedSecondaryKeys.clear();
01121
01122 if(partPrimary) {
01123 delete [] partPrimary;
01124 partPrimary = NULL;
01125 }
01126
01127 if(partSecondary) {
01128 delete [] partSecondary;
01129 partSecondary = NULL;
01130 }
01131
01132 if(numKeysTmpP) {
01133 delete [] numKeysTmpP;
01134 numKeysTmpP = NULL;
01135 }
01136
01137 if(numKeysTmpS) {
01138 delete [] numKeysTmpS;
01139 numKeysTmpS = NULL;
01140 }
01141
01142 ot::TreeNode* sendKpPtr = NULL;
01143 ot::TreeNode* recvKpPtr = NULL;
01144 ot::TreeNode* sendKsPtr = NULL;
01145 ot::TreeNode* recvKsPtr = NULL;
01146 if(!sendKp.empty()) {
01147 sendKpPtr = &(*(sendKp.begin()));
01148 }
01149 if(!recvKp.empty()) {
01150 recvKpPtr = &(*(recvKp.begin()));
01151 }
01152 if(!sendKs.empty()) {
01153 sendKsPtr = &(*(sendKs.begin()));
01154 }
01155 if(!recvKs.empty()) {
01156 recvKsPtr = &(*(recvKs.begin()));
01157 }
01158
01159 PROF_BUILD_NLIST_COMM_BEGIN
01160
01161 par::Mpi_Alltoallv_sparse<ot::TreeNode>( sendKpPtr, numKeysSendP, sendOffsetsP,
01162 recvKpPtr, numKeysRecvP, recvOffsetsP, m_mpiCommActive);
01163
01164 par::Mpi_Alltoallv_sparse<ot::TreeNode>( sendKsPtr, numKeysSendS, sendOffsetsS,
01165 recvKsPtr, numKeysRecvS, recvOffsetsS, m_mpiCommActive);
01166
01167 PROF_BUILD_NLIST_COMM_END
01168
01169 sendKp.clear();
01170 sendKs.clear();
01171
01172 delete [] sendOffsetsP;
01173 sendOffsetsP = NULL;
01174
01175 delete [] recvOffsetsP;
01176 recvOffsetsP = NULL;
01177
01178 delete [] numKeysSendP;
01179 numKeysSendP = NULL;
01180
01181 delete [] numKeysRecvP;
01182 numKeysRecvP = NULL;
01183
01184 delete [] sendOffsetsS;
01185 sendOffsetsS = NULL;
01186
01187 delete [] recvOffsetsS;
01188 recvOffsetsS = NULL;
01189
01190 delete [] numKeysSendS;
01191 numKeysSendS = NULL;
01192
01193 delete [] numKeysRecvS;
01194 numKeysRecvS = NULL;
01195
01196 std::vector<ot::NodeAndRanks> recvK2P;
01197 std::vector<ot::NodeAndRanks> recvK2S;
01198
01199
01200 for (int i = 0; i < recvKp.size(); i++) {
01201 ot::NodeAndRanks tmp;
01202 tmp.node = recvKp[i];
01203 tmp.ranks.push_back(recvKp[i].getWeight());
01204 recvK2P.push_back(tmp);
01205 }
01206
01207 for (int i=0;i<recvKs.size();i++) {
01208 ot::NodeAndRanks tmp;
01209 tmp.node = recvKs[i];
01210 tmp.ranks.push_back(recvKs[i].getWeight());
01211 recvK2S.push_back(tmp);
01212 }
01213
01214 recvKp.clear();
01215 recvKs.clear();
01216
01217 std::sort(recvK2P.begin(),recvK2P.end());
01218 std::sort(recvK2S.begin(),recvK2S.end());
01219
01220
01221 if (recvK2P.size() >= 2) {
01222 std::vector<ot::NodeAndRanks> tmp(recvK2P.size());
01223 tmp[0] = recvK2P[0];
01224 unsigned int tmpSize=1;
01225 for (unsigned int i=1;i<recvK2P.size();i++) {
01226 if (tmp[tmpSize-1] != recvK2P[i]) {
01227
01228 tmp[tmpSize] = recvK2P[i];
01229 tmpSize++;
01230 } else {
01231 tmp[tmpSize-1].ranks.push_back(recvK2P[i].ranks[0]);
01232 }
01233 }
01234 tmp.resize(tmpSize);
01235 for (unsigned int i=0;i<tmpSize;i++) {
01236 seq::makeVectorUnique<int>(tmp[i].ranks,false);
01237 }
01238 recvK2P = tmp;
01239 tmp.clear();
01240 }
01241
01242
01243 if (recvK2S.size() >= 2) {
01244 std::vector<ot::NodeAndRanks> tmp(recvK2S.size());
01245 tmp[0] = recvK2S[0];
01246 unsigned int tmpSize=1;
01247 for (unsigned int i=1;i<recvK2S.size();i++) {
01248 if (tmp[tmpSize-1] != recvK2S[i]) {
01249
01250 tmp[tmpSize] = recvK2S[i];
01251 tmpSize++;
01252 } else {
01253 tmp[tmpSize-1].ranks.push_back(recvK2S[i].ranks[0]);
01254 }
01255 }
01256 tmp.resize(tmpSize);
01257 for (unsigned int i=0;i<tmpSize;i++) {
01258 seq::makeVectorUnique<int>(tmp[i].ranks,false);
01259 }
01260 recvK2S = tmp;
01261 tmp.clear();
01262 }
01263
01264
01265 std::vector<std::vector<ot::TreeNode> > sendNodesP(m_iNpesActive);
01266 std::vector<std::vector<ot::TreeNode> > sendNodesS(m_iNpesActive);
01267
01268 std::vector<std::vector<int> > idxP(m_iNpesActive);
01269 std::vector<std::vector<int> > idxS(m_iNpesActive);
01270
01271 #ifdef __DEBUG_DA_NLIST__
01272 MPI_Barrier(m_mpiCommActive);
01273 if(!m_iRankActive) {
01274 std::cout<<std::endl;
01275 std::cout<<"Starting Local Search for Second Ring..."<<std::endl;
01276 std::cout<<std::endl;
01277 }
01278 MPI_Barrier(m_mpiCommActive);
01279 #endif
01280
01281
01282 for (unsigned int i=0; i<recvK2P.size();i++) {
01283 unsigned int idx;
01284 bool found = seq::maxLowerBound<ot::TreeNode >(in, recvK2P[i].node, idx,NULL,NULL);
01285
01286 if (found) {
01287 #ifdef __DEBUG_DA_NLIST__
01288 assert( (in[idx].isAncestor(recvK2P[i].node)) || (in[idx] == (recvK2P[i].node)) );
01289 assert( (idx >= m_uiElementBegin) && (idx < m_uiPostGhostBegin) );
01290 #endif
01291
01292 if ( (in[idx].getAnchor() != recvK2P[i].node.getAnchor()) ||
01293 (!(in[idx].getFlag() & ot::TreeNode::NODE)) ) {
01294 found = false;
01295 }
01296 }
01297
01298
01299 for (int j=0; j < recvK2P[i].ranks.size(); j++) {
01300 if ( found ) {
01301
01302 sendNodesP[recvK2P[i].ranks[j]].push_back(in[idx]);
01303 idxP[recvK2P[i].ranks[j]].push_back(idx);
01304 } else {
01305
01306 sendNodesP[recvK2P[i].ranks[j]].push_back(rootNode);
01307 idxP[recvK2P[i].ranks[j]].push_back(-1);
01308 }
01309 sendNodesP[recvK2P[i].ranks[j]][sendNodesP[recvK2P[i].ranks[j]].size()-1].setWeight(m_iRankActive);
01310 }
01311 }
01312
01313 for (unsigned int i=0; i<recvK2S.size();i++) {
01314 unsigned int idx;
01315 bool found = seq::maxLowerBound<ot::TreeNode >(in, recvK2S[i].node, idx, NULL, NULL);
01316 if (found) {
01317 #ifdef __DEBUG_DA_NLIST__
01318 assert( (in[idx].isAncestor(recvK2S[i].node)) || (in[idx] == (recvK2S[i].node)) );
01319 assert( (idx >= m_uiElementBegin) && (idx < m_uiPostGhostBegin) );
01320 #endif
01321 if ( (in[idx].getAnchor() != recvK2S[i].node.getAnchor()) || (!(in[idx].getFlag() & ot::TreeNode::NODE)) ) {
01322 found = false;
01323 }
01324 }
01325
01326 for (int j=0; j < recvK2S[i].ranks.size(); j++) {
01327 if ( found ) {
01328
01329 sendNodesS[recvK2S[i].ranks[j]].push_back(in[idx]);
01330 idxS[recvK2S[i].ranks[j]].push_back(idx);
01331 } else {
01332
01333 sendNodesS[recvK2S[i].ranks[j]].push_back(rootNode);
01334 idxS[recvK2S[i].ranks[j]].push_back(-1);
01335 }
01336 sendNodesS[recvK2S[i].ranks[j]][sendNodesS[recvK2S[i].ranks[j]].size()-1].setWeight(m_iRankActive);
01337 }
01338 }
01339
01340 recvK2P.clear();
01341 recvK2S.clear();
01342
01343 for (unsigned int i = 0; i < m_iNpesActive; i++) {
01344 seq::makeVectorUnique<TreeNode>(sendNodesP[i],false);
01345 seq::makeVectorUnique<TreeNode>(sendNodesS[i],false);
01346 seq::makeVectorUnique<int>(idxP[i],false);
01347 seq::makeVectorUnique<int>(idxS[i],false);
01348 }
01349
01350 numKeysSendP = new int[m_iNpesActive];
01351 numKeysSendS = new int[m_iNpesActive];
01352 numKeysRecvP = new int[m_iNpesActive];
01353 numKeysRecvS = new int[m_iNpesActive];
01354
01355 for (int i=0; i<m_iNpesActive; i++) {
01356 numKeysSendP[i] = static_cast<int>(sendNodesP[i].size());
01357 numKeysSendS[i] = static_cast<int>(sendNodesS[i].size());
01358 }
01359
01360 #ifdef __DEBUG_DA_NLIST__
01361 MPI_Barrier(m_mpiCommActive);
01362 if(!m_iRankActive) {
01363 std::cout<<std::endl;
01364 std::cout<<"Sending Results after Local Search for Second Ring..."<<std::endl;
01365 std::cout<<std::endl;
01366 }
01367 MPI_Barrier(m_mpiCommActive);
01368 #endif
01369
01370
01371 PROF_BUILD_NLIST_COMM_BEGIN
01372
01373 par::Mpi_Alltoall<int>(numKeysSendP, numKeysRecvP, 1, m_mpiCommActive);
01374 par::Mpi_Alltoall<int>(numKeysSendS, numKeysRecvS, 1, m_mpiCommActive);
01375
01376 PROF_BUILD_NLIST_COMM_END
01377
01378
01379 sendOffsetsP = new int[m_iNpesActive]; sendOffsetsP[0] = 0;
01380 sendOffsetsS = new int[m_iNpesActive]; sendOffsetsS[0] = 0;
01381 recvOffsetsP = new int[m_iNpesActive]; recvOffsetsP[0] = 0;
01382 recvOffsetsS = new int[m_iNpesActive]; recvOffsetsS[0] = 0;
01383
01384
01385 for (int i=1; i<m_iNpesActive; i++) {
01386 sendOffsetsP[i] = sendOffsetsP[i-1] + numKeysSendP[i-1];
01387 recvOffsetsP[i] = recvOffsetsP[i-1] + numKeysRecvP[i-1];
01388
01389 sendOffsetsS[i] = sendOffsetsS[i-1] + numKeysSendS[i-1];
01390 recvOffsetsS[i] = recvOffsetsS[i-1] + numKeysRecvS[i-1];
01391 }
01392
01393
01394 sendKp.resize(sendOffsetsP[m_iNpesActive-1] + numKeysSendP[m_iNpesActive-1]);
01395 recvKp.resize(recvOffsetsP[m_iNpesActive-1] + numKeysRecvP[m_iNpesActive-1]);
01396
01397 sendKs.resize(sendOffsetsS[m_iNpesActive-1] + numKeysSendS[m_iNpesActive-1]);
01398 recvKs.resize(recvOffsetsS[m_iNpesActive-1] + numKeysRecvS[m_iNpesActive-1]);
01399
01400 std::vector<int> idxSendKp(sendOffsetsP[m_iNpesActive-1] + numKeysSendP[m_iNpesActive-1]);
01401 std::vector<int> idxSendKs(sendOffsetsS[m_iNpesActive-1] + numKeysSendS[m_iNpesActive-1]);
01402
01403 for (unsigned int i = 0; i < m_iNpesActive; i++) {
01404 for (unsigned int j = 0; j < numKeysSendP[i]; j++) {
01405
01406 sendKp[sendOffsetsP[i] + j] = sendNodesP[i][j];
01407 idxSendKp[sendOffsetsP[i] + j] = idxP[i][j];
01408
01409 #ifdef __DEBUG_DA_NLIST__
01410 assert( ((sendKp[sendOffsetsP[i] + j] == rootNode) && (idxSendKp[sendOffsetsP[i] + j] == -1)) ||
01411 (in[idxSendKp[sendOffsetsP[i] + j]] == sendKp[sendOffsetsP[i] + j]) );
01412 #endif
01413 }
01414 for (unsigned int j = 0; j < numKeysSendS[i]; j++) {
01415
01416 sendKs[sendOffsetsS[i] + j] = sendNodesS[i][j];
01417 idxSendKs[sendOffsetsS[i] + j] = idxS[i][j];
01418
01419 #ifdef __DEBUG_DA_NLIST__
01420 assert( ((sendKs[sendOffsetsS[i] + j] == rootNode) && (idxSendKs[sendOffsetsS[i] + j] == -1)) ||
01421 (in[idxSendKs[sendOffsetsS[i] + j]] == sendKs[sendOffsetsS[i] + j]) );
01422 #endif
01423 }
01424 }
01425
01426 sendNodesP.clear();
01427 sendNodesS.clear();
01428
01429 idxP.clear();
01430 idxS.clear();
01431
01432 sendKpPtr = NULL;
01433 recvKpPtr = NULL;
01434 sendKsPtr = NULL;
01435 recvKsPtr = NULL;
01436 if(!sendKp.empty()) {
01437 sendKpPtr = &(*(sendKp.begin()));
01438 }
01439 if(!sendKs.empty()) {
01440 sendKsPtr = &(*(sendKs.begin()));
01441 }
01442 if(!recvKp.empty()) {
01443 recvKpPtr = &(*(recvKp.begin()));
01444 }
01445 if(!recvKs.empty()) {
01446 recvKsPtr = &(*(recvKs.begin()));
01447 }
01448
01449 PROF_BUILD_NLIST_COMM_BEGIN
01450
01451 par::Mpi_Alltoallv_sparse<ot::TreeNode>( sendKpPtr, numKeysSendP, sendOffsetsP,
01452 recvKpPtr, numKeysRecvP, recvOffsetsP, m_mpiCommActive);
01453
01454 par::Mpi_Alltoallv_sparse<ot::TreeNode>( sendKsPtr, numKeysSendS, sendOffsetsS,
01455 recvKsPtr, numKeysRecvS, recvOffsetsS, m_mpiCommActive);
01456
01457 PROF_BUILD_NLIST_COMM_END
01458
01459
01460 unsigned int actualRecvKpSz = static_cast<unsigned int>(recvKp.size());
01461 unsigned int actualRecvKsSz = static_cast<unsigned int>(recvKs.size());
01462
01463
01464
01465 std::vector<TreeNode> tmpRecvKp;
01466 std::vector<TreeNode> tmpRecvKs;
01467
01468 std::vector<unsigned int> kp2ActualKp;
01469 std::vector<unsigned int> ks2ActualKs;
01470
01471 tmpRecvKp.push_back(rootNode);
01472 kp2ActualKp.push_back(recvKp.size());
01473 for(unsigned int i = 0; i < recvKp.size(); i++) {
01474 if(recvKp[i] > rootNode) {
01475 tmpRecvKp.push_back(recvKp[i]);
01476 kp2ActualKp.push_back(i);
01477 }
01478 }
01479
01480 tmpRecvKs.push_back(rootNode);
01481 ks2ActualKs.push_back(recvKs.size());
01482 for(unsigned int i = 0; i < recvKs.size(); i++) {
01483 if(recvKs[i] > rootNode) {
01484 tmpRecvKs.push_back(recvKs[i]);
01485 ks2ActualKs.push_back(i);
01486 }
01487 }
01488
01489 recvKp = tmpRecvKp;
01490 recvKs = tmpRecvKs;
01491
01492 tmpRecvKp.clear();
01493 tmpRecvKs.clear();
01494
01495 #ifdef __DEBUG_DA_NLIST__
01496 MPI_Barrier(m_mpiCommActive);
01497 assert(seq::test::isUniqueAndSorted(recvKp));
01498 assert(seq::test::isUniqueAndSorted(recvKs));
01499 if(!m_iRankActive) {
01500 std::cout<<std::endl;
01501 std::cout<<"Processing Results in Second Ring..."<<std::endl;
01502 std::cout<<std::endl;
01503 }
01504 MPI_Barrier(m_mpiCommActive);
01505 #endif
01506
01507 unsigned int *oldToNewIdx = NULL;
01508 bool *fromPrimary = NULL;
01509 bool *fromSecondary = NULL;
01510 if(!extraAtEnd.empty()) {
01511 oldToNewIdx = new unsigned int[extraAtEnd.size()];
01512 fromPrimary = new bool[extraAtEnd.size()];
01513 fromSecondary = new bool[extraAtEnd.size()];
01514 }
01515 std::vector<seq::IndexHolder<ot::TreeNode> > extraIndices (extraAtEnd.size());
01516
01517 for (unsigned int i = 0; i < extraAtEnd.size(); i++) {
01518 unsigned int idx;
01519 bool found = seq::maxLowerBound<ot::TreeNode >(recvKp, primaryKeys[i], idx, NULL, NULL);
01520
01521 if (found) {
01522
01523 if (recvKp[idx] == rootNode) {
01524 found = false;
01525 fromPrimary[i] = false;
01526 } else {
01527 #ifdef __DEBUG_DA_NLIST__
01528 assert( recvKp[idx].getFlag() & ot::TreeNode::NODE );
01529 #endif
01530 if( recvKp[idx].getAnchor() != primaryKeys[i].getAnchor() ) {
01531
01532 found = false;
01533 fromPrimary[i] = false;
01534 }else {
01535 oldToNewIdx[i] = idx;
01536 fromPrimary[i] = true;
01537 fromSecondary[i] = false;
01538 TreeNode tmp = recvKp[idx];
01539 tmp.setWeight(extraAtEnd[i].getWeight());
01540 extraAtEnd[i] = tmp;
01541 extraIndices[i].value = &(extraAtEnd[i]);
01542 extraIndices[i].index = i;
01543 }
01544 }
01545 }else {
01546 assert(false);
01547 }
01548
01549 if (!found) {
01550 unsigned int idx;
01551 found = seq::maxLowerBound<ot::TreeNode >(recvKs, secondaryKeys[i], idx, NULL, NULL);
01552
01553 #ifdef __DEBUG_DA_NLIST__
01554 assert( found );
01555 assert( recvKs[idx] != rootNode );
01556 assert( recvKs[idx].getFlag() & ot::TreeNode::NODE );
01557 assert( recvKs[idx].getAnchor() == secondaryKeys[i].getAnchor() );
01558 #endif
01559
01560
01561
01562 unsigned int idxTmp;
01563 ot::TreeNode* inPtr = NULL;
01564 if(!in.empty()) {
01565 inPtr = &(*(in.begin()));
01566 }
01567 found = seq::BinarySearch<ot::TreeNode>(inPtr,
01568 static_cast<unsigned int>(in.size()), recvKs[idx], &idxTmp);
01569
01570 if(found) {
01571 fromSecondary[i] = false;
01572 oldToNewIdx[i] = idxTmp;
01573 TreeNode tmp = in[idxTmp];
01574 tmp.setWeight(extraAtEnd[i].getWeight());
01575 extraAtEnd[i] = tmp;
01576 extraIndices[i].value = &(extraAtEnd[i]);
01577 extraIndices[i].index = i;
01578 }else {
01579 fromSecondary[i] = true;
01580 oldToNewIdx[i] = idx;
01581 TreeNode tmp = recvKs[idx];
01582 tmp.setWeight(extraAtEnd[i].getWeight());
01583 extraAtEnd[i] = tmp;
01584 extraIndices[i].value = &(extraAtEnd[i]);
01585 extraIndices[i].index = i;
01586 }
01587 }
01588 }
01589
01590 primaryKeys.clear();
01591 secondaryKeys.clear();
01592 extraAtEnd.clear();
01593
01594 std::sort(extraIndices.begin(),extraIndices.end());
01595
01596 std::vector<ot::TreeNode> sortedUniqueExtras(extraIndices.size());
01597 std::vector<unsigned int> chosenIndexIntoOld(extraIndices.size());
01598
01599
01600 std::vector<std::vector< unsigned int> > indicesIntoOld(extraIndices.size());
01601
01602
01603 std::vector<std::vector< unsigned int> > indicesIntoLUT(extraIndices.size());
01604
01605 if (!extraIndices.empty()) {
01606 sortedUniqueExtras[0] = *(extraIndices[0].value);
01607 chosenIndexIntoOld[0] = extraIndices[0].index;
01608
01609 indicesIntoOld[0].push_back(extraIndices[0].index);
01610 indicesIntoLUT[0].push_back(extraIndices[0].value->getWeight());
01611 }
01612
01613 if (extraIndices.size() >= 2) {
01614 unsigned int tmpSize=1;
01615 for (unsigned int i=1;i<extraIndices.size();i++) {
01616 if (sortedUniqueExtras[tmpSize-1] != *(extraIndices[i].value)) {
01617
01618 sortedUniqueExtras[tmpSize] = *(extraIndices[i].value);
01619 chosenIndexIntoOld[tmpSize] = extraIndices[i].index;
01620
01621 indicesIntoOld[tmpSize].push_back(extraIndices[i].index);
01622 indicesIntoLUT[tmpSize].push_back(extraIndices[i].value->getWeight());
01623 tmpSize++;
01624 } else {
01625 indicesIntoOld[tmpSize-1].push_back(extraIndices[i].index);
01626 indicesIntoLUT[tmpSize-1].push_back(extraIndices[i].value->getWeight());
01627 }
01628 }
01629
01630 sortedUniqueExtras.resize(tmpSize);
01631
01632 #ifdef __DEBUG_DA_NLIST__
01633 assert(seq::test::isUniqueAndSorted<TreeNode >(sortedUniqueExtras));
01634 #endif
01635
01636 chosenIndexIntoOld.resize(tmpSize);
01637
01638 indicesIntoOld.resize(tmpSize);
01639 indicesIntoLUT.resize(tmpSize);
01640
01641 for (unsigned int i=0;i<tmpSize;i++) {
01642 std::sort(indicesIntoOld[i].begin(),indicesIntoOld[i].end());
01643 std::sort(indicesIntoLUT[i].begin(),indicesIntoLUT[i].end());
01644 #ifdef __DEBUG_DA_NLIST__
01645
01646 assert(seq::test::isUniqueAndSorted<unsigned int>(indicesIntoOld[i]));
01647 assert(seq::test::isUniqueAndSorted<unsigned int>(indicesIntoLUT[i]));
01648 #endif
01649 }
01650
01651 }
01652
01653 extraIndices.clear();
01654
01655
01656
01657
01658
01659
01660
01661
01662
01663
01664
01665
01666
01667
01668
01669
01670
01671
01672
01673
01674
01675
01676
01677
01678 unsigned int *recvCntsExtras = new unsigned int[m_iNpesActive];
01679
01680 std::vector<char> pickingP(actualRecvKpSz);
01681 std::vector<char> pickingS(actualRecvKsSz);
01682
01683 std::vector<char> pickedP(sendKp.size());
01684 std::vector<char> pickedS(sendKs.size());
01685
01686 for (unsigned int i = 0; i < actualRecvKpSz; i++) {
01687 pickingP[i] = 0;
01688 }
01689
01690 for (unsigned int i = 0; i < actualRecvKsSz; i++) {
01691 pickingS[i] = 0;
01692 }
01693
01694 for (unsigned int i = 0; i < m_iNpesActive; i++) {
01695 recvCntsExtras[i] = 0;
01696 }
01697
01698
01699
01700
01701 std::vector< std::vector<unsigned int> > secondRingCorrections;
01702
01703 for (unsigned int i=0;i<sortedUniqueExtras.size();i++) {
01704
01705
01706
01707
01708
01709
01710
01711
01712
01713
01714
01715
01716
01717
01718
01719
01720
01721
01722
01723
01724
01725
01726
01727
01728
01729
01730
01731
01732
01733
01734
01735
01736
01737
01738
01739
01740 if (fromPrimary[chosenIndexIntoOld[i]]) {
01741 #ifdef __DEBUG_DA_NLIST__
01742 assert( recvKp[oldToNewIdx[chosenIndexIntoOld[i]]] == sortedUniqueExtras[i] );
01743 assert( recvKp[oldToNewIdx[chosenIndexIntoOld[i]]] != rootNode );
01744 #endif
01745 in.push_back(recvKp[oldToNewIdx[chosenIndexIntoOld[i]]]);
01746 recvCntsExtras[recvKp[oldToNewIdx[chosenIndexIntoOld[i]]].getWeight()]++;
01747 pickingP[kp2ActualKp[oldToNewIdx[chosenIndexIntoOld[i]]]] = 1;
01748 } else if(fromSecondary[chosenIndexIntoOld[i]]) {
01749 #ifdef __DEBUG_DA_NLIST__
01750 assert( recvKs[oldToNewIdx[chosenIndexIntoOld[i]]] == sortedUniqueExtras[i] );
01751 assert( recvKs[oldToNewIdx[chosenIndexIntoOld[i]]] != rootNode );
01752 #endif
01753 in.push_back(recvKs[oldToNewIdx[chosenIndexIntoOld[i]]]);
01754 recvCntsExtras[recvKs[oldToNewIdx[chosenIndexIntoOld[i]]].getWeight()]++;
01755 pickingS[ks2ActualKs[oldToNewIdx[chosenIndexIntoOld[i]]]] = 1;
01756 }
01757
01758
01759
01760
01761
01762 for (unsigned int j =0;j < indicesIntoLUT[i].size(); j++) {
01763 for (unsigned int k=0; k < 8; k++) {
01764 for (unsigned int l = 0;l < indicesIntoOld[i].size(); l++) {
01765 if (nlist[8*(indicesIntoLUT[i][j])+k] == (m_uiLocalBufferSize + indicesIntoOld[i][l])) {
01766 std::vector<unsigned int> ringCorrectionsTuple(3);
01767 ringCorrectionsTuple[0] = indicesIntoLUT[i][j];
01768 ringCorrectionsTuple[1] = k;
01769
01770 if( fromPrimary[chosenIndexIntoOld[i]] || fromSecondary[chosenIndexIntoOld[i]] ) {
01771 ringCorrectionsTuple[2] = (static_cast<unsigned int>(in.size()) - 1);
01772 }else {
01773 ringCorrectionsTuple[2] = oldToNewIdx[chosenIndexIntoOld[i]];
01774 }
01775
01776 secondRingCorrections.push_back(ringCorrectionsTuple);
01777 break;
01778 }
01779 }
01780 }
01781 }
01782
01783 }
01784
01785 for(unsigned int i = 0; i < secondRingCorrections.size(); i++) {
01786
01787 unsigned int elemId = secondRingCorrections[i][0];
01788 unsigned int vtxId = secondRingCorrections[i][1];
01789 unsigned int lutVal = secondRingCorrections[i][2];
01790 nlist[8*elemId+vtxId] = lutVal;
01791
01792 #ifdef __DEBUG_DA_NLIST__
01793 assert( lutVal < in.size() );
01794 #endif
01795
01796
01797
01798 unsigned int _x,_y,_z;
01799
01800 unsigned int x = in[elemId].getX();
01801 unsigned int y = in[elemId].getY();
01802 unsigned int z = in[elemId].getZ();
01803 unsigned int d = in[elemId].getLevel();
01804 unsigned int sz = (1u << (m_uiMaxDepth - d));
01805
01806 #ifdef __DEBUG_DA_NLIST__
01807
01808
01809
01810 if( (elemId >= m_uiElementBegin) && (elemId < m_uiPostGhostBegin) ) {
01811 if(in[lutVal] < in[m_uiElementBegin]) {
01812 std::cout<<m_iRankActive<<" Trying to send yourself as a Post Ghost ELEMENT for elemId = "
01813 <<elemId<<" vtxId = "<<vtxId<<std::endl;
01814 assert(false);
01815 }
01816 if( (m_uiPostGhostBegin < in.size()) && (in[lutVal] >= in[m_uiPostGhostBegin]) ) {
01817 TreeNode tmpToSend = in[lutVal];
01818 tmpToSend.setWeight(elemId);
01819 checkSecondRing.push_back(tmpToSend);
01820 }
01821 }
01822 #endif
01823
01824 _x = in[lutVal].getX();
01825 _y = in[lutVal].getY();
01826 _z = in[lutVal].getZ();
01827
01828 #ifdef __DEBUG_DA_NLIST__
01829 if ( !(in[lutVal].getFlag() & ot::TreeNode::NODE ) ) {
01830
01831 assert(false);
01832 }
01833 #endif
01834
01835 switch (vtxId) {
01836 case 0:
01837 assert(false);
01838 break;
01839 case 1:
01840 if ( ( (x+sz) != _x ) || ( y != _y ) || ( z != _z ) ) {
01841 m_ucpLutMasks[2*elemId+1] |= (1 << vtxId);
01842 }
01843 break;
01844 case 2:
01845 if ( ( x != _x ) || ( (y+sz) != _y ) || ( z != _z ) ) {
01846 m_ucpLutMasks[2*elemId+1] |= (1 << vtxId);
01847 }
01848 break;
01849 case 3:
01850 if ( ( (x+sz) != _x ) || ( (y+sz) != _y ) || ( z != _z ) ) {
01851 m_ucpLutMasks[2*elemId+1] |= (1 << vtxId);
01852 }
01853 break;
01854 case 4:
01855 if ( ( x != _x ) || ( y != _y ) || ( (z+sz) != _z ) ) {
01856 m_ucpLutMasks[2*elemId+1] |= (1 << vtxId);
01857 }
01858 break;
01859 case 5:
01860 if ( ( (x+sz) != _x ) || ( y != _y ) || ( (z+sz) != _z ) ) {
01861 m_ucpLutMasks[2*elemId+1] |= (1 << vtxId);
01862 }
01863 break;
01864 case 6:
01865 if ( ( x != _x ) || ( (y+sz) != _y ) || ( (z+sz) != _z ) ) {
01866 m_ucpLutMasks[2*elemId+1] |= (1 << vtxId);
01867 }
01868 break;
01869 case 7:
01870 if ( ( (x+sz) != _x ) || ( (y+sz) != _y ) || ( (z+sz) != _z ) ) {
01871 m_ucpLutMasks[2*elemId+1] |= (1 << vtxId);
01872 }
01873 break;
01874 }
01875
01876 }
01877
01878 secondRingCorrections.clear();
01879
01880
01881
01882
01883 sortedUniqueExtras.clear();
01884 chosenIndexIntoOld.clear();
01885
01886 indicesIntoLUT.clear();
01887 indicesIntoOld.clear();
01888
01889 kp2ActualKp.clear();
01890 ks2ActualKs.clear();
01891
01892 sendKp.clear();
01893 sendKs.clear();
01894 recvKp.clear();
01895 recvKs.clear();
01896
01897 if(oldToNewIdx) {
01898 delete [] oldToNewIdx;
01899 oldToNewIdx = NULL;
01900 }
01901
01902 if(fromPrimary) {
01903 delete [] fromPrimary;
01904 fromPrimary = NULL;
01905 }
01906
01907 if(fromSecondary) {
01908 delete [] fromSecondary;
01909 fromSecondary = NULL;
01910 }
01911
01912
01913 for (unsigned int i=0; i<m_iNpesActive; i++) {
01914 if (recvCntsExtras[i]) {
01915 ScndRecvProcs.push_back(i);
01916 ScndRecvCounts.push_back(recvCntsExtras[i]);
01917 #ifdef __DEBUG_DA_NLIST__
01918 std::cout<<m_iRankActive<<" Scnd Recv P : "<<i<<" Scnd Recv C : "<< recvCntsExtras[i]<<std::endl;
01919 assert(i != m_iRankActive);
01920 #endif
01921 }
01922 }
01923
01924 if(recvCntsExtras) {
01925 delete [] recvCntsExtras;
01926 recvCntsExtras = NULL;
01927 }
01928
01929 #ifdef __DEBUG_DA_NLIST__
01930 MPI_Barrier(m_mpiCommActive);
01931 if(!m_iRankActive) {
01932 std::cout<<std::endl;
01933 std::cout<<"Returning Selection in Second Ring..."<<std::endl;
01934 std::cout<<std::endl;
01935 }
01936 MPI_Barrier(m_mpiCommActive);
01937 #endif
01938
01939 char* pickingPptr = NULL;
01940 char* pickedPptr = NULL;
01941 char* pickingSptr = NULL;
01942 char* pickedSptr = NULL;
01943
01944 if(!pickingP.empty()) {
01945 pickingPptr = &(*(pickingP.begin()));
01946 }
01947 if(!pickingS.empty()) {
01948 pickingSptr = &(*(pickingS.begin()));
01949 }
01950 if(!pickedP.empty()) {
01951 pickedPptr = &(*(pickedP.begin()));
01952 }
01953 if(!pickedS.empty()) {
01954 pickedSptr = &(*(pickedS.begin()));
01955 }
01956
01957 PROF_BUILD_NLIST_COMM_BEGIN
01958
01959 par::Mpi_Alltoallv_sparse<char>(pickingPptr ,numKeysRecvP, recvOffsetsP ,
01960 pickedPptr, numKeysSendP, sendOffsetsP, m_mpiCommActive);
01961
01962 par::Mpi_Alltoallv_sparse<char>(pickingSptr ,numKeysRecvS, recvOffsetsS,
01963 pickedSptr, numKeysSendS, sendOffsetsS , m_mpiCommActive);
01964
01965 PROF_BUILD_NLIST_COMM_END
01966
01967 pickingP.clear();
01968 pickingS.clear();
01969
01970 #ifdef __DEBUG_DA_NLIST__
01971 MPI_Barrier(m_mpiCommActive);
01972 if(!m_iRankActive) {
01973 std::cout<<std::endl;
01974 std::cout<<"Second ScatterMap..."<<std::endl;
01975 std::cout<<std::endl;
01976 }
01977 MPI_Barrier(m_mpiCommActive);
01978 #endif
01979
01980
01981 unsigned int cnt=0, pickedCnt=0;
01982 unsigned int pCnt=0, sCnt=0;
01983 for (unsigned int i=0; i < static_cast<unsigned int>(m_iNpesActive); i++) {
01984 cnt=0;
01985 pickedCnt=0;
01986
01987 while (cnt < (numKeysSendP[i] + numKeysSendS[i]) ) {
01988
01989
01990
01991
01992 if ( (sCnt >= (sendOffsetsS[i] + numKeysSendS[i])) ||
01993 ( ( pCnt < (sendOffsetsP[i] + numKeysSendP[i]) ) &&
01994 ( idxSendKp[pCnt] <= idxSendKs[sCnt]) ) ) {
01995 #ifdef __DEBUG_DA_NLIST__
01996 if(idxSendKp[pCnt] == -1) {
01997 assert(!pickedP[pCnt]);
01998 }
01999 #endif
02000 if (pickedP[pCnt]) {
02001 ScndScatterMap.push_back(idxSendKp[pCnt]);
02002 pickedCnt++;
02003 }
02004 pCnt++;
02005 } else if ( ( pCnt >=
02006 static_cast<unsigned int>(sendOffsetsP[i] + numKeysSendP[i]) ) ||
02007 ( sCnt < static_cast<unsigned int>(sendOffsetsS[i] + numKeysSendS[i]) ) ) {
02008 #ifdef __DEBUG_DA_NLIST__
02009 if(idxSendKs[sCnt] == -1) {
02010 assert(!pickedS[sCnt]);
02011 }
02012 #endif
02013 if (pickedS[sCnt]) {
02014 ScndScatterMap.push_back(idxSendKs[sCnt]);
02015 pickedCnt++;
02016 }
02017 sCnt++;
02018 } else {
02019 std::cout << "Scnd skipped both" << std::endl;
02020 }
02021 cnt++;
02022 }
02023 if (pickedCnt) {
02024 ScndSendProcs.push_back(i);
02025 ScndSendCounts.push_back(pickedCnt);
02026 #ifdef __DEBUG_DA_NLIST__
02027 std::cout<<m_iRankActive<<" Scnd Send P : "<<i<<" Scnd Send C : "<< pickedCnt<<std::endl;
02028 assert(i != m_iRankActive);
02029 #endif
02030 }
02031 }
02032 #ifdef __DEBUG_DA_NLIST__
02033 for(unsigned int i = 0; i < m_iNpesActive; i++) {
02034 unsigned int numToSend_i = 0;
02035 for(unsigned int j = sendOffsetsP[i]; j < (sendOffsetsP[i] + numKeysSendP[i]); j++) {
02036 if(pickedP[j]) {
02037 numToSend_i++;
02038 }
02039 }
02040 for(unsigned int j = sendOffsetsS[i]; j < (sendOffsetsS[i] + numKeysSendS[i]); j++) {
02041 if(pickedS[j]) {
02042 numToSend_i++;
02043 }
02044 }
02045 if(numToSend_i) {
02046 std::cout<<m_iRankActive<<" Actually should send "<<numToSend_i<<" to "<<i<<std::endl;
02047 }
02048 }
02049 #endif
02050
02051
02052 m_uiLocalBufferSize = static_cast<unsigned int>(in.size());
02053
02054 idxSendKp.clear();
02055 idxSendKs.clear();
02056
02057 delete [] sendOffsetsP;
02058 sendOffsetsP = NULL;
02059
02060 delete [] sendOffsetsS;
02061 sendOffsetsS = NULL;
02062
02063 delete [] numKeysSendP;
02064 numKeysSendP = NULL;
02065
02066 delete [] numKeysSendS;
02067 numKeysSendS = NULL;
02068
02069 delete [] numKeysRecvP;
02070 numKeysRecvP = NULL;
02071
02072 delete [] recvOffsetsP;
02073 recvOffsetsP = NULL;
02074
02075 delete [] recvOffsetsS;
02076 recvOffsetsS = NULL;
02077
02078 delete [] numKeysRecvS;
02079 numKeysRecvS = NULL;
02080
02081 pickedP.clear();
02082 pickedS.clear();
02083
02084 #ifdef __DEBUG_DA_NLIST__
02085 MPI_Barrier(m_mpiCommActive);
02086 if(!m_iRankActive) {
02087 std::cout<<std::endl;
02088 std::cout<<"Finished Correction for Second Ring..."<<std::endl;
02089 std::cout<<std::endl;
02090 }
02091 MPI_Barrier(m_mpiCommActive);
02092 #endif
02093
02094
02095
02096 #ifdef __DEBUG_DA_NLIST__
02097 MPI_Barrier(m_mpiCommActive);
02098
02099
02100
02101
02102
02103
02104 std::vector<std::vector<unsigned int> > secondRingExpectedScatterMap(m_iNpesActive);
02105 for(unsigned int i = 0; i < checkSecondRing.size(); i++) {
02106 unsigned int bId;
02107
02108 bool bucketFound = seq::maxLowerBound<ot::TreeNode>(m_tnMinAllBlocks, checkSecondRing[i], bId, NULL, NULL);
02109 assert(bucketFound);
02110 assert(bId < m_iNpesActive);
02111 assert( m_tnMinAllBlocks[bId] <= checkSecondRing[i] );
02112 secondRingExpectedScatterMap[bId].push_back(checkSecondRing[i].getWeight());
02113 }
02114
02115 assert(seq::test::isUniqueAndSorted(m_uipSendProcs));
02116
02117 unsigned int toSendCnt = 0;
02118 unsigned int currOffset = 0;
02119 for(unsigned int i = 0; i < m_iNpesActive; i++) {
02120 seq::makeVectorUnique<unsigned int>(secondRingExpectedScatterMap[i], false) ;
02121 if( (toSendCnt < m_uipSendProcs.size()) && (m_uipSendProcs[toSendCnt] == i) ) {
02122 for(unsigned int j = currOffset; j < (currOffset + m_uipSendCounts[toSendCnt] - 1); j++ ) {
02123 assert(m_uipScatterMap[j] < m_uipScatterMap[j+1]);
02124 }
02125 unsigned int st = 0;
02126 for(unsigned int j = 0; j < secondRingExpectedScatterMap[i].size(); j++) {
02127 bool foundIt = false;
02128 for(unsigned int k = st; k < m_uipSendCounts[toSendCnt]; k++) {
02129 assert( (currOffset + k) < m_uipScatterMap.size() );
02130 if( m_uipScatterMap[currOffset + k] == secondRingExpectedScatterMap[i][j] ) {
02131 foundIt = true;
02132
02133
02134 st = (k+1);
02135 break;
02136 }
02137 }
02138 if(!foundIt) {
02139 std::cout<<m_iRankActive<<" should have sent in["<<
02140 secondRingExpectedScatterMap[i][j]<<"]: "<<in[secondRingExpectedScatterMap[i][j]]
02141 <<" to "<<i<<" in primary. But did not."<<std::endl;
02142 assert(false);
02143 }
02144 }
02145 currOffset += m_uipSendCounts[toSendCnt];
02146 toSendCnt++;
02147 }else if( !(secondRingExpectedScatterMap[i].empty()) ) {
02148 std::cout<<m_iRankActive<<" should have sent some elements to "<<i<<" in primary."
02149 <<" But did not. toSendCnt: "<<toSendCnt<<" primarySz: "<<(m_uipSendProcs.size())<<std::endl;
02150 std::cout<<m_iRankActive<<" For example, This element was not found in primary ScatterMap: "
02151 <<in[secondRingExpectedScatterMap[i][0]]<<std::endl;
02152 assert(false);
02153 }
02154 }
02155
02156 secondRingExpectedScatterMap.clear();
02157 checkSecondRing.clear();
02158
02159 if(!m_iRankActive) {
02160 std::cout<<" Finished Checking Apriori send for Second Ring.."<<std::endl;
02161 }
02162 MPI_Barrier(m_mpiCommActive);
02163 #endif
02164
02165
02166
02167
02168 std::vector<seq::IndexHolder<ot::TreeNode> > inHolder(in.size());
02169 for (unsigned int i=0; i < in.size(); i++) {
02170 in[i].setWeight(i);
02171 inHolder[i].value = &in[i];
02172 if( (i >= nelem) || ( (i < iLoopEnd) &&
02173 (m_ucpLutMasks[(2*i) + 1] == ot::DA_FLAGS::FOREIGN) ) ) {
02174
02175 inHolder[i].index = 0;
02176 } else {
02177 inHolder[i].index = 1;
02178 }
02179 }
02180
02181 #ifdef __DEBUG_DA_NLIST__
02182 MPI_Barrier(m_mpiCommActive);
02183 if(!m_iRankActive) {
02184 std::cout<<std::endl;
02185 std::cout<<"created inHolder..."<<std::endl;
02186 std::cout<<std::endl;
02187 }
02188 MPI_Barrier(m_mpiCommActive);
02189 #endif
02190
02191
02192 std::sort( inHolder.begin(), inHolder.end() );
02193
02194 assert(m_uiElementBegin < in.size());
02195 TreeNode myFirstOctant = in[m_uiElementBegin];
02196 assert( (m_uiPostGhostBegin - 1) < in.size() );
02197 TreeNode myLastOctant = in[m_uiPostGhostBegin - 1];
02198
02199
02200 nelem = 0;
02201 if(m_uiElementSize) {
02202 while( (nelem < in.size()) &&
02203 ((*(inHolder[nelem].value)) <= in[m_uiElementEnd - 1]) ) {
02204 nelem++;
02205 }
02206 } else {
02207 while( ( nelem < in.size() ) &&
02208 ( (*(inHolder[nelem].value)) < myFirstOctant ) &&
02209 ( !((inHolder[nelem].value)->getFlag() & ot::TreeNode::BOUNDARY) ) ) {
02210 nelem++;
02211 }
02212 }
02213
02214 #ifdef __DEBUG_DA_NLIST__
02215 MPI_Barrier(m_mpiCommActive);
02216 if(!m_iRankActive) {
02217 std::cout<<std::endl;
02218 std::cout<<"Corrected NELEM..."<<std::endl;
02219 std::cout<<std::endl;
02220 }
02221 MPI_Barrier(m_mpiCommActive);
02222 #endif
02223
02224
02225
02226 std::vector<unsigned int> oldToNew(in.size());
02227
02228 for (unsigned int i = 0; i < in.size(); i++) {
02229 oldToNew[inHolder[i].value->getWeight()] = i;
02230 }
02231
02232 #ifdef __DEBUG_DA_NLIST__
02233 MPI_Barrier(m_mpiCommActive);
02234 if(!m_iRankActive) {
02235 std::cout<<std::endl;
02236 std::cout<<"Computed Old2New..."<<std::endl;
02237 std::cout<<std::endl;
02238 }
02239 MPI_Barrier(m_mpiCommActive);
02240 #endif
02241
02242
02243 std::vector<ot::TreeNode> tmpIn(in.size());
02244 for(unsigned int i = 0; i < in.size(); i++) {
02245 tmpIn[i] = *(inHolder[i].value);
02246 tmpIn[i].setWeight(inHolder[i].index);
02247 }
02248
02249 in = tmpIn;
02250 tmpIn.clear();
02251 inHolder.clear();
02252
02253 #ifdef __DEBUG_DA_NLIST__
02254 MPI_Barrier(m_mpiCommActive);
02255 if(!m_iRankActive) {
02256 std::cout<<std::endl;
02257 std::cout<<"Reshuffled In."<<std::endl;
02258 std::cout<<std::endl;
02259 }
02260 MPI_Barrier(m_mpiCommActive);
02261 #endif
02262
02263
02264
02265 m_uiPreGhostElementSize = 0;
02266 while( (m_uiPreGhostElementSize < in.size()) && (in[m_uiPreGhostElementSize] < myFirstOctant) ) {
02267 if ( !(in[m_uiPreGhostElementSize].getFlag() & ot::TreeNode::BOUNDARY) ) {
02268 m_uiPreGhostElementSize++;
02269 }else {
02270 break;
02271 }
02272 }
02273
02274 if(nelem != (m_uiPreGhostElementSize + m_uiElementSize) ) {
02275 std::cout<<"Processor "<<m_iRankAll<<" failing: nelem = "<<nelem
02276 <<" pgElemSz = "<<m_uiPreGhostElementSize<<" elemSz = "<<m_uiElementSize<<std::endl;
02277 }
02278 assert( nelem == (m_uiPreGhostElementSize + m_uiElementSize) );
02279 #ifdef __DEBUG_DA_NLIST__
02280 MPI_Barrier(m_mpiCommActive);
02281 if(!m_iRankActive) {
02282 std::cout<<std::endl;
02283 std::cout<<"PreGhost Counters Reset."<<std::endl;
02284 std::cout<<std::endl;
02285 }
02286 MPI_Barrier(m_mpiCommActive);
02287 #endif
02288
02289
02290 m_uiElementBegin = m_uiPreGhostElementSize;
02291 m_uiPostGhostBegin = m_uiPreGhostElementSize;
02292 while( (m_uiPostGhostBegin < in.size()) && (in[m_uiPostGhostBegin] <= myLastOctant) ) {
02293 if(in[m_uiPostGhostBegin] < myFirstOctant) {
02294 m_uiElementBegin++;
02295 }
02296 m_uiPostGhostBegin++;
02297 }
02298 m_uiElementEnd = m_uiElementBegin + m_uiElementSize;
02299
02300 #ifdef __DEBUG_DA_NLIST__
02301 MPI_Barrier(m_mpiCommActive);
02302 if(!m_iRankActive) {
02303 std::cout<<std::endl;
02304 std::cout<<"All Counters Reset."<<std::endl;
02305 std::cout<<std::endl;
02306 }
02307 MPI_Barrier(m_mpiCommActive);
02308 #endif
02309
02310 std::vector<unsigned int> nlistNew(8*nelem);
02311 std::vector<unsigned char> hnMaskNew(nelem);
02312
02313 #ifdef __DEBUG_DA_NLIST__
02314 MPI_Barrier(m_mpiCommActive);
02315 if(!m_iRankActive) {
02316 std::cout<<std::endl;
02317 std::cout<<"Allocated memory for the new nlist and masks."<<std::endl;
02318 std::cout<<std::endl;
02319 }
02320 MPI_Barrier(m_mpiCommActive);
02321 #endif
02322
02323
02324
02325
02326
02327
02328
02329
02330 for (unsigned int i = 0; i < iLoopEnd; i++) {
02331 #ifdef __DEBUG_DA_NLIST__
02332 assert(i < oldToNew.size() );
02333 assert(oldToNew[i] < nelem);
02334 if( numFullLoopCtr == 0 ) {
02335 assert(oldToNew[i] < m_uiPreGhostElementSize);
02336 }
02337 assert( (2*i+1) < m_ucpLutMasks.size() );
02338 #endif
02339
02340 hnMaskNew[oldToNew[i]] = m_ucpLutMasks[2*i + 1];
02341
02342 if(m_ucpLutMasks[2*i+1] == ot::DA_FLAGS::FOREIGN) {
02343 continue;
02344 }
02345
02346 unsigned int iiOld = 8*i;
02347 unsigned int iiNew = 8*oldToNew[i];
02348 for (unsigned int j=0; j<8; j++) {
02349
02350 #ifdef __DEBUG_DA_NLIST__
02351 assert( (iiNew + j) < nlistNew.size() );
02352 assert( (iiOld + j) < nlist.size() );
02353 if( nlist[iiOld + j] >= oldToNew.size() ) {
02354 std::cout<<m_iRankActive<<": oldToNew.size(): "<<oldToNew.size()<<" nlist[8*"<<i<<"+"<<j<<"]: "<<nlist[iiOld+j]<<std::endl
02355 <<"oldToNew["<<i<<"] = "<<oldToNew[i]<<" elem: "<<in[oldToNew[i]]<<std::endl;
02356 assert(false);
02357 }
02358 #endif
02359
02360 nlistNew[iiNew + j] = oldToNew[ nlist[iiOld + j] ];
02361 }
02362 }
02363
02364 #ifdef __DEBUG_DA_NLIST__
02365 MPI_Barrier(m_mpiCommActive);
02366 if(!m_iRankActive) {
02367 std::cout<<std::endl;
02368 std::cout<<"Corrected Nlist."<<std::endl;
02369 std::cout<<std::endl;
02370 }
02371 MPI_Barrier(m_mpiCommActive);
02372 #endif
02373
02374
02375
02376
02377 m_ucpLutMasks.resize(2*nelem);
02378 if(numFullLoopCtr) {
02379 for(unsigned int i = 0; i < nelem; i++) {
02380 if(in[i].getWeight() == 0) {
02381 m_ucpLutMasks[2*i + 1] = ot::DA_FLAGS::FOREIGN;
02382 }else {
02383 m_ucpLutMasks[2*i + 1] = hnMaskNew[i];
02384 }
02385 }
02386 }else {
02387 for(unsigned int i = 0; i < m_uiPreGhostElementSize; i++) {
02388 if(in[i].getWeight() == 0) {
02389 m_ucpLutMasks[2*i + 1] = ot::DA_FLAGS::FOREIGN;
02390 }else {
02391 m_ucpLutMasks[2*i + 1] = hnMaskNew[i];
02392 }
02393 }
02394 }
02395 hnMaskNew.clear();
02396
02397 for(unsigned int i = 0; i < in.size(); i++) {
02398 in[i].setWeight(1);
02399 }
02400
02401 nlist = nlistNew;
02402 nlistNew.clear();
02403
02404 #ifdef __DEBUG_DA_NLIST__
02405 MPI_Barrier(m_mpiCommActive);
02406 if(!m_iRankActive) {
02407 std::cout<<std::endl;
02408 std::cout<<"Corrected HnMasks and Nlists."<<std::endl;
02409 std::cout<<std::endl;
02410 }
02411 MPI_Barrier(m_mpiCommActive);
02412 #endif
02413
02414
02415 std::vector<unsigned int> tmpScatterMap;
02416 std::vector<unsigned int> tmpSendProcs;
02417 std::vector<unsigned int> tmpSendCounts;
02418 std::vector<unsigned int> tmpRecvProcs;
02419 std::vector<unsigned int> tmpRecvCounts;
02420
02421
02422
02423
02424 unsigned int primaryCnt = 0;
02425 unsigned int secondaryCnt = 0;
02426 while( (primaryCnt < m_uipSendProcs.size()) && (secondaryCnt < ScndSendProcs.size()) ) {
02427 if( m_uipSendProcs[primaryCnt] < ScndSendProcs[secondaryCnt] ) {
02428 tmpSendProcs.push_back(m_uipSendProcs[primaryCnt]);
02429 tmpSendCounts.push_back(m_uipSendCounts[primaryCnt]);
02430 primaryCnt++;
02431 }else if( m_uipSendProcs[primaryCnt] > ScndSendProcs[secondaryCnt] ) {
02432 tmpSendProcs.push_back(ScndSendProcs[secondaryCnt]);
02433 tmpSendCounts.push_back(ScndSendCounts[secondaryCnt]);
02434 secondaryCnt++;
02435 }else {
02436
02437
02438 tmpSendProcs.push_back(m_uipSendProcs[primaryCnt]);
02439
02440 tmpSendCounts.push_back(m_uipSendCounts[primaryCnt] + ScndSendCounts[secondaryCnt]);
02441 primaryCnt++;
02442
02443 secondaryCnt++;
02444 }
02445 }
02446
02447
02448 while( primaryCnt < m_uipSendProcs.size() ) {
02449 tmpSendProcs.push_back(m_uipSendProcs[primaryCnt]);
02450 tmpSendCounts.push_back(m_uipSendCounts[primaryCnt]);
02451 primaryCnt++;
02452 }
02453
02454
02455 while( secondaryCnt < ScndSendProcs.size() ) {
02456 tmpSendProcs.push_back(ScndSendProcs[secondaryCnt]);
02457 tmpSendCounts.push_back(ScndSendCounts[secondaryCnt]);
02458 secondaryCnt++;
02459 }
02460
02461 #ifdef __DEBUG_DA_NLIST__
02462 MPI_Barrier(m_mpiCommActive);
02463 assert( seq::test::isUniqueAndSorted<unsigned int>(m_uipSendProcs) );
02464 assert( seq::test::isUniqueAndSorted<unsigned int>(ScndSendProcs) );
02465 assert( seq::test::isUniqueAndSorted<unsigned int>(tmpSendProcs) );
02466 for( unsigned int i = 0; i < m_uipSendProcs.size(); i++) {
02467 std::cout<<m_iRankActive<<" --> "<<m_uipSendProcs[i]<<" (P) "<<m_uipSendCounts[i]<<std::endl;
02468 }
02469 std::cout<<m_iRankActive<<" P-Scatter: "<<m_uipScatterMap.size()<<std::endl;
02470 assert( m_uipSendProcs.size() == m_uipSendCounts.size() );
02471 MPI_Barrier(m_mpiCommActive);
02472 for( unsigned int i = 0; i < ScndSendProcs.size(); i++) {
02473 std::cout<<m_iRankActive<<" --> "<<ScndSendProcs[i]<<" (S) "<<ScndSendCounts[i]<<std::endl;
02474 }
02475 std::cout<<m_iRankActive<<" S-Scatter: "<<ScndScatterMap.size()<<std::endl;
02476 assert( ScndSendProcs.size() == ScndSendCounts.size() );
02477 MPI_Barrier(m_mpiCommActive);
02478 for( unsigned int i = 0; i < tmpSendProcs.size(); i++) {
02479 std::cout<<m_iRankActive<<" --> "<<tmpSendProcs[i]<<" (T) "<<tmpSendCounts[i]<<std::endl;
02480 }
02481 assert( tmpSendProcs.size() == tmpSendCounts.size() );
02482 MPI_Barrier(m_mpiCommActive);
02483 if(!m_iRankActive) {
02484 std::cout<<std::endl;
02485 std::cout<<"Corrected SendCnts and SendProcs."<<std::endl;
02486 std::cout<<std::endl;
02487 }
02488 MPI_Barrier(m_mpiCommActive);
02489 #endif
02490
02491
02492
02493
02494 primaryCnt = 0;
02495 secondaryCnt = 0;
02496 unsigned int primarySz = 0;
02497 unsigned int secondarySz = 0;
02498 while( (primaryCnt < m_uipSendProcs.size()) && (secondaryCnt < ScndSendProcs.size()) ) {
02499 if( m_uipSendProcs[primaryCnt] < ScndSendProcs[secondaryCnt] ) {
02500 unsigned int numSent = 0;
02501 while( numSent < m_uipSendCounts[primaryCnt] ) {
02502 tmpScatterMap.push_back(oldToNew[m_uipScatterMap[primarySz++]]);
02503 numSent++;
02504 }
02505 primaryCnt++;
02506 }else if( m_uipSendProcs[primaryCnt] > ScndSendProcs[secondaryCnt] ) {
02507 unsigned int numSent = 0;
02508 while( numSent < ScndSendCounts[secondaryCnt] ) {
02509 tmpScatterMap.push_back(oldToNew[ScndScatterMap[secondarySz++]]);
02510 numSent++;
02511 }
02512 secondaryCnt++;
02513 }else {
02514
02515
02516 unsigned int nP = 0;
02517 unsigned int nS = 0;
02518
02519
02520 while( ( nP < m_uipSendCounts[primaryCnt] ) && ( nS < ScndSendCounts[secondaryCnt] ) ) {
02521 #ifdef __DEBUG_DA_NLIST__
02522
02523
02524 if( m_uipScatterMap[primarySz] == ScndScatterMap[secondarySz] ) {
02525 std::cout<<m_iRankActive<<" is sending "<<
02526 in[oldToNew[m_uipScatterMap[primarySz]]]<<" to "
02527 <<m_uipSendProcs[primaryCnt]<<" in both primary and secondary."<<std::endl;
02528 assert(false);
02529 }
02530 #endif
02531
02532
02533
02534 if( m_uipScatterMap[primarySz] < ScndScatterMap[secondarySz] ) {
02535 #ifdef __DEBUG_DA_NLIST__
02536 assert( oldToNew[m_uipScatterMap[primarySz]] < oldToNew[ScndScatterMap[secondarySz]] );
02537 #endif
02538 tmpScatterMap.push_back(oldToNew[m_uipScatterMap[primarySz++]]);
02539 nP++;
02540 }else {
02541 #ifdef __DEBUG_DA_NLIST__
02542 assert( oldToNew[m_uipScatterMap[primarySz]] > oldToNew[ScndScatterMap[secondarySz]] );
02543 #endif
02544 tmpScatterMap.push_back(oldToNew[ScndScatterMap[secondarySz++]]);
02545 nS++;
02546 }
02547 }
02548
02549
02550 while( nP < m_uipSendCounts[primaryCnt] ) {
02551 tmpScatterMap.push_back(oldToNew[m_uipScatterMap[primarySz++]]);
02552 nP++;
02553 }
02554
02555
02556 while( nS < ScndSendCounts[secondaryCnt] ) {
02557 tmpScatterMap.push_back(oldToNew[ScndScatterMap[secondarySz++]]);
02558 nS++;
02559 }
02560
02561 primaryCnt++;
02562 secondaryCnt++;
02563 }
02564 }
02565
02566 #ifdef __DEBUG_DA_NLIST__
02567
02568 if( primarySz < m_uipScatterMap.size() ) {
02569 assert(primaryCnt < m_uipSendProcs.size());
02570 }else {
02571 assert(primaryCnt == m_uipSendProcs.size());
02572 }
02573
02574 if( secondarySz < ScndScatterMap.size() ) {
02575 assert(secondaryCnt < ScndSendProcs.size());
02576 }else {
02577 assert(secondaryCnt == ScndSendProcs.size());
02578 }
02579 #endif
02580
02581
02582 while( primarySz < m_uipScatterMap.size() ) {
02583 tmpScatterMap.push_back(oldToNew[m_uipScatterMap[primarySz++]]);
02584 }
02585
02586
02587 while( secondarySz < ScndScatterMap.size() ) {
02588 tmpScatterMap.push_back(oldToNew[ScndScatterMap[secondarySz++]]);
02589 }
02590
02591 #ifdef __DEBUG_DA_NLIST__
02592 MPI_Barrier(m_mpiCommActive);
02593 if(!m_iRankActive) {
02594 std::cout<<std::endl;
02595 std::cout<<"Merged Primary and Secondary Scatter Maps."<<std::endl;
02596 std::cout<<std::endl;
02597 }
02598
02599 std::cout<<m_iRankActive<<" pScatterSz: "<<m_uipScatterMap.size()<<" sScatterSz: "
02600 <<ScndScatterMap.size()<<" tScatterSz: "<<tmpScatterMap.size()<<std::endl;
02601
02602 unsigned int debug_pCnt = 0;
02603 unsigned int debug_sCnt = 0;
02604 unsigned int debug_tCnt = 0;
02605 unsigned int debug_pOff = 0;
02606 unsigned int debug_sOff = 0;
02607 unsigned int debug_tOff = 0;
02608 for(unsigned int i = 0; i< m_iNpesActive; i++) {
02609 std::vector<unsigned int> pSctList;
02610 std::vector<unsigned int> sSctList;
02611 std::vector<unsigned int> tSctList;
02612 if( (debug_pCnt < m_uipSendProcs.size()) && (m_uipSendProcs[debug_pCnt] == i) ) {
02613 for(unsigned int j = 0; j < m_uipSendCounts[debug_pCnt]; j++) {
02614 pSctList.push_back(m_uipScatterMap[debug_pOff + j]);
02615 }
02616 debug_pOff += m_uipSendCounts[debug_pCnt];
02617 debug_pCnt++;
02618 }
02619 if( (debug_sCnt < ScndSendProcs.size()) && (ScndSendProcs[debug_sCnt] == i) ) {
02620 for(unsigned int j = 0; j < ScndSendCounts[debug_sCnt]; j++) {
02621 sSctList.push_back(ScndScatterMap[debug_sOff + j]);
02622 }
02623 debug_sOff += ScndSendCounts[debug_sCnt];
02624 debug_sCnt++;
02625 }
02626 if( (debug_tCnt < tmpSendProcs.size()) && (tmpSendProcs[debug_tCnt] == i) ) {
02627 for(unsigned int j = 0; j < tmpSendCounts[debug_tCnt]; j++) {
02628 tSctList.push_back(tmpScatterMap[debug_tOff + j]);
02629 }
02630 debug_tOff += tmpSendCounts[debug_tCnt];
02631 debug_tCnt++;
02632 }
02633
02634 assert( tSctList.size() == (pSctList.size() + sSctList.size()) );
02635
02636 assert( seq::test::isUniqueAndSorted<unsigned int>(pSctList) );
02637 assert( seq::test::isUniqueAndSorted<unsigned int>(sSctList) );
02638
02639
02640 std::vector<unsigned int> ttList;
02641 for(unsigned int j=0; j < sSctList.size(); j++) {
02642 ttList.push_back(oldToNew[sSctList[j]]);
02643 }
02644 for(unsigned int j=0; j < pSctList.size(); j++) {
02645 ttList.push_back(oldToNew[pSctList[j]]);
02646 }
02647
02648 sort(ttList.begin(),ttList.end());
02649 assert( seq::test::isUniqueAndSorted<unsigned int>(ttList) );
02650
02651
02652 assert(ttList.size() == tSctList.size());
02653 for (unsigned int j = 0; j < ttList.size(); j++) {
02654 if (ttList[j] != tSctList[j]) {
02655 std::cout << m_iRankActive << ": MergeFailed at " << j << std::endl;
02656 std::cout << ttList[j] << " != " << tSctList[j] << std::endl;
02657 for (unsigned int k = 0; k < pSctList.size(); k++) {
02658 std::cout << ttList[k] << ", " << tSctList[k]
02659 << ", " << pSctList[k] << std::endl;
02660 }
02661 for (unsigned int k = 0; k < sSctList.size(); k++) {
02662 std::cout << sSctList[k] << std::endl;
02663 }
02664 assert(false);
02665 }
02666 }
02667
02668 ttList.clear();
02669 pSctList.clear();
02670 sSctList.clear();
02671 tSctList.clear();
02672 }
02673
02674 assert( debug_pCnt == m_uipSendProcs.size() );
02675 assert( debug_sCnt == ScndSendProcs.size() );
02676 assert( debug_tCnt == tmpSendProcs.size() );
02677
02678 assert( tmpScatterMap.size() == (m_uipScatterMap.size() + ScndScatterMap.size()) );
02679 MPI_Barrier(m_mpiCommActive);
02680 #endif
02681
02682 oldToNew.clear();
02683
02684 primaryCnt = 0;
02685 secondaryCnt = 0;
02686 while( (primaryCnt < m_uipRecvProcs.size()) && (secondaryCnt < ScndRecvProcs.size()) ) {
02687 if( m_uipRecvProcs[primaryCnt] < ScndRecvProcs[secondaryCnt] ) {
02688 tmpRecvProcs.push_back(m_uipRecvProcs[primaryCnt]);
02689 tmpRecvCounts.push_back(m_uipRecvCounts[primaryCnt]);
02690 primaryCnt++;
02691 }else if( m_uipRecvProcs[primaryCnt] > ScndRecvProcs[secondaryCnt] ) {
02692 tmpRecvProcs.push_back(ScndRecvProcs[secondaryCnt]);
02693 tmpRecvCounts.push_back(ScndRecvCounts[secondaryCnt]);
02694 secondaryCnt++;
02695 }else {
02696
02697 tmpRecvProcs.push_back(m_uipRecvProcs[primaryCnt]);
02698 tmpRecvCounts.push_back(m_uipRecvCounts[primaryCnt] + ScndRecvCounts[secondaryCnt]);
02699 primaryCnt++;
02700
02701 secondaryCnt++;
02702 }
02703 }
02704
02705 while( primaryCnt < m_uipRecvProcs.size() ) {
02706 tmpRecvProcs.push_back(m_uipRecvProcs[primaryCnt]);
02707 tmpRecvCounts.push_back(m_uipRecvCounts[primaryCnt]);
02708 primaryCnt++;
02709 }
02710
02711 while( secondaryCnt < ScndRecvProcs.size() ) {
02712 tmpRecvProcs.push_back(ScndRecvProcs[secondaryCnt]);
02713 tmpRecvCounts.push_back(ScndRecvCounts[secondaryCnt]);
02714 secondaryCnt++;
02715 }
02716
02717 #ifdef __DEBUG_DA_NLIST__
02718 MPI_Barrier(m_mpiCommActive);
02719 assert( seq::test::isUniqueAndSorted<unsigned int>(m_uipRecvProcs) );
02720 assert( seq::test::isUniqueAndSorted<unsigned int>(ScndRecvProcs) );
02721 assert( seq::test::isUniqueAndSorted<unsigned int>(tmpRecvProcs) );
02722 for( unsigned int i = 0; i < m_uipRecvProcs.size(); i++) {
02723 std::cout<<m_iRankActive<<" <-- "<<m_uipRecvProcs[i]<<" (P) "<<m_uipRecvCounts[i]<<std::endl;
02724 }
02725 assert( m_uipRecvProcs.size() == m_uipRecvCounts.size() );
02726 MPI_Barrier(m_mpiCommActive);
02727 for( unsigned int i = 0; i < ScndRecvProcs.size(); i++) {
02728 std::cout<<m_iRankActive<<" <-- "<<ScndRecvProcs[i]<<" (S) "<<ScndRecvCounts[i]<<std::endl;
02729 }
02730 assert( ScndRecvProcs.size() == ScndRecvCounts.size() );
02731 MPI_Barrier(m_mpiCommActive);
02732 for( unsigned int i = 0; i < tmpRecvProcs.size(); i++) {
02733 std::cout<<m_iRankActive<<" <-- "<<tmpRecvProcs[i]<<" (T) "<<tmpRecvCounts[i]<<std::endl;
02734 }
02735 assert( tmpRecvProcs.size() == tmpRecvCounts.size() );
02736 MPI_Barrier(m_mpiCommActive);
02737 #endif
02738
02739 ScndScatterMap.clear();
02740 ScndSendProcs.clear();
02741 ScndSendCounts.clear();
02742 ScndRecvProcs.clear();
02743 ScndRecvCounts.clear();
02744
02745 m_uipScatterMap = tmpScatterMap;
02746 m_uipSendProcs = tmpSendProcs;
02747 m_uipSendCounts = tmpSendCounts;
02748 m_uipRecvProcs = tmpRecvProcs;
02749 m_uipRecvCounts = tmpRecvCounts;
02750
02751 tmpScatterMap.clear();
02752 tmpSendProcs.clear();
02753 tmpSendCounts.clear();
02754 tmpRecvProcs.clear();
02755 tmpRecvCounts.clear();
02756
02757 #ifdef __DEBUG_DA_NLIST__
02758 MPI_Barrier(m_mpiCommActive);
02759 if(!m_iRankActive) {
02760 std::cout<<std::endl;
02761 std::cout<<"Corrected RecvCounts and RecvProcs."<<std::endl;
02762 std::cout<<std::endl;
02763 }
02764 MPI_Barrier(m_mpiCommActive);
02765 #endif
02766
02767 }
02768
02769
02770 m_uipSendOffsets.resize(m_uipSendCounts.size());
02771 m_uipRecvOffsets.resize(m_uipRecvCounts.size());
02772
02773 if ( m_uipSendCounts.size() ) {
02774 m_uipSendOffsets[0] = 0;
02775 for (unsigned int i=1; i < m_uipSendCounts.size(); i++) {
02776 m_uipSendOffsets[i] = (m_uipSendCounts[i-1] + m_uipSendOffsets[i-1]);
02777 }
02778 }
02779
02780 if ( m_uipRecvCounts.size() ) {
02781 bool adjustedAlready = false;
02782 if(m_uipRecvProcs[0] < static_cast<unsigned int>(m_iRankActive)) {
02783 m_uipRecvOffsets[0] = 0;
02784 }else {
02785 m_uipRecvOffsets[0] = m_uiPostGhostBegin;
02786 adjustedAlready = true;
02787 }
02788 for (unsigned int i=1; i < m_uipRecvCounts.size(); i++) {
02789 if( (m_uipRecvProcs[i] < m_iRankActive) || adjustedAlready ) {
02790 m_uipRecvOffsets[i] = (m_uipRecvCounts[i-1] + m_uipRecvOffsets[i-1]);
02791 }else {
02792 m_uipRecvOffsets[i] = m_uiPostGhostBegin;
02793 adjustedAlready = true;
02794 }
02795 }
02796 }
02797
02798
02799
02800
02801 m_uipElemScatterMap.clear();
02802 m_uipElemSendOffsets.clear();
02803 m_uipElemSendProcs.clear();
02804 m_uipElemSendCounts.clear();
02805 for(unsigned int i = 0; i < m_uipSendProcs.size(); i++) {
02806 if(m_uipSendProcs[i] > static_cast<unsigned int>(m_iRankActive)) {
02807 unsigned int numElemProcs = static_cast<unsigned int>(m_uipElemSendOffsets.size());
02808 m_uipElemSendOffsets.push_back(m_uipElemScatterMap.size());
02809 for(unsigned int j = m_uipSendOffsets[i]; j < (m_uipSendCounts[i] + m_uipSendOffsets[i]); j++) {
02810 if(m_uipScatterMap[j] < m_uiElementEnd) {
02811 m_uipElemScatterMap.push_back(m_uipScatterMap[j]);
02812 }else {
02813 break;
02814 }
02815 }
02816 unsigned int currCount =
02817 (static_cast<unsigned int>(m_uipElemScatterMap.size())
02818 - m_uipElemSendOffsets[numElemProcs]);
02819 if(currCount) {
02820 m_uipElemSendProcs.push_back(m_uipSendProcs[i]);
02821 m_uipElemSendCounts.push_back(currCount);
02822 }else {
02823 m_uipElemSendOffsets.resize(numElemProcs);
02824 }
02825 }
02826 }
02827
02828 m_uipElemRecvOffsets.clear();
02829 m_uipElemRecvProcs.clear();
02830 m_uipElemRecvCounts.clear();
02831 for(unsigned int i = 0; i < m_uipRecvProcs.size(); i++) {
02832 if(m_uipRecvOffsets[i] < m_uiPreGhostElementSize) {
02833
02834 m_uipElemRecvProcs.push_back(m_uipRecvProcs[i]);
02835 m_uipElemRecvOffsets.push_back(m_uipRecvOffsets[i]);
02836 if( (m_uipRecvOffsets[i] + m_uipRecvCounts[i]) <= m_uiPreGhostElementSize) {
02837 m_uipElemRecvCounts.push_back(m_uipRecvCounts[i]);
02838 } else {
02839 m_uipElemRecvCounts.push_back(m_uiPreGhostElementSize - m_uipRecvOffsets[i]);
02840 }
02841 }
02842 }
02843
02844
02845
02846
02847
02848
02849 for ( unsigned int i = 0; i < m_uipScatterMap.size(); i++) {
02850 unsigned int idx = m_uipScatterMap[i];
02851 #ifdef __DEBUG_DA_NLIST__
02852 assert( (idx >= m_uiElementBegin) && (idx < m_uiPostGhostBegin) );
02853 #endif
02854
02855 if ( idx < nelem) {
02856 if (m_ucpLutMasks[2*idx + 1] & 1) {
02857 #ifdef __DEBUG_DA_NLIST__
02858 assert( !(in[idx].getFlag() & ot::TreeNode::NODE) );
02859 #endif
02860
02861 m_uipScatterMap[i] = nlist[8*idx];
02862 }
02863 } else {
02864
02865
02866 if ( !(in[idx].getFlag() & ot::TreeNode::NODE)) {
02867 ot::TreeNode tn = in[idx].getParent();
02868 while ( in[idx] > tn) {
02869 idx--;
02870 }
02871
02872
02873 idx++;
02874 m_uipScatterMap[i] = idx;
02875 #ifdef __DEBUG_DA_NLIST__
02876 assert( in[m_uipScatterMap[i]].getAnchor() == tn.getAnchor() );
02877 assert( (m_uipScatterMap[i] >= m_uiElementBegin) && (m_uipScatterMap[i] < m_uiPostGhostBegin) );
02878 assert( in[m_uipScatterMap[i]].getFlag() & ot::TreeNode::NODE );
02879 #endif
02880 }
02881 }
02882 }
02883
02884 #ifdef __DEBUG_DA_NLIST__
02885 MPI_Barrier(m_mpiCommActive);
02886 if(!m_iRankActive) {
02887 std::cout<<std::endl;
02888 std::cout<<"Corrected New Scatter Map for hanging anchors."<<std::endl;
02889 std::cout<<std::endl;
02890 }
02891 MPI_Barrier(m_mpiCommActive);
02892 #endif
02893
02894
02895 for(unsigned int i=0;i<nelem;i++) {
02896 if(m_ucpLutMasks[2*i+1] == ot::DA_FLAGS::FOREIGN) {
02897 for(unsigned int j=0;j<8;j++) {
02898 nlist[8*i + j] = i;
02899 }
02900 }
02901 }
02902
02903
02904 bool *isNode = NULL;
02905 if(m_uiLocalBufferSize) {
02906 isNode = new bool[m_uiLocalBufferSize];
02907 }
02908 for (unsigned int i = 0; i < m_uiLocalBufferSize; i++) {
02909 isNode[i] = false;
02910 }
02911
02912 for ( unsigned int i = 0; i < nelem; i++) {
02913 if(m_ucpLutMasks[2*i+1] == ot::DA_FLAGS::FOREIGN) {
02914 continue;
02915 }
02916 for (unsigned int j = 0; j < 8; j++) {
02917 #ifdef __DEBUG_DA_NLIST__
02918 assert(nlist[8*i + j] < m_uiLocalBufferSize);
02919 #endif
02920 isNode[nlist[8*i + j]] = true;
02921 }
02922 }
02923
02924 for (unsigned int i = 0; i < m_uiLocalBufferSize; i++) {
02925 if ( isNode[i] ) {
02926 #ifdef __DEBUG_DA_NLIST__
02927 if ( (i >= m_uiElementBegin) && (i < m_uiPostGhostBegin) ) {
02928 assert( (in[i].getFlag() & ot::TreeNode::NODE) );
02929 }
02930 #endif
02931 in[i].orFlag( ot::TreeNode::NODE );
02932 }
02933 }
02934
02935 #ifdef __DEBUG_DA_NLIST__
02936 MPI_Barrier(m_mpiCommActive);
02937 if(!m_iRankActive) {
02938 std::cout<<std::endl;
02939 std::cout<<"Finished is NODE..."<<std::endl;
02940 std::cout<<std::endl;
02941 }
02942 MPI_Barrier(m_mpiCommActive);
02943 #endif
02944
02945
02946 unsigned int elemNodeSz = 0;
02947 unsigned int bndNodeSz = 0;
02948 unsigned int preGhostNodeSz = 0;
02949 unsigned int preBndNodeSz = 0;
02950 unsigned int postGhostNodeSz = 0;
02951 unsigned int postBndNodeSz = 0;
02952
02953 for (unsigned int i = 0; i < m_uiElementBegin; i++) {
02954 if ( isNode[i] && (!(in[i].getFlag() & ot::TreeNode::BOUNDARY)) ) {
02955 preGhostNodeSz++;
02956 } else if (isNode[i]) {
02957 preBndNodeSz++;
02958 }
02959 }
02960
02961
02962 for (unsigned int i = m_uiElementBegin; i < m_uiElementEnd; i++) {
02963 if (isNode[i]) {
02964 elemNodeSz++;
02965 }
02966 }
02967
02968 for (unsigned int i = m_uiElementEnd; i < m_uiPostGhostBegin; i++) {
02969 if (isNode[i]) {
02970 bndNodeSz++;
02971 }
02972 }
02973
02974 for (unsigned int i = m_uiPostGhostBegin; i < m_uiLocalBufferSize; i++) {
02975 if (isNode[i] && (!(in[i].getFlag() & ot::TreeNode::BOUNDARY)) ) {
02976 postGhostNodeSz++;
02977 } else if (isNode[i]) {
02978 postBndNodeSz++;
02979 }
02980 }
02981
02982 #ifdef __DEBUG_DA_NLIST__
02983 MPI_Barrier(m_mpiCommActive);
02984 if(!m_iRankActive) {
02985 std::cout<<std::endl;
02986 std::cout<<"Finished setting sizes..."<<std::endl;
02987 std::cout<<std::endl;
02988 }
02989 MPI_Barrier(m_mpiCommActive);
02990 #endif
02991
02992 if(isNode) {
02993 delete [] isNode;
02994 isNode = NULL;
02995 }
02996
02997
02998 m_uiPreGhostNodeSize = preGhostNodeSz;
02999 m_uiPreGhostBoundaryNodeSize = preBndNodeSz;
03000 m_uiPostGhostNodeSize = postGhostNodeSz + postBndNodeSz;
03001 m_uiNodeSize = elemNodeSz;
03002 m_uiBoundaryNodeSize = bndNodeSz;
03003
03004 #ifdef __DEBUG_DA_NLIST__
03005 MPI_Barrier(m_mpiCommActive);
03006 std::cout<<m_iRankActive<<" Just Before Compression...."<<std::endl<<std::endl;
03007 std::cout<<m_iRankActive<<" "<<m_uiPreGhostNodeSize<<" "<<m_uiPreGhostBoundaryNodeSize
03008 <<" "<<m_uiPostGhostNodeSize<<" "<<m_uiNodeSize<<" "
03009 <<m_uiBoundaryNodeSize<<std::endl<<std::endl;
03010 MPI_Barrier(m_mpiCommActive);
03011 #endif
03012
03013
03014
03015
03016
03017 if(m_bCompressLut) {
03018 m_ucpLutRemainders.resize(8*nelem);
03019 m_ucpSortOrders.resize(nelem);
03020 }
03021
03022 bool foundBeg = false;
03023 bool foundEnd = false;
03024
03025 m_uiIndependentElementSize = 0;
03026 for (unsigned int i = 0; i < nelem; i++) {
03027
03028 unsigned int ii = 8*i;
03029
03030 unsigned int x = in[i].getX();
03031 unsigned int y = in[i].getY();
03032 unsigned int z = in[i].getZ();
03033
03034 if(m_bCompressLut) {
03035 unsigned int d = in[i].getLevel();
03036 unsigned int sz = 1u << (m_uiMaxDepth - d);
03037
03038 if(!(m_ucpLutMasks[2*i+1])){
03039 unsigned int xp = x + sz;
03040 unsigned int yp = y + sz;
03041 unsigned int zp = z + sz;
03042
03043 unsigned int _x = x^xp;
03044 unsigned int _y = y^yp;
03045 unsigned int _z = z^zp;
03046
03047 if (_x > _y) {
03048 if ( _y > _z) {
03049 m_ucpSortOrders[i] = ot::DA_FLAGS::ZYX;
03050 } else if ( _x > _z ) {
03051 m_ucpSortOrders[i] = ot::DA_FLAGS::YZX;
03052 } else {
03053 m_ucpSortOrders[i] = ot::DA_FLAGS::YXZ;
03054 }
03055 } else {
03056 if ( _x > _z) {
03057 m_ucpSortOrders[i] = ot::DA_FLAGS::ZXY;
03058 } else if ( _y > _z ) {
03059 m_ucpSortOrders[i] = ot::DA_FLAGS::XZY;
03060 } else {
03061 m_ucpSortOrders[i] = ot::DA_FLAGS::XYZ;
03062 }
03063 }
03064 }else {
03065
03066 unsigned int len_par = (unsigned int)(1u << ( m_uiMaxDepth - d +1 ) );
03067
03068 unsigned int a = x % len_par;
03069 unsigned int b = y % len_par;
03070 unsigned int c = z % len_par;
03071
03072 a /= sz;
03073 b /= sz;
03074 c /= sz;
03075
03076 m_ucpSortOrders[i] = (4*c + 2*b + a);
03077 }
03078 }
03079
03080
03081 if ( !foundBeg && !(in[i].getFlag() & ot::DA_FLAGS::DEP_ELEM)
03082 && (m_ucpLutMasks[2*i+1] != ot::DA_FLAGS::FOREIGN) ) {
03083
03084 foundBeg = true;
03085 m_uiIndependentElementBegin = i;
03086 m_ptIndependentOffset = Point(x,y,z);
03087 }
03088
03089
03090 if ( !foundEnd && !(in[nelem-i-1].getFlag() & ot::DA_FLAGS::DEP_ELEM)
03091 && (m_ucpLutMasks[2*(nelem-i-1)+1] != ot::DA_FLAGS::FOREIGN) ) {
03092
03093 foundEnd = true;
03094 m_uiIndependentElementEnd = nelem - i;
03095 }
03096
03097
03098
03099
03100 if( (!(in[i].getFlag() & ot::DA_FLAGS::DEP_ELEM)) &&
03101 (m_ucpLutMasks[2*i+1] != ot::DA_FLAGS::FOREIGN) ) {
03102 m_uiIndependentElementSize++;
03103 }
03104
03105 if ( i == m_uiElementBegin ) {
03106 m_uiElementQuotient = static_cast<unsigned int>(m_uspLutQuotients.size());
03107 }
03108
03109 if ( i == m_uiIndependentElementBegin ) {
03110 m_uiIndependentElementQuotient = static_cast<unsigned int>(m_uspLutQuotients.size());
03111 }
03112
03113
03114 m_ucpLutMasks[2*i] = 0;
03115
03116
03117 if(m_bCompressLut) {
03118
03119
03120
03121
03122
03123
03124 unsigned short q;
03125 unsigned int offset;
03126 std::vector<unsigned int> nl(8);
03127 for (unsigned int ij = 0; ij < 8; ij++) {
03128 nl[ij] = nlist[ii+ij];
03129 }
03130 std::sort(nl.begin(), nl.end());
03131
03132
03133
03134 offset = i - nl[0];
03135 q = (offset >> 8);
03136 m_ucpLutRemainders[8*i] = (offset%256);
03137 if (q) {
03138 m_ucpLutMasks[2*i] |= 1;
03139 m_uspLutQuotients.push_back(q);
03140 }
03141
03142 for (unsigned int j = 1; j < 8; j++) {
03143 offset = nl[j] - nl[j-1];
03144 q = (offset >> 8);
03145 m_ucpLutRemainders[8*i + j] = (offset%256);
03146 if (q) {
03147 m_ucpLutMasks[2*i] |= (1 << j);
03148 m_uspLutQuotients.push_back(q);
03149 }
03150 }
03151 nl.clear();
03152 }
03153
03154 }
03155
03156
03157 if(!m_bCompressLut) {
03158 m_uiNlist = nlist;
03159 }
03160
03161
03162 nlist.clear();
03163
03164 #ifdef __DEBUG_DA_NLIST__
03165 MPI_Barrier(m_mpiCommActive);
03166 std::cout<<m_iRankActive<<" Just After Compression...."<<std::endl<<std::endl;
03167 std::cout<<m_iRankActive<<" "<<m_uiPreGhostNodeSize<<" "
03168 <<m_uiPreGhostBoundaryNodeSize<<" "<<m_uiPostGhostNodeSize
03169 <<" "<<m_uiNodeSize<<" "<<m_uiBoundaryNodeSize<<std::endl<<std::endl;
03170 MPI_Barrier(m_mpiCommActive);
03171 std::cout << m_iRankActive << ": Leaving " << __func__ << std::endl;
03172 MPI_Barrier(m_mpiCommActive);
03173 #endif
03174
03175 PROF_BUILD_NLIST_END
03176
03177 }
03178 }
03179
03180