Main Page | Namespace List | Class Hierarchy | Alphabetical List | Class List | Directories | File List | Namespace Members | Class Members | File Members | Related Pages

odaBuildNlist.C

Go to the documentation of this file.
00001 
00009 #include "oda.h"
00010 #include "parUtils.h"
00011 #include "seqUtils.h"
00012 #include "colors.h"
00013 #include "nodeAndRanks.h"
00014 #include "testUtils.h"
00015 
00016 #ifdef __DEBUG__
00017 #ifndef __DEBUG_DA_NLIST__
00018 #define __DEBUG_DA_NLIST__
00019 #endif
00020 #endif
00021 
00022 #ifdef __DEBUG_DA__
00023 #ifndef __DEBUG_DA_NLIST__
00024 #define __DEBUG_DA_NLIST__
00025 #endif
00026 #endif
00027 
00028 #ifdef __DEBUG_DA_NLIST__
00029 #ifndef __MEASURE_BUILD_NLIST__
00030 #define __MEASURE_BUILD_NLIST__
00031 #endif
00032 #endif
00033 
00034 //Warning: DO NOT MEDDLE WITH THIS PIECE OF CODE!!!
00035 
00036 namespace ot {
00037 
00038 #define CHECK_FAST_MAX_LOWER_BOUND(arr,key,fastIdx,fastResult) { \
00039   unsigned int tmpMlbIdx;\
00040   bool tmpMlbResult;\
00041   tmpMlbResult = seq::maxLowerBound<ot::TreeNode> (arr, key, tmpMlbIdx,NULL,NULL);\
00042   assert(tmpMlbResult == fastResult);\
00043   assert(tmpMlbIdx == fastIdx);\
00044 }
00045 
00046 //Build Node list using 4-way searches...
00047 void DA::buildNodeList(std::vector<ot::TreeNode> &in) {
00048 #ifdef __PROF_WITH_BARRIER__
00049   MPI_Barrier(m_mpiCommActive);
00050 #endif
00051   PROF_BUILD_NLIST_BEGIN
00052     // everybody except for the boundary and positive ghosts should be elements,
00053     // This means that anything that is not a boundary should be an element.
00054     // The only extra elements added are the ghost elements.
00055     // Initially will store all 8 indices. will not be compressed.
00056 
00057     // compute number of elements.
00058     unsigned int nelem = m_uiPreGhostElementSize + m_uiElementSize;
00059 
00060   std::vector<unsigned int> nlist;
00061 
00062   /*
00063      The first iteration is for pre-ghosts only.
00064      The second iteration is for own elements. 
00065      The first iteration does not use 4-way searches. The second iteration does.
00066      */
00067   for(unsigned int numFullLoopCtr = 0; numFullLoopCtr < 2; numFullLoopCtr++) {
00068 
00069     // Some storage ...
00070     std::vector<ot::TreeNode> primaryKeys;
00071     std::vector<ot::TreeNode> secondaryKeys;
00072     std::vector<ot::TreeNode> extraAtEnd;
00073 
00074 #ifdef __DEBUG_DA_NLIST__
00075     MPI_Barrier(m_mpiCommActive);
00076     std::vector<ot:: TreeNode > checkSecondRing;
00077     std::vector<ot::TreeNode> chkMissedPrimary;
00078 #endif
00079 
00080     unsigned int iLoopSt,iLoopEnd;
00081     if( numFullLoopCtr == 0) {
00082       //PreGhosts only...
00083       iLoopSt = 0;
00084       iLoopEnd = m_uiPreGhostElementSize;
00085     }else {
00086       //nelem and m_uiPreGhostElementSize would have been changed in the
00087       //first iteration. It's ok. Use the new values only.
00088       //Own elements only...
00089       iLoopSt = m_uiPreGhostElementSize;
00090       iLoopEnd = nelem;
00091     }
00092 
00093 #ifdef __DEBUG_DA_NLIST__
00094     MPI_Barrier(m_mpiCommActive);
00095     if(!m_iRankActive) {
00096       std::cout<<"numFullLoopCtr: "<<numFullLoopCtr<<std::endl;
00097     }
00098     std::cout<<m_iRankActive<<": preElemSz: "<<m_uiPreGhostElementSize
00099       <<" elemBeg: "<<m_uiElementBegin<<" elemEnd: "<<m_uiElementEnd
00100       <<" postGhostBegin: "<<m_uiPostGhostBegin
00101       <<" locBufferSz: "<<m_uiLocalBufferSize
00102       <<" nelem: "<<nelem
00103       <<" iLoopSt: "<<iLoopSt
00104       <<" iLoopEnd: "<<iLoopEnd<<std::endl;
00105     assert(m_uiElementBegin < m_uiPostGhostBegin);
00106     std::cout<<m_iRankActive<<" my First Octant(elem/Bnd): "<<in[m_uiElementBegin]<<std::endl;
00107     MPI_Barrier(m_mpiCommActive);
00108 #endif
00109 
00110 
00111     //Malloc...
00112     //In the first iteration, we need to only allocate for pre-ghosts.
00113     //The second iteration, it would already be resized to the correct size.
00114     //So there would be no change.
00115     if( numFullLoopCtr == 0) {
00116       nlist.resize(8*iLoopEnd);
00117       m_ucpLutMasks.resize(2*iLoopEnd);
00118     }
00119 
00120     //Loop through all the elements in this set and set LUTs.
00121     for (unsigned int i = iLoopSt; i < iLoopEnd; i++) {
00122 
00123       m_ucpLutMasks[2*i + 1] = 0;
00124 
00125       std::vector<ot::TreeNode> nodeLocations(8);
00126       std::vector<ot::TreeNode> parNodeLocations(8);
00127       // get basic info ...
00128       unsigned int d   = in[i].getLevel();
00129       unsigned int x = in[i].getX();
00130       unsigned int y = in[i].getY();
00131       unsigned int z = in[i].getZ();
00132 
00133       //Cryptic Implementation:
00134       unsigned int parX = ( ( x >> ( m_uiMaxDepth - d + 1 ) ) << ( m_uiMaxDepth - d + 1 ) );
00135       unsigned int parY = ( ( y >> ( m_uiMaxDepth - d + 1 ) ) << ( m_uiMaxDepth - d + 1 ) );
00136       unsigned int parZ = ( ( z >> ( m_uiMaxDepth - d + 1 ) ) << ( m_uiMaxDepth - d + 1 ) );
00137 
00138       unsigned int sz = 1u << (m_uiMaxDepth - d);
00139       unsigned int len_par = (unsigned int)(1u<<( m_uiMaxDepth  - d +1 ) );
00140 
00141       unsigned int a = x % len_par;
00142       unsigned int b = y % len_par;
00143       unsigned int c = z % len_par;
00144 
00145       a /= sz;
00146       b /= sz;
00147       c /= sz;
00148 
00149       unsigned int ch_num = (4*c + 2*b + a);
00150 
00151 #ifdef __DEBUG_DA_NLIST__
00152       if ( !ch_num || (ch_num==7) ) {
00153         if ( !(in[i].getFlag() & ot::TreeNode::NODE) ) {
00154           std::cerr << RED"Nodes are marked wrongly "NRM << std::endl;
00155           assert(false);
00156         }
00157       }
00158 #endif
00159 
00160       bool found[8];
00161       // haven't found anything yet. Set Default values.
00162       for (unsigned int k = 0; k < 8; k++) {
00163         nlist[8*i + k] = m_uiLocalBufferSize;
00164         found[k] = false;
00165       }//end for k
00166 
00167       //~~~~~~~~~~~~~~~~~~~~~~~NEGATIVE SEARCH~~~~~~~~~~~~~~~~~~~~~~~~~~~~
00168       if(numFullLoopCtr == 1) {
00169         unsigned int idnx, idny, idnz;
00170         //Basic Idea of a negative search. 
00171         //1. Search and find elements on the negative faces. 
00172         //2. If the result is a brother and the Vtx in question is hanging, swap and copy.
00173         //3. In all other cases, simply copy.
00174         //4. Note, copy only if the nlist is pointing to a valid location
00175 
00176         bool foundNegX=false, foundNegY=false, foundNegZ=false;
00177 
00178         // first lets do the 3 negative searches and we'll decide how to use it later ...
00179         // search negative X
00180         if (x) {
00181           ot::TreeNode knx( x-1, y, z, m_uiMaxDepth, m_uiDimension, m_uiMaxDepth);
00182           foundNegX = seq::maxLowerBound<ot::TreeNode> (in, knx, idnx,NULL,&i);
00183 #ifdef __DEBUG_DA_NLIST__
00184           CHECK_FAST_MAX_LOWER_BOUND(in,knx,idnx,foundNegX)
00185 #endif
00186 
00187             if ( foundNegX && (!( (in[idnx].isAncestor(knx) ) || (in[idnx] == knx) )) ) {
00188               foundNegX=false;
00189             }
00190           if ( foundNegX && (m_ucpLutMasks[2*idnx+1] == ot::DA_FLAGS::FOREIGN) ) {
00191             foundNegX=false;
00192           }
00193         }
00194 
00195         // search negative Y
00196         if (y) {
00197           ot::TreeNode kny( x, y-1, z, m_uiMaxDepth, m_uiDimension, m_uiMaxDepth);
00198           foundNegY = seq::maxLowerBound<ot::TreeNode> (in, kny, idny,NULL,&i) ;
00199 #ifdef __DEBUG_DA_NLIST__
00200           CHECK_FAST_MAX_LOWER_BOUND(in,kny,idny,foundNegY)
00201 #endif
00202             if ( foundNegY && (!( (in[idny].isAncestor(kny) ) || (in[idny] == kny) )) ) {
00203               foundNegY=false;
00204             }
00205           if ( foundNegY && (m_ucpLutMasks[2*idny+1] == ot::DA_FLAGS::FOREIGN) ) {
00206             foundNegY=false;
00207           }
00208         }
00209 
00210         // search negative Z
00211         if (z) {
00212           ot::TreeNode knz( x, y, z-1, m_uiMaxDepth, m_uiDimension, m_uiMaxDepth);
00213           foundNegZ = seq::maxLowerBound<ot::TreeNode> (in, knz, idnz,NULL,&i) ;
00214 #ifdef __DEBUG_DA_NLIST__
00215           CHECK_FAST_MAX_LOWER_BOUND(in,knz,idnz,foundNegZ)
00216 #endif
00217             if ( foundNegZ && (!( (in[idnz].isAncestor(knz) ) || (in[idnz] == knz) )) ) {
00218               foundNegZ=false;
00219             }
00220           if ( foundNegZ && (m_ucpLutMasks[2*idnz+1] == ot::DA_FLAGS::FOREIGN) ) {
00221             foundNegZ=false;
00222           }
00223         }
00224 
00225         // How we use the results of the negative search depends on the child number ...
00226         // if here my is the current element and x, y, and z are the elements in the 
00227         // respective negative directions, then the default mapping sans corrections is
00228         // 
00229         // my[0] = x[1] = y[2] = z[4]
00230         // my[1] = y[3] = z[5]
00231         // my[2] = x[3] = z[6]
00232         // my[3] = z[7];
00233         // my[4] = x[5] = y[6]
00234         // my[5] = y[7];
00235         // my[6] = x[7];
00236         // 
00237         // The corrections will appear for all children except for child zero ... 
00238         // and will be explained when they are performed.
00239 
00240 #define NEG_SEARCH_BLOCK1(idx,n1l,n1r,n2l,n2r,n3l,n3r,n4l,n4r) {\
00241   if(nlist[8*idx+n1r] < m_uiLocalBufferSize) {\
00242     nlist[8*i+n1l] = nlist[8*idx+n1r];\
00243     found[n1l] = true;\
00244   }\
00245   if(nlist[8*idx+n2r] < m_uiLocalBufferSize) {\
00246     nlist[8*i+n2l] = nlist[8*idx+n2r];\
00247     found[n2l] = true;\
00248   }\
00249   if(nlist[8*idx+n3r] < m_uiLocalBufferSize) {\
00250     nlist[8*i+n3l] = nlist[8*idx+n3r];\
00251     found[n3l] = true;\
00252   }\
00253   if(nlist[8*idx+n4r] < m_uiLocalBufferSize) {\
00254     nlist[8*i+n4l] = nlist[8*idx+n4r];\
00255     found[n4l] = true;\
00256   }\
00257 }
00258 
00259 #define NEG_SEARCH_BLOCK2(idx,n1l,n1r,n2l,n2r,n3l,n3r,n4l,n4r) {\
00260   unsigned char negMask;\
00261   if ( in[i].getLevel() != in[idx].getLevel()) {\
00262     negMask = 0;\
00263   } else {\
00264     negMask = m_ucpLutMasks[2*idx+1];\
00265   }\
00266   if (negMask != ot::DA_FLAGS::FOREIGN) {\
00267     if ( negMask & (1 << n1r)) {\
00268       if(nlist[8*idx+n1l] < m_uiLocalBufferSize) {\
00269         nlist[8*i+n1l] = nlist[8*idx+n1l];\
00270         found[n1l] = true;\
00271       }\
00272     } else {\
00273       if(nlist[8*idx+n1r] < m_uiLocalBufferSize) {\
00274         nlist[8*i+n1l] = nlist[8*idx+n1r];\
00275         found[n1l] = true;\
00276       }\
00277     }\
00278     if ( negMask & (1 << n2r)) {\
00279       if(nlist[8*idx+n2l] < m_uiLocalBufferSize) {\
00280         nlist[8*i+n2l] = nlist[8*idx+n2l];\
00281         found[n2l] = true;\
00282       }\
00283     } else {\
00284       if(nlist[8*idx+n2r] < m_uiLocalBufferSize) {\
00285         nlist[8*i+n2l] = nlist[8*idx+n2r];\
00286         found[n2l] = true;\
00287       }\
00288     }\
00289     if ( negMask & (1 << n3r)) {\
00290       if(nlist[8*idx+n3l] < m_uiLocalBufferSize) {\
00291         nlist[8*i+n3l] = nlist[8*idx+n3l];\
00292         found[n3l] = true;\
00293       }\
00294     } else {\
00295       if(nlist[8*idx+n3r] < m_uiLocalBufferSize) {\
00296         nlist[8*i+n3l] = nlist[8*idx+n3r];\
00297         found[n3l] = true;\
00298       }\
00299     }\
00300     if ( negMask & (1 << n4r)) {\
00301       if(nlist[8*idx+n4l] < m_uiLocalBufferSize) {\
00302         nlist[8*i+n4l] = nlist[8*idx+n4l];\
00303         found[n4l] = true;\
00304       }\
00305     } else {\
00306       if(nlist[8*idx+n4r] < m_uiLocalBufferSize) {\
00307         nlist[8*i+n4l] = nlist[8*idx+n4r];\
00308         found[n4l] = true;\
00309       }\
00310     }\
00311   }\
00312 }
00313 
00314 #define NEG_SEARCH_BLOCK1X NEG_SEARCH_BLOCK1(idnx,0,1,2,3,4,5,6,7)
00315 #define NEG_SEARCH_BLOCK2X NEG_SEARCH_BLOCK2(idnx,0,1,2,3,4,5,6,7)
00316 
00317 #define NEG_SEARCH_BLOCK1Y NEG_SEARCH_BLOCK1(idny,0,2,1,3,4,6,5,7)
00318 #define NEG_SEARCH_BLOCK2Y NEG_SEARCH_BLOCK2(idny,0,2,1,3,4,6,5,7)
00319 
00320 #define NEG_SEARCH_BLOCK1Z NEG_SEARCH_BLOCK1(idnz,0,4,1,5,2,6,3,7)
00321 #define NEG_SEARCH_BLOCK2Z NEG_SEARCH_BLOCK2(idnz,0,4,1,5,2,6,3,7)
00322 
00323 switch (ch_num) {
00324   case 0: {
00325             //No Negative brothers.
00326             if (foundNegX) {
00327               NEG_SEARCH_BLOCK1X
00328             }
00329             if (foundNegY) {
00330               NEG_SEARCH_BLOCK1Y
00331             }
00332             if (foundNegZ) {
00333               NEG_SEARCH_BLOCK1Z
00334             }
00335             break;
00336           }
00337   case 1: {
00338             //negX could be a brother. 
00339             if (foundNegX) {
00340               NEG_SEARCH_BLOCK2X
00341             }
00342             if (foundNegY) {
00343               NEG_SEARCH_BLOCK1Y
00344             }
00345             if (foundNegZ) {
00346               NEG_SEARCH_BLOCK1Z
00347             }
00348             break;
00349           }
00350   case 2: {
00351             //negY could be a brother.
00352             if (foundNegX) {
00353               NEG_SEARCH_BLOCK1X
00354             }
00355             if (foundNegY) {
00356               NEG_SEARCH_BLOCK2Y
00357             }
00358             if (foundNegZ) {
00359               NEG_SEARCH_BLOCK1Z
00360             }
00361             break;
00362           }
00363   case 3: {
00364             //negX and negY could be brothers.
00365             if (foundNegX) {
00366               NEG_SEARCH_BLOCK2X
00367             }
00368             if (foundNegY) {
00369               NEG_SEARCH_BLOCK2Y
00370             }
00371             if (foundNegZ) {
00372               NEG_SEARCH_BLOCK1Z
00373             }
00374             break;
00375           }
00376   case 4: {
00377             //negZ could be a brother.
00378             if (foundNegX) {
00379               NEG_SEARCH_BLOCK1X
00380             }
00381             if (foundNegY) { 
00382               NEG_SEARCH_BLOCK1Y
00383             }
00384             if (foundNegZ) {
00385               NEG_SEARCH_BLOCK2Z
00386             }
00387             break;
00388           }
00389   case 5: {
00390             //negX and negZ could be brothers.
00391             if (foundNegX) {
00392               NEG_SEARCH_BLOCK2X
00393             }
00394             if (foundNegY) {  
00395               NEG_SEARCH_BLOCK1Y
00396             }
00397             if (foundNegZ) {
00398               NEG_SEARCH_BLOCK2Z
00399             }
00400             break;
00401           }
00402   case 6: {
00403             //negY and negZ could be brothers.
00404             if (foundNegX) {
00405               NEG_SEARCH_BLOCK1X
00406             }
00407             if (foundNegY) {
00408               NEG_SEARCH_BLOCK2Y
00409             }
00410             if (foundNegZ) {
00411               NEG_SEARCH_BLOCK2Z
00412             }
00413             break;
00414           }
00415   case 7: {
00416             //negX negY and negZ could be brothers.
00417             if (foundNegX) {
00418               NEG_SEARCH_BLOCK2X
00419             }
00420             if (foundNegY) {
00421               NEG_SEARCH_BLOCK2Y
00422             }
00423             if (foundNegZ) {
00424               NEG_SEARCH_BLOCK2Z
00425             }
00426             break;
00427           }
00428   default: {
00429              std::cerr << "Wrong Child Number " << ch_num << std::endl;
00430              assert(false);
00431              break;
00432            }
00433 }//end cases
00434 }//end if for negative
00435 
00436 #undef NEG_SEARCH_BLOCK1X
00437 #undef NEG_SEARCH_BLOCK2X
00438 #undef NEG_SEARCH_BLOCK1Y
00439 #undef NEG_SEARCH_BLOCK2Y 
00440 #undef NEG_SEARCH_BLOCK1Z 
00441 #undef NEG_SEARCH_BLOCK2Z
00442 #undef NEG_SEARCH_BLOCK1
00443 #undef NEG_SEARCH_BLOCK2
00444 
00445 //~~~~~~~~~~~~~~~~~~~~~POSITIVE SEARCH~~~~~~~~~~~~~~~~~~~~~~
00446 #ifdef __DEBUG_DA_NLIST__
00447 #define DEBUG_CHECK_FAST_MAX_LOWER_BOUND(arr,key,fastIdx,fastResult) CHECK_FAST_MAX_LOWER_BOUND(arr,key,fastIdx,fastResult)
00448 
00449 #define POS_SEARCH_DEBUG_BLOCK1(debugV1,debugV2) {\
00450   if ( (ch_num == debugV1) || (ch_num == debugV2) ) {\
00451     std::cout << "Failing for index " << i << " with child num " << ch_num << std::endl;\
00452     assert(false);\
00453   }\
00454 }
00455 
00456 #define  POS_SEARCH_DEBUG_BLOCK2 {\
00457   assert(sKey > in[m_uiPostGhostBegin - 1]);\
00458   chkMissedPrimary.push_back(sKey);\
00459 }
00460 
00461 #define POS_SEARCH_DEBUG_BLOCK3 {\
00462   assert( (idx+k) < m_uiLocalBufferSize );\
00463   assert( in[idx+k].getParent() == newKey );\
00464   assert( !(in[idx+k].getFlag() & ot::TreeNode::NODE) );\
00465   assert( ((idx+k) < m_uiElementBegin) || (((idx+k) >= m_uiPostGhostBegin)) );\
00466 }
00467 
00468 #else
00469 #define DEBUG_CHECK_FAST_MAX_LOWER_BOUND(arr,key,fastIdx,fastResult) 
00470 #define POS_SEARCH_DEBUG_BLOCK1(debugV1,debugV2)
00471 #define POS_SEARCH_DEBUG_BLOCK2
00472 #define POS_SEARCH_DEBUG_BLOCK3
00473 #endif
00474 
00475 #define POS_SECONDARY_SEARCH_BLOCK(debugV1,debugV2) {\
00476   /* if this is not a node, i.e., it is hanging*/\
00477   POS_SEARCH_DEBUG_BLOCK1(debugV1,debugV2)\
00478   /* All other cNums are anchored at the parent+(2*sz,0,0).*/\
00479   sKey = parNodeLocations[j];\
00480   lastLevel = d-1;\
00481   foundKey = seq::maxLowerBound<ot::TreeNode>(in, sKey, idx,&i,NULL);\
00482   DEBUG_CHECK_FAST_MAX_LOWER_BOUND(in, sKey,idx, foundKey) \
00483   if ( foundKey && !((in[idx].getAnchor()==sKey.getAnchor()) && (in[idx].getFlag()&ot::TreeNode::NODE))) {\
00484     foundKey=false;\
00485   }\
00486   if ( foundKey ) {\
00487     nlist[8*i+j] = idx;\
00488   } else {\
00489     /* Can happen for own elements too. */\
00490     findGhost = true;\
00491   }\
00492 }
00493 
00494 #define POS_SEARCH_BLOCK(debugV1,debugV2) {\
00495   /* first search in default location */\
00496   sKey = nodeLocations[j];\
00497   lastLevel = d;\
00498   foundKey = seq::maxLowerBound<ot::TreeNode>(in, sKey, idx,&i,NULL);\
00499   DEBUG_CHECK_FAST_MAX_LOWER_BOUND(in, sKey,idx, foundKey) \
00500   if(foundKey && !( (in[idx].isAncestor(sKey) ) || (in[idx]==sKey))) {\
00501     foundKey=false;\
00502   }\
00503   if(foundKey) {\
00504     /*found somebody*/\
00505     if((in[idx].getAnchor()==sKey.getAnchor())&&(in[idx].getFlag() & ot::TreeNode::NODE)) {\
00506       /*found a node, so set it.*/\
00507       nlist[8*i+j] = idx;\
00508     } else {\
00509       POS_SECONDARY_SEARCH_BLOCK(debugV1,debugV2)\
00510     }\
00511   } else {\
00512     if(i >= m_uiElementBegin) {\
00513       /*The primary search for some vertex of my own element failed*/\
00514       /*This should only happen if the node we are looking for is*/\
00515       /*a post-ghost and it is hanging. When we do the check later*/\
00516       /*we must also test that this node is a real anchor. */\
00517       POS_SEARCH_DEBUG_BLOCK2\
00518       /*Treat this case just as if the search successfully returned*/\
00519       /*a node, but it turned out to be hanging*/\
00520       POS_SECONDARY_SEARCH_BLOCK(debugV1,debugV2)\
00521     } else {\
00522       /* this is a pre-ghost so it is normal*/\
00523       /*to miss some primary searches */\
00524       findGhost=true;\
00525     }\
00526   }\
00527   if ( findGhost ) {\
00528     findGhost=false;\
00529     /* need to find the ghost. */\
00530     /* check if idx+k is valid */\
00531     ot::TreeNode newKey(sKey.getX(),sKey.getY(),sKey.getZ(),\
00532         lastLevel,m_uiDimension,m_uiMaxDepth);\
00533     unsigned int k=1;\
00534     while ( (idx+k) < in.size() ) {\
00535       if ( in[idx+k].getParent() == newKey ) {\
00536         if ( in[idx+k].getFlag() & ot::TreeNode::NODE ) {\
00537           k++;\
00538           continue;\
00539         } else {\
00540           /* found the correct node. (hanging)*/\
00541           findGhost=true;\
00542           break;\
00543         }\
00544       }\
00545       if ( in[idx+k] > newKey.getDLD() ) {\
00546         findGhost = false;\
00547         break;\
00548       } else {\
00549         k++;\
00550       }\
00551     }\
00552     if (findGhost) {\
00553       nlist[8*i+j] = idx+k;\
00554       POS_SEARCH_DEBUG_BLOCK3\
00555     } else {\
00556       nlist[8*i+j] = m_uiLocalBufferSize;\
00557     }\
00558   }\
00559 }
00560 
00561 // find the eight vertices ...
00562 //Only 0 is a special case.
00563 for (unsigned int j = 0; j < 8; j++) {
00564   if(found[j]) {
00565     continue;
00566   }
00567   bool foundKey=false;
00568   bool findGhost=false;
00569   unsigned int idx;
00570   unsigned int lastLevel;
00571   ot::TreeNode sKey(m_uiDimension, m_uiMaxDepth);
00572 
00573   switch (j) {
00574     case 0: {
00575               nodeLocations[j] =
00576                 ot::TreeNode(x, y, z, m_uiMaxDepth, m_uiDimension, m_uiMaxDepth);
00577               parNodeLocations[j] = 
00578                 ot::TreeNode(parX, parY, parZ, m_uiMaxDepth, m_uiDimension, m_uiMaxDepth);
00579               // first search is not required since we are searching for i
00580               if ( !(in[i].getFlag() & ot::TreeNode::NODE ) ) {
00581                 // if this is not a node, i.e., it is hanging
00582 #ifdef __DEBUG_DA_NLIST__
00583                 if ( (ch_num == 0) || (ch_num == 7) ) {
00584                   std::cout << "Failing for index " << i << " with child num " << ch_num << std::endl;
00585                   assert(false);
00586                 }
00587 #endif
00588                 // All other child numbers are anchored at the parent.
00589                 sKey = parNodeLocations[j];
00590                 lastLevel = d-1;
00591                 foundKey = seq::maxLowerBound<ot::TreeNode> (in, sKey, idx,NULL,&i);
00592 #ifdef __DEBUG_DA_NLIST__
00593                 CHECK_FAST_MAX_LOWER_BOUND(in, sKey,idx, foundKey) 
00594 #endif
00595                   if ( foundKey && 
00596                       (!( (in[idx].getAnchor() == sKey.getAnchor()) && (in[idx].getFlag() & ot::TreeNode::NODE) )) ) {
00597                     foundKey=false;
00598                   }
00599                 if ( !foundKey ) {
00600 #ifdef __DEBUG_DA_NLIST__
00601                   // should only happen for ghosts ...
00602                   if(i >= m_uiElementBegin) {
00603                     std::cout<<m_iRankActive<<" i = "<<i<<" preGhostElemEnd "<<m_uiPreGhostElementSize
00604                       <<" elemBeg: "<<m_uiElementBegin<<" elemEnd: "<<m_uiElementEnd<<std::endl;
00605                   }
00606                   assert (i < m_uiElementBegin);
00607 #endif
00608                   // for node zero, simply default to i.
00609                   nlist[8*i+j] = i;
00610                 } else {
00611                   nlist[8*i+j] = idx;
00612                 }
00613               } else {
00614                 nlist[8*i+j] = i;
00615               }
00616               break;
00617             }
00618     case 1: {
00619               nodeLocations[j] = 
00620                 ot::TreeNode(x+sz, y, z, m_uiMaxDepth, m_uiDimension, m_uiMaxDepth);
00621               parNodeLocations[j] = 
00622                 ot::TreeNode(parX+(sz<<1u), parY, parZ, m_uiMaxDepth, m_uiDimension, m_uiMaxDepth);
00623               POS_SEARCH_BLOCK(1,6) 
00624                 break;
00625             }
00626     case 2: {
00627               nodeLocations[j] = 
00628                 ot::TreeNode(x, y+sz, z, m_uiMaxDepth, m_uiDimension, m_uiMaxDepth);
00629               parNodeLocations[j] = 
00630                 ot::TreeNode(parX, parY+(sz<<1u), parZ, m_uiMaxDepth, m_uiDimension, m_uiMaxDepth);
00631               POS_SEARCH_BLOCK(2,5) 
00632                 break;
00633             }
00634     case 3: {
00635               nodeLocations[j] = 
00636                 ot::TreeNode(x+sz, y+sz, z, m_uiMaxDepth, m_uiDimension, m_uiMaxDepth);
00637               parNodeLocations[j] = 
00638                 ot::TreeNode(parX+(sz<<1u), parY+(sz<<1u), parZ, m_uiMaxDepth, m_uiDimension, m_uiMaxDepth);
00639               POS_SEARCH_BLOCK(3,4) 
00640                 break;
00641             }
00642     case 4: {
00643               nodeLocations[j] = 
00644                 ot::TreeNode(x, y, z+sz, m_uiMaxDepth, m_uiDimension, m_uiMaxDepth);
00645               parNodeLocations[j] = 
00646                 ot::TreeNode(parX, parY, parZ+(sz<<1u), m_uiMaxDepth, m_uiDimension, m_uiMaxDepth);
00647               POS_SEARCH_BLOCK(4,3) 
00648                 break;
00649             }
00650     case 5: {
00651               nodeLocations[j] = 
00652                 ot::TreeNode(x+sz, y, z+sz, m_uiMaxDepth, m_uiDimension, m_uiMaxDepth);
00653               parNodeLocations[j] = 
00654                 ot::TreeNode(parX+(sz<<1u), parY, parZ+(sz<<1u), m_uiMaxDepth, m_uiDimension, m_uiMaxDepth);
00655               POS_SEARCH_BLOCK(5,2) 
00656                 break;
00657             }
00658     case 6: {
00659               nodeLocations[j] = 
00660                 ot::TreeNode(x, y+sz, z+sz, m_uiMaxDepth, m_uiDimension, m_uiMaxDepth);
00661               parNodeLocations[j] = 
00662                 ot::TreeNode(parX, parY+(sz<<1u), parZ+(sz<<1u), m_uiMaxDepth, m_uiDimension, m_uiMaxDepth);
00663               POS_SEARCH_BLOCK(6,1) 
00664                 break;
00665             }
00666     case 7: {
00667               nodeLocations[j] = 
00668                 ot::TreeNode(x+sz, y+sz, z+sz, m_uiMaxDepth, m_uiDimension, m_uiMaxDepth);
00669               parNodeLocations[j] = 
00670                 ot::TreeNode(parX+(sz<<1u), parY+(sz<<1u), parZ+(sz<<1u), m_uiMaxDepth, m_uiDimension, m_uiMaxDepth);
00671               POS_SEARCH_BLOCK(7,0) 
00672                 break;
00673             }
00674     default: {
00675                std::cerr << RED<<"Wrong node number in " << __func__ <<NRM<< std::endl;
00676                assert(false);
00677                break;
00678              }
00679   } // end switch
00680 } // end loop over the 8 vertices of this element...
00681 
00682 #undef POS_SEARCH_BLOCK
00683 #undef POS_SECONDARY_SEARCH_BLOCK
00684 #undef POS_SEARCH_DEBUG_BLOCK1
00685 #undef POS_SEARCH_DEBUG_BLOCK2
00686 #undef POS_SEARCH_DEBUG_BLOCK3
00687 
00688 // FINISHED Searching for Node Indices ...
00689 
00690 //Ensure that the anchor of the local element is not pointing to ghost.
00691 //This can happen only if the octant in question is a singular block
00692 //and its anchor is hanging and the 0th child of its parent is sitting on a different processor.
00693 //BlockPart should have detected this case and prevented this.
00694 #ifdef __DEBUG_DA_NLIST__
00695 if ( (i >= m_uiElementBegin) && (i < m_uiElementEnd) &&
00696     ( (nlist[8*i] < m_uiElementBegin) || (nlist[8*i] >= m_uiPostGhostBegin) ) ) {
00697   std::cout << "At index " << i << " anchor is at " << nlist[8*i] <<
00698     " where elemBegin is at " << m_uiElementBegin << " and postGhBegin is "
00699     << m_uiPostGhostBegin << std::endl;
00700   std::cout << "RANK is " << m_iRankActive << std::endl;
00701   std::cout << "NList is ";
00702   for (unsigned int j=0; j<8; j++) {
00703     std::cout << nlist[8*i+j] << ", ";
00704   }
00705   std::cout << std::endl;
00706   std::cout<<m_iRankActive<<" failingOct: "<<in[i]<<std::endl<<" it's parent: "<<in[i].getParent()<<std::endl;
00707   if( nlist[8*i] < in.size() ) {
00708     std::cout<<m_iRankActive<<" failingOct's anchor is actually mapped to: "<<in[nlist[8*i]]<<std::endl;  
00709   }
00710   assert(false);
00711 }
00712 #endif
00713 
00714 #ifdef __DEBUG_DA_NLIST__
00715 //Check if you sent yourself apriori (Second Ring). 
00716 if( (i >= m_uiElementBegin) && (i < m_uiPostGhostBegin) ) {
00717   for(unsigned int j=0; j< 8; j++) {
00718     if(nlist[8*i + j] < m_uiElementBegin) {
00719       std::cout<<m_iRankActive<<" Trying to send yourself as a Post Ghost ELEMENT for  i = "
00720         <<i<<" j = "<<j<<std::endl;
00721       assert(false);
00722     }
00723     if( (nlist[8*i+j] >= m_uiPostGhostBegin) && (nlist[8*i+j] < m_uiLocalBufferSize) ) {
00724       TreeNode tmpToSend = in[nlist[8*i+j]];
00725       tmpToSend.setWeight(i);
00726       checkSecondRing.push_back(tmpToSend);
00727     }
00728   }//end for j
00729 }
00730 #endif
00731 
00732 // compute the hanging node mask for this element ...
00733 unsigned char _mask=0;
00734 unsigned int numOutOfBounds = 0;
00735 for ( unsigned int j=0; j<8; j++) {
00736   if ( ( nlist[8*i+j] < m_uiElementBegin ) || (nlist[8*i+j] >= m_uiPostGhostBegin) ) {
00737     numOutOfBounds++;
00738   }
00739 
00740   if(nlist[8*i+j] >= m_uiLocalBufferSize) {
00741     //Skip setting mask for this vtx.
00742     continue;
00743   }
00744 
00745   // check if any of the nodes is hanging ...
00746   unsigned int _x,_y,_z, _d;
00747   _x = in[nlist[8*i+j]].getX(); 
00748   _y = in[nlist[8*i+j]].getY(); 
00749   _z = in[nlist[8*i+j]].getZ(); 
00750   _d = in[nlist[8*i+j]].getLevel();
00751   if ( !(in[nlist[8*i+j]].getFlag() & ot::TreeNode::NODE ) ) {
00752     // std::cout << "For i=" << i << " looking at parent" << std::endl;
00753     _x  = ( ( _x >> ( m_uiMaxDepth - _d + 1 ) ) << ( m_uiMaxDepth - _d + 1 ) ); 
00754     _y  = ( ( _y >> ( m_uiMaxDepth - _d + 1 ) ) << ( m_uiMaxDepth - _d + 1 ) );
00755     _z  = ( ( _z >> ( m_uiMaxDepth - _d + 1 ) ) << ( m_uiMaxDepth - _d + 1 ) );
00756   }
00757 
00758   switch (j) {
00759     case 0:
00760       if ( !(in[i].getFlag() & ot::TreeNode::NODE ) ) {
00761         _mask |= (1 << j);
00762       }
00763       break;
00764     case 1:
00765       // look at the anchor of the +x neighbor
00766       // if ( (x+sz) != _x ) {
00767       if ( ( (x+sz) != _x ) || ( y != _y ) || ( z != _z ) ) {
00768         _mask |= (1 << j);
00769       }
00770       break;
00771     case 2:
00772       // look at the anchor of the +y neighbor
00773       // if ( (y+sz) != _y ) {
00774       if ( ( x != _x ) || ( (y+sz) != _y ) || ( z != _z ) ) {
00775         _mask |= (1 << j);
00776       }
00777       break;
00778     case 3:
00779       // look at both x and y anchors ...
00780       // if ( ( (x+sz) != _x ) || ( (y+sz) != _y )  ) {
00781       if ( ( (x+sz) != _x ) || ( (y+sz) != _y ) || ( z != _z ) ) {
00782         _mask |= (1 << j);
00783       }
00784       break;
00785     case 4:
00786       // look at z anchor
00787       // if ( (z+sz) != _z ) {
00788       if ( ( x != _x ) || ( y != _y ) || ( (z+sz) != _z ) ) {
00789         _mask |= (1 << j);
00790       }
00791       break;
00792     case 5:
00793       // look at +z,+x
00794       // if ( ( (x+sz) != _x ) || ( (z+sz) != _z )  ) {
00795       if ( ( (x+sz) != _x ) || ( y != _y ) || ( (z+sz) != _z ) ) {
00796         _mask |= (1 << j);
00797       }
00798       break;
00799     case 6:
00800       // look at +z, +y
00801       // if ( ( (z+sz) != _z ) || ( (y+sz) != _y )  ) {
00802       if ( ( x != _x ) || ( (y+sz) != _y ) || ( (z+sz) != _z ) ) {
00803         _mask |= (1 << j);
00804       }
00805       break;
00806     case 7:
00807       if ( ( (x+sz) != _x ) || ( (y+sz) != _y ) || ( (z+sz) != _z ) ) {
00808         _mask |= (1 << j);
00809       }
00810       break;
00811   }//end switch-case
00812   }//end for j
00813 
00814   // store the mask ...
00815   if (numOutOfBounds == 8) {
00816     //This does not even have one writable node and hence this is not an element.
00817     _mask = ot::DA_FLAGS::FOREIGN;
00818   } else if (numOutOfBounds) {
00819     //A Dependent Element, is one which has atleast one writable node and atleast one ghosted node.
00820     in[i].orFlag(ot::DA_FLAGS::DEP_ELEM);        
00821   }
00822 
00823   //Prepare for the Ugly portion...
00824   //Sometimes, we might find a primary key but it could turn out to be hanging
00825   //and then we may not find the secondary key. In such cases, we still
00826   //generate both the priimary and secondary keys and search for both in the
00827   //following second ring correction phase. This might seem like an overkill
00828   //but this situation occurs only for pre-ghosts and singular blocks. Suppose,
00829   //the octant A searches for a secondary key B and does not find it and
00830   //suppose A is not a pre-ghost (i.e. this processor owns A). Since B is one
00831   //of the vertices of A's parent this means the sibling of A that shares the
00832   //vertex B with A's parent is not on the same processor as A (since all
00833   //direct vertices which are not hanging are communicated apriori and B can't
00834   //be hanging). Hence, A is singular
00835   for (int j=0; j<8; j++) {
00836     if (nlist[8*i+j] >= m_uiLocalBufferSize) {
00837       if (_mask != ot::DA_FLAGS::FOREIGN) {
00838 #ifdef __DEBUG_DA_NLIST__
00839         assert(j);
00840 #endif
00841         // add to list ...
00842         nodeLocations[j].setWeight(m_iRankActive);
00843         parNodeLocations[j].setWeight(m_iRankActive);
00844         primaryKeys.push_back(nodeLocations[j]);
00845         secondaryKeys.push_back(parNodeLocations[j]);
00846         // correct lookUp Table ...
00847         nlist[8*i+j] = m_uiLocalBufferSize +
00848           static_cast<unsigned int>(extraAtEnd.size());
00849         nodeLocations[j].setWeight(i);
00850         extraAtEnd.push_back(nodeLocations[j]);
00851       }//end if foreign
00852     }//end if invalid
00853   }//end for j
00854 
00855   // Store the hanging node mask
00856   m_ucpLutMasks[2*i+1] = _mask;
00857   } // end for i: All elements in this set 
00858 
00859 #ifdef __DEBUG_DA_NLIST__
00860   MPI_Barrier(m_mpiCommActive);
00861   if(!m_iRankActive) {
00862     std::cout<<std::endl;
00863     std::cout<<"Finished Elemental Loop for Set# "<<numFullLoopCtr<<std::endl;
00864     std::cout<<std::endl;
00865   }
00866   MPI_Barrier(m_mpiCommActive);
00867 #endif
00868 
00869 #ifdef __DEBUG_DA_NLIST__
00870   MPI_Barrier(m_mpiCommActive);
00871   seq::makeVectorUnique<ot::TreeNode>(chkMissedPrimary, false);
00872   int* chkMissedPrimarySendCounts = new int[m_iNpesActive];
00873   for(int i = 0; i < m_iNpesActive; i++) {
00874     chkMissedPrimarySendCounts[i] = 0; 
00875   }//end for i
00876 
00877   for(unsigned int i = 0; i < chkMissedPrimary.size(); i++) {
00878     //maxLB returns the last index in a sorted array such that a[ind] <= key and  a[index +1] > key
00879     unsigned int idx;
00880     bool found = seq::maxLowerBound<TreeNode >(m_tnMinAllBlocks, chkMissedPrimary[i], idx, NULL, NULL);
00881     assert(found);
00882     //missed keys must be post-ghosts
00883     assert(idx > m_iRankActive);
00884     chkMissedPrimarySendCounts[idx]++; 
00885   }//end for i
00886 
00887   int* chkMissedPrimaryRecvCounts = new int[m_iNpesActive];
00888   par::Mpi_Alltoall<int>(chkMissedPrimarySendCounts, chkMissedPrimaryRecvCounts, 1, m_mpiCommActive);
00889 
00890   int* chkMissedPrimarySendOffsets = new int[m_iNpesActive];
00891   int* chkMissedPrimaryRecvOffsets = new int[m_iNpesActive];
00892   chkMissedPrimarySendOffsets[0] = 0;
00893   chkMissedPrimaryRecvOffsets[0] = 0;
00894   for(int i = 1; i < m_iNpesActive; i++) {
00895     chkMissedPrimarySendOffsets[i] = chkMissedPrimarySendOffsets[i-1] + chkMissedPrimarySendCounts[i-1] ;
00896     chkMissedPrimaryRecvOffsets[i] = chkMissedPrimaryRecvOffsets[i-1] + chkMissedPrimaryRecvCounts[i-1] ;
00897   }//end for i
00898   unsigned int chkMissedPrimaryRecvSz = chkMissedPrimaryRecvOffsets[m_iNpesActive - 1] +
00899     chkMissedPrimaryRecvCounts[m_iNpesActive - 1];
00900   std::vector<ot::TreeNode> chkMissedPrimaryRecvBuffer(chkMissedPrimaryRecvSz);
00901   ot::TreeNode* chkMissedPrimarySendPtr = NULL;
00902   ot::TreeNode* chkMissedPrimaryRecvPtr = NULL;
00903   if(!(chkMissedPrimary.empty())) {
00904     chkMissedPrimarySendPtr = (&(*(chkMissedPrimary.begin())));
00905   }
00906   if(!(chkMissedPrimaryRecvBuffer.empty())) {
00907     chkMissedPrimaryRecvPtr = (&(*(chkMissedPrimaryRecvBuffer.begin())));
00908   }
00909   par::Mpi_Alltoallv_sparse<ot::TreeNode>( chkMissedPrimarySendPtr, chkMissedPrimarySendCounts,
00910       chkMissedPrimarySendOffsets, chkMissedPrimaryRecvPtr,
00911       chkMissedPrimaryRecvCounts, chkMissedPrimaryRecvOffsets, m_mpiCommActive);
00912 
00913   for(unsigned int i = 0; i < chkMissedPrimaryRecvBuffer.size(); i++) {
00914     unsigned int idx;
00915     bool found = seq::maxLowerBound<TreeNode >(in, chkMissedPrimaryRecvBuffer[i], idx, NULL, NULL);
00916     assert(found);
00917     assert( (idx >= m_uiElementBegin) && (idx < m_uiPostGhostBegin) );
00918     //Although this is a hanging node this is also some anchor
00919     assert(in[idx].getAnchor() == chkMissedPrimaryRecvBuffer[i].getAnchor());
00920     assert( !(in[idx].getFlag() & ot::TreeNode::NODE) );
00921   }//end for i
00922 
00923   delete [] chkMissedPrimarySendCounts;
00924   delete [] chkMissedPrimaryRecvCounts;
00925   delete [] chkMissedPrimarySendOffsets;
00926   delete [] chkMissedPrimaryRecvOffsets;
00927   chkMissedPrimaryRecvBuffer.clear();
00928   MPI_Barrier(m_mpiCommActive);
00929   if(!m_iRankActive) {
00930     std::cout<<std::endl;
00931     std::cout<<"Passed Test for Missed Primary "<<numFullLoopCtr<<std::endl;
00932     std::cout<<std::endl;
00933   }
00934   MPI_Barrier(m_mpiCommActive);
00935 #endif
00936 
00937   /*
00938      NOW, The Ugly Parallel Book-keeping and Corrections for the Missing Entries in the Previous Step...
00939      */
00940 
00941   // ~~~~~~~~~~~~~~~~~~ SECONDARY ~~~~~~~~~~~~~~~~~~~~~~~
00942   std::vector<unsigned int>               ScndScatterMap;
00943 
00944   std::vector<unsigned int>               ScndSendProcs;
00945   std::vector<unsigned int>               ScndSendCounts;
00946 
00947   std::vector<unsigned int>               ScndRecvProcs;
00948   std::vector<unsigned int>               ScndRecvCounts;
00949 
00950   //~~~~~~~~~~~~~~~~~~~~~~~~ Search for Failed Keys ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
00951   /*
00952      1. Each octant which has a missing entry in its node-list would have generated a primary and secondary key for that entry.
00953      This is the last for-j loop within the main i-loop.
00954      2. Multiple octants might want the same entry. Hence, we make these keys unique before sending these requests
00955      to the respective processors.
00956      3. However, we need to map the results back to the octants which generated these keys.
00957      So we keep a back-up of the original list of keys. This list will have duplicates.
00958      4. Send the unique set of Primary and Secondary keys computed above to the processors which control those domains.
00959      5. Each processor recieves the keys that lie within its domain.
00960      Now, multiple processors might have requested for the same key. So, we make the list of keys recieved unique.
00961      We need to map them back to the processors that requested them. Hence, we store the pair of keys and 
00962      a list of processors that requested that key. This is done using the NodeAndRanks class. The sort order
00963      for this class is defined only on TreeNode.
00964      6. Each processor performs a local search with this set of unique keys. Note only non-hanging nodes are considered as matches.
00965      7. The results are returned to the processors that requested the respective keys.
00966      8. The results are matched with the octants that generated the keys. 
00967      9. If a primary key returned a positive result, then it is used else the secondary key is used. 
00968      10. There is a special for the secondary key for the nlist of a pre-ghost element. It is possible that while the primary key was NOT recieved during the a-priori communication, but the secondary key was recieved. In such situations if the secondary key is to be selected, then we must select the copy that was recieved during the a-priori comm and not the one got from this second-ring correction. This must be done in order to prevent having duplicate elements in our local buffer.
00969      11. Each of the octants that were selected is given an unique id. This will be used to fix the nlist.
00970      12. The hanging masks need to be corrected as well.
00971      13. Since, the primary key could be owned by one processor and the secondary key by another. Only the processor which owns the octant that generated
00972      the keys can decide what key is actually picked. The decision is then communicated to the processors that own the keys.
00973      14. A processor might be sending some primary results and some secondary results to the same processor. So both the sets must be merged and
00974      a single scattermap is built. This is the secondary scattermap. This is from the primary scattermap, which is built in the constructor 
00975      before entering this function (BuildNodeList).
00976      15. Finally, the same processor might have sent some of its elements in the first ring (apriori comm inside the constructor) and it
00977      might send some of its elements in the second ring (below). When the actual data is sent, we want the two sets to come together.
00978      We want both the first set of octants and the second set of octants to be merged and to be sorted.
00979      16. The first set of octants and the second set of octants have to be merged and sorted. All the second set of octants are marked as FOREIGNs. So, we don't loop over them in the MatVec and we don't build their LUTs. They are simply place holders for ghost values.
00980      17. The old LUT has to be re-mapped to the new LUT, since the indices will change after step 15.
00981      18. Similarly, the scattermaps must be merged and corrected to point to the new indices.
00982      19. We can have missing nodes both for pre-ghosts as well as own elements. Hence, this second ring correction is done inside the main outer loop (numFullLoopCtr).
00983      */
00984 
00985   //~~~~~~~~~~~Correction for Second Ring Begins~~~~~~~~~~~~~~~~~~//
00986   //First Get the min and max from each processor.
00987 
00988   ot::TreeNode rootNode(m_uiDimension,m_uiMaxDepth);
00989 
00990   assert(!in.empty());
00991   assert(m_uiElementBegin < in.size());
00992   assert(m_uiPostGhostBegin >= 1);
00993   assert((m_uiPostGhostBegin-1) < in.size());
00994 
00995   std::vector<ot::TreeNode> failedPrimaryKeys = primaryKeys;
00996   std::vector<ot::TreeNode> failedSecondaryKeys = secondaryKeys;
00997 
00998   //Sort and Make Unique
00999   seq::makeVectorUnique<ot::TreeNode>(failedPrimaryKeys, false);
01000   seq::makeVectorUnique<ot::TreeNode>(failedSecondaryKeys, false);
01001 
01002 #ifdef __DEBUG_DA_NLIST__
01003   MPI_Barrier(m_mpiCommActive);
01004 
01005   if(!m_iRankActive) {
01006     std::cout<<std::endl;
01007     std::cout<<"Made Failed Keys Unique. Finding Partition Next."<<std::endl;
01008     std::cout<<std::endl;
01009   }
01010   MPI_Barrier(m_mpiCommActive);
01011 #endif
01012 
01013   unsigned int PrimaryKeysSz = static_cast<unsigned int>(failedPrimaryKeys.size());
01014   unsigned int SecondaryKeysSz = static_cast<unsigned int>(failedSecondaryKeys.size());
01015 
01016   //Now determine the processors which own these keys.
01017   unsigned int *partPrimary = NULL;
01018   if(PrimaryKeysSz) {
01019     partPrimary = new unsigned int[PrimaryKeysSz];    
01020   }
01021   unsigned int *partSecondary = NULL;
01022   if(SecondaryKeysSz) {
01023     partSecondary = new unsigned int[SecondaryKeysSz];    
01024   }
01025 
01026   for (unsigned int i=0; i<PrimaryKeysSz; i++) {
01027     unsigned int idx;
01028     //maxLB returns the last index in a sorted array such that a[ind] <= key and  a[index +1] > key
01029     bool found = seq::maxLowerBound<TreeNode >(m_tnMinAllBlocks, failedPrimaryKeys[i], idx, NULL, NULL);
01030     assert(found);
01031     partPrimary[i] = idx;
01032   }//end for i
01033 
01034   for (unsigned int i=0; i<SecondaryKeysSz; i++) {
01035     unsigned int idx;
01036     //maxLB returns the last index in a sorted array such that a[ind] <= key and  a[index +1] > key
01037     bool found = seq::maxLowerBound<TreeNode >(m_tnMinAllBlocks, failedSecondaryKeys[i], idx, NULL, NULL);
01038     assert(found);
01039     partSecondary[i] = idx;
01040   }//end for i
01041 
01042   int *numKeysSendP = new int[m_iNpesActive];
01043   int *numKeysSendS = new int[m_iNpesActive];
01044   int *numKeysRecvP = new int[m_iNpesActive];    
01045   int *numKeysRecvS = new int[m_iNpesActive];
01046 
01047   for (int i=0; i<m_iNpesActive; i++) {
01048     numKeysSendP[i] = 0;
01049     numKeysSendS[i] = 0;
01050   }
01051   // calculate the number of keys to send ...
01052   for (unsigned int i=0; i<PrimaryKeysSz; i++) {
01053     numKeysSendP[partPrimary[i]]++;      
01054   }
01055   for (unsigned int i=0; i<SecondaryKeysSz; i++) {
01056     numKeysSendS[partSecondary[i]]++;
01057   }    
01058 
01059 #ifdef __DEBUG_DA_NLIST__
01060   MPI_Barrier(m_mpiCommActive);
01061   if(!m_iRankActive) {
01062     std::cout<<std::endl;
01063     std::cout<<"First ALL2ALL for Second Ring..."<<std::endl;
01064     std::cout<<std::endl;
01065   }
01066   MPI_Barrier(m_mpiCommActive);
01067 #endif
01068 
01069 
01070   // Now do an All2All to get inumKeysRecv
01071   PROF_BUILD_NLIST_COMM_BEGIN
01072 
01073     par::Mpi_Alltoall<int>(numKeysSendP, numKeysRecvP, 1, m_mpiCommActive);
01074   par::Mpi_Alltoall<int>(numKeysSendS, numKeysRecvS, 1, m_mpiCommActive);
01075 
01076   PROF_BUILD_NLIST_COMM_END
01077 
01078     // Now create sendK
01079     int *sendOffsetsP = new int[m_iNpesActive]; sendOffsetsP[0] = 0;
01080   int *recvOffsetsP = new int[m_iNpesActive]; recvOffsetsP[0] = 0;
01081   int *numKeysTmpP = new int[m_iNpesActive]; numKeysTmpP[0] = 0; 
01082 
01083   int *sendOffsetsS = new int[m_iNpesActive]; sendOffsetsS[0] = 0;
01084   int *recvOffsetsS = new int[m_iNpesActive]; recvOffsetsS[0] = 0;
01085   int *numKeysTmpS = new int[m_iNpesActive]; numKeysTmpS[0] = 0; 
01086 
01087   // compute offsets ...
01088   for (int i = 1; i < m_iNpesActive; i++) {
01089     sendOffsetsP[i] = sendOffsetsP[i-1] + numKeysSendP[i-1];
01090     recvOffsetsP[i] = recvOffsetsP[i-1] + numKeysRecvP[i-1];
01091     numKeysTmpP[i] = 0; 
01092 
01093     sendOffsetsS[i] = sendOffsetsS[i-1] + numKeysSendS[i-1];
01094     recvOffsetsS[i] = recvOffsetsS[i-1] + numKeysRecvS[i-1];
01095     numKeysTmpS[i] = 0; 
01096   }
01097 
01098   // create the send and recv buffers ...
01099   std::vector<ot::TreeNode> sendKp (PrimaryKeysSz);
01100   std::vector<ot::TreeNode> recvKp (recvOffsetsP[m_iNpesActive-1] + numKeysRecvP[m_iNpesActive-1]);
01101 
01102   std::vector<ot::TreeNode> sendKs (SecondaryKeysSz);
01103   std::vector<ot::TreeNode> recvKs (recvOffsetsS[m_iNpesActive-1] + numKeysRecvS[m_iNpesActive-1]);
01104 
01105   for (unsigned int i=0; i< PrimaryKeysSz; i++) {
01106     unsigned int ni = numKeysTmpP[partPrimary[i]];
01107     numKeysTmpP[partPrimary[i]]++;
01108     // set entry ...
01109     sendKp[sendOffsetsP[partPrimary[i]] + ni] = failedPrimaryKeys[i];      
01110   } 
01111 
01112   for (unsigned int i=0; i< SecondaryKeysSz; i++) {
01113     unsigned int ni = numKeysTmpS[partSecondary[i]];
01114     numKeysTmpS[partSecondary[i]]++;
01115     // set entry ...
01116     sendKs[sendOffsetsS[partSecondary[i]] + ni] = failedSecondaryKeys[i];       
01117   } 
01118 
01119   failedPrimaryKeys.clear();
01120   failedSecondaryKeys.clear();
01121 
01122   if(partPrimary) {
01123     delete [] partPrimary;
01124     partPrimary = NULL;
01125   }
01126 
01127   if(partSecondary) {
01128     delete [] partSecondary;
01129     partSecondary = NULL;
01130   }
01131 
01132   if(numKeysTmpP) {
01133     delete [] numKeysTmpP;    
01134     numKeysTmpP = NULL;
01135   }
01136 
01137   if(numKeysTmpS) {
01138     delete [] numKeysTmpS;
01139     numKeysTmpS = NULL;
01140   }
01141 
01142   ot::TreeNode* sendKpPtr = NULL;
01143   ot::TreeNode* recvKpPtr = NULL;
01144   ot::TreeNode* sendKsPtr = NULL;
01145   ot::TreeNode* recvKsPtr = NULL;
01146   if(!sendKp.empty()) {
01147     sendKpPtr = &(*(sendKp.begin()));
01148   }
01149   if(!recvKp.empty()) {
01150     recvKpPtr = &(*(recvKp.begin()));
01151   }
01152   if(!sendKs.empty()) {
01153     sendKsPtr = &(*(sendKs.begin()));
01154   }
01155   if(!recvKs.empty()) {
01156     recvKsPtr = &(*(recvKs.begin()));
01157   }
01158 
01159   PROF_BUILD_NLIST_COMM_BEGIN
01160 
01161     par::Mpi_Alltoallv_sparse<ot::TreeNode>( sendKpPtr, numKeysSendP, sendOffsetsP,
01162         recvKpPtr, numKeysRecvP, recvOffsetsP, m_mpiCommActive);
01163 
01164   par::Mpi_Alltoallv_sparse<ot::TreeNode>( sendKsPtr, numKeysSendS, sendOffsetsS,
01165       recvKsPtr, numKeysRecvS, recvOffsetsS, m_mpiCommActive);
01166 
01167   PROF_BUILD_NLIST_COMM_END
01168 
01169     sendKp.clear();
01170   sendKs.clear();
01171 
01172   delete [] sendOffsetsP;
01173   sendOffsetsP = NULL;
01174 
01175   delete [] recvOffsetsP;
01176   recvOffsetsP = NULL;
01177 
01178   delete [] numKeysSendP;
01179   numKeysSendP = NULL;
01180 
01181   delete [] numKeysRecvP;
01182   numKeysRecvP = NULL;
01183 
01184   delete [] sendOffsetsS;
01185   sendOffsetsS = NULL;
01186 
01187   delete [] recvOffsetsS;
01188   recvOffsetsS = NULL;
01189 
01190   delete [] numKeysSendS;
01191   numKeysSendS = NULL;
01192 
01193   delete [] numKeysRecvS;
01194   numKeysRecvS = NULL;
01195 
01196   std::vector<ot::NodeAndRanks> recvK2P;
01197   std::vector<ot::NodeAndRanks> recvK2S;
01198   //recvKp and recvKs are NOT sorted and NOT unique.
01199   //First merge recvK into recvK2.
01200   for (int i = 0; i < recvKp.size(); i++) {
01201     ot::NodeAndRanks tmp;
01202     tmp.node = recvKp[i];
01203     tmp.ranks.push_back(recvKp[i].getWeight());
01204     recvK2P.push_back(tmp);
01205   }
01206 
01207   for (int i=0;i<recvKs.size();i++) {
01208     ot::NodeAndRanks tmp;
01209     tmp.node = recvKs[i];
01210     tmp.ranks.push_back(recvKs[i].getWeight());
01211     recvK2S.push_back(tmp);
01212   }
01213 
01214   recvKp.clear();  
01215   recvKs.clear();  
01216 
01217   std::sort(recvK2P.begin(),recvK2P.end());
01218   std::sort(recvK2S.begin(),recvK2S.end());
01219 
01220   //Make recvK2P Unique and concatenate ranks.
01221   if (recvK2P.size() >= 2) {
01222     std::vector<ot::NodeAndRanks> tmp(recvK2P.size());
01223     tmp[0] = recvK2P[0];
01224     unsigned int tmpSize=1;
01225     for (unsigned int i=1;i<recvK2P.size();i++) {
01226       if (tmp[tmpSize-1] != recvK2P[i]) {
01227         //new entry
01228         tmp[tmpSize] = recvK2P[i];
01229         tmpSize++;
01230       } else {
01231         tmp[tmpSize-1].ranks.push_back(recvK2P[i].ranks[0]);
01232       }
01233     }//end for
01234     tmp.resize(tmpSize);
01235     for (unsigned int i=0;i<tmpSize;i++) {
01236       seq::makeVectorUnique<int>(tmp[i].ranks,false);
01237     }
01238     recvK2P = tmp; 
01239     tmp.clear();
01240   }
01241 
01242   //Make recvK2S Unique and concatenate ranks.
01243   if (recvK2S.size() >= 2) {
01244     std::vector<ot::NodeAndRanks> tmp(recvK2S.size());
01245     tmp[0] = recvK2S[0];
01246     unsigned int tmpSize=1;
01247     for (unsigned int i=1;i<recvK2S.size();i++) {
01248       if (tmp[tmpSize-1] != recvK2S[i]) {
01249         //new entry
01250         tmp[tmpSize] = recvK2S[i];
01251         tmpSize++;
01252       } else {
01253         tmp[tmpSize-1].ranks.push_back(recvK2S[i].ranks[0]);
01254       }
01255     }//end for
01256     tmp.resize(tmpSize);
01257     for (unsigned int i=0;i<tmpSize;i++) {
01258       seq::makeVectorUnique<int>(tmp[i].ranks,false);
01259     }
01260     recvK2S = tmp; 
01261     tmp.clear();
01262   }
01263 
01264   //Local Search and update sendNodes and sendCnt.              
01265   std::vector<std::vector<ot::TreeNode> > sendNodesP(m_iNpesActive);
01266   std::vector<std::vector<ot::TreeNode> > sendNodesS(m_iNpesActive);
01267   //int is necessary here. Set to -1 later.
01268   std::vector<std::vector<int> > idxP(m_iNpesActive);
01269   std::vector<std::vector<int> > idxS(m_iNpesActive);
01270 
01271 #ifdef __DEBUG_DA_NLIST__
01272   MPI_Barrier(m_mpiCommActive);
01273   if(!m_iRankActive) {
01274     std::cout<<std::endl;
01275     std::cout<<"Starting Local Search for Second Ring..."<<std::endl;
01276     std::cout<<std::endl;
01277   }
01278   MPI_Barrier(m_mpiCommActive);
01279 #endif
01280 
01281   //in is sorted and unique and linear and recvK2 is sorted and unique and linear.    
01282   for (unsigned int i=0; i<recvK2P.size();i++) {
01283     unsigned int idx;    
01284     bool found = seq::maxLowerBound<ot::TreeNode >(in, recvK2P[i].node, idx,NULL,NULL);
01285 
01286     if (found) {
01287 #ifdef __DEBUG_DA_NLIST__
01288       assert( (in[idx].isAncestor(recvK2P[i].node)) || (in[idx] == (recvK2P[i].node)) );
01289       assert( (idx >= m_uiElementBegin) && (idx < m_uiPostGhostBegin) );
01290 #endif
01291 
01292       if ( (in[idx].getAnchor() != recvK2P[i].node.getAnchor()) || 
01293           (!(in[idx].getFlag() & ot::TreeNode::NODE)) ) {
01294         found = false;
01295       }
01296     }
01297 
01298     //Send the result to all the processors that want it.
01299     for (int j=0; j < recvK2P[i].ranks.size(); j++) {
01300       if ( found ) {
01301         //Send in[idx];
01302         sendNodesP[recvK2P[i].ranks[j]].push_back(in[idx]);          
01303         idxP[recvK2P[i].ranks[j]].push_back(idx);
01304       } else {
01305         //Send rootNode;  
01306         sendNodesP[recvK2P[i].ranks[j]].push_back(rootNode);          
01307         idxP[recvK2P[i].ranks[j]].push_back(-1);
01308       }
01309       sendNodesP[recvK2P[i].ranks[j]][sendNodesP[recvK2P[i].ranks[j]].size()-1].setWeight(m_iRankActive);
01310     }//end for j
01311   }//end for i          
01312 
01313   for (unsigned int i=0; i<recvK2S.size();i++) {
01314     unsigned int idx;    
01315     bool found = seq::maxLowerBound<ot::TreeNode >(in, recvK2S[i].node, idx, NULL, NULL);
01316     if (found) {
01317 #ifdef __DEBUG_DA_NLIST__
01318       assert( (in[idx].isAncestor(recvK2S[i].node)) || (in[idx] == (recvK2S[i].node)) );
01319       assert( (idx >= m_uiElementBegin) && (idx < m_uiPostGhostBegin) );
01320 #endif
01321       if ( (in[idx].getAnchor() != recvK2S[i].node.getAnchor()) || (!(in[idx].getFlag() & ot::TreeNode::NODE)) ) {
01322         found = false;
01323       }
01324     }
01325     //Send the result to all the processors that want it.
01326     for (int j=0; j < recvK2S[i].ranks.size(); j++) {
01327       if ( found ) {
01328         //Send in[idx];
01329         sendNodesS[recvK2S[i].ranks[j]].push_back(in[idx]);
01330         idxS[recvK2S[i].ranks[j]].push_back(idx);
01331       } else {
01332         //Send rootNode;  
01333         sendNodesS[recvK2S[i].ranks[j]].push_back(rootNode);
01334         idxS[recvK2S[i].ranks[j]].push_back(-1);
01335       }
01336       sendNodesS[recvK2S[i].ranks[j]][sendNodesS[recvK2S[i].ranks[j]].size()-1].setWeight(m_iRankActive);
01337     }
01338   }//end for i          
01339 
01340   recvK2P.clear(); 
01341   recvK2S.clear(); 
01342 
01343   for (unsigned int i = 0; i < m_iNpesActive; i++) {
01344     seq::makeVectorUnique<TreeNode>(sendNodesP[i],false);
01345     seq::makeVectorUnique<TreeNode>(sendNodesS[i],false);
01346     seq::makeVectorUnique<int>(idxP[i],false);
01347     seq::makeVectorUnique<int>(idxS[i],false);
01348   }
01349 
01350   numKeysSendP = new int[m_iNpesActive];
01351   numKeysSendS = new int[m_iNpesActive];
01352   numKeysRecvP = new int[m_iNpesActive];    
01353   numKeysRecvS = new int[m_iNpesActive];
01354 
01355   for (int i=0; i<m_iNpesActive; i++) {
01356     numKeysSendP[i] = static_cast<int>(sendNodesP[i].size());
01357     numKeysSendS[i] = static_cast<int>(sendNodesS[i].size());    
01358   }        
01359 
01360 #ifdef __DEBUG_DA_NLIST__
01361   MPI_Barrier(m_mpiCommActive);
01362   if(!m_iRankActive) {
01363     std::cout<<std::endl;
01364     std::cout<<"Sending Results after Local Search for Second Ring..."<<std::endl;
01365     std::cout<<std::endl;
01366   }
01367   MPI_Barrier(m_mpiCommActive);
01368 #endif
01369 
01370   // Now do an All2All to get inumKeysRecv
01371   PROF_BUILD_NLIST_COMM_BEGIN
01372 
01373     par::Mpi_Alltoall<int>(numKeysSendP, numKeysRecvP, 1, m_mpiCommActive);
01374   par::Mpi_Alltoall<int>(numKeysSendS, numKeysRecvS, 1, m_mpiCommActive);    
01375 
01376   PROF_BUILD_NLIST_COMM_END
01377 
01378     // Now create sendK
01379     sendOffsetsP = new int[m_iNpesActive]; sendOffsetsP[0] = 0;
01380   sendOffsetsS = new int[m_iNpesActive]; sendOffsetsS[0] = 0;
01381   recvOffsetsP = new int[m_iNpesActive]; recvOffsetsP[0] = 0;   
01382   recvOffsetsS = new int[m_iNpesActive]; recvOffsetsS[0] = 0;
01383 
01384   // compute offsets ...
01385   for (int i=1; i<m_iNpesActive; i++) {
01386     sendOffsetsP[i] = sendOffsetsP[i-1] + numKeysSendP[i-1];
01387     recvOffsetsP[i] = recvOffsetsP[i-1] + numKeysRecvP[i-1];
01388 
01389     sendOffsetsS[i] = sendOffsetsS[i-1] + numKeysSendS[i-1];
01390     recvOffsetsS[i] = recvOffsetsS[i-1] + numKeysRecvS[i-1];    
01391   }
01392 
01393   // create the send and recv buffers ...
01394   sendKp.resize(sendOffsetsP[m_iNpesActive-1] + numKeysSendP[m_iNpesActive-1]);
01395   recvKp.resize(recvOffsetsP[m_iNpesActive-1] + numKeysRecvP[m_iNpesActive-1]);
01396 
01397   sendKs.resize(sendOffsetsS[m_iNpesActive-1] + numKeysSendS[m_iNpesActive-1]);
01398   recvKs.resize(recvOffsetsS[m_iNpesActive-1] + numKeysRecvS[m_iNpesActive-1]);
01399 
01400   std::vector<int> idxSendKp(sendOffsetsP[m_iNpesActive-1] + numKeysSendP[m_iNpesActive-1]);
01401   std::vector<int> idxSendKs(sendOffsetsS[m_iNpesActive-1] + numKeysSendS[m_iNpesActive-1]);
01402 
01403   for (unsigned int i = 0; i < m_iNpesActive; i++) {
01404     for (unsigned int j = 0; j < numKeysSendP[i]; j++) {
01405       // set entry ...
01406       sendKp[sendOffsetsP[i] + j] = sendNodesP[i][j];       
01407       idxSendKp[sendOffsetsP[i] + j] = idxP[i][j];       
01408 
01409 #ifdef __DEBUG_DA_NLIST__
01410       assert( ((sendKp[sendOffsetsP[i] + j] == rootNode) && (idxSendKp[sendOffsetsP[i] + j] == -1)) ||
01411           (in[idxSendKp[sendOffsetsP[i] + j]] == sendKp[sendOffsetsP[i] + j]) );
01412 #endif
01413     }
01414     for (unsigned int j = 0; j < numKeysSendS[i]; j++) {
01415       // set entry ...
01416       sendKs[sendOffsetsS[i] + j] = sendNodesS[i][j];    
01417       idxSendKs[sendOffsetsS[i] + j] = idxS[i][j];       
01418 
01419 #ifdef __DEBUG_DA_NLIST__
01420       assert( ((sendKs[sendOffsetsS[i] + j] == rootNode) && (idxSendKs[sendOffsetsS[i] + j] == -1)) ||
01421           (in[idxSendKs[sendOffsetsS[i] + j]] == sendKs[sendOffsetsS[i] + j]) );
01422 #endif
01423     }
01424   } 
01425 
01426   sendNodesP.clear();
01427   sendNodesS.clear();
01428 
01429   idxP.clear();
01430   idxS.clear();
01431 
01432   sendKpPtr = NULL;
01433   recvKpPtr = NULL;
01434   sendKsPtr = NULL;
01435   recvKsPtr = NULL;
01436   if(!sendKp.empty()) {
01437     sendKpPtr = &(*(sendKp.begin()));
01438   }
01439   if(!sendKs.empty()) {
01440     sendKsPtr = &(*(sendKs.begin()));
01441   }
01442   if(!recvKp.empty()) {
01443     recvKpPtr = &(*(recvKp.begin()));
01444   }
01445   if(!recvKs.empty()) {
01446     recvKsPtr = &(*(recvKs.begin()));
01447   }
01448 
01449   PROF_BUILD_NLIST_COMM_BEGIN
01450 
01451     par::Mpi_Alltoallv_sparse<ot::TreeNode>( sendKpPtr, numKeysSendP, sendOffsetsP,
01452         recvKpPtr, numKeysRecvP, recvOffsetsP, m_mpiCommActive);
01453 
01454   par::Mpi_Alltoallv_sparse<ot::TreeNode>( sendKsPtr, numKeysSendS, sendOffsetsS,
01455       recvKsPtr, numKeysRecvS, recvOffsetsS, m_mpiCommActive);
01456 
01457   PROF_BUILD_NLIST_COMM_END
01458 
01459     //Store the sizes, will need it later
01460     unsigned int actualRecvKpSz = static_cast<unsigned int>(recvKp.size());
01461   unsigned int actualRecvKsSz = static_cast<unsigned int>(recvKs.size());
01462 
01463   //The result is sorted except for the fact that there might be multiple
01464   //roots in the middle. So remove all roots.
01465   std::vector<TreeNode> tmpRecvKp;
01466   std::vector<TreeNode> tmpRecvKs;
01467 
01468   std::vector<unsigned int> kp2ActualKp;
01469   std::vector<unsigned int> ks2ActualKs;
01470 
01471   tmpRecvKp.push_back(rootNode);
01472   kp2ActualKp.push_back(recvKp.size());
01473   for(unsigned int i = 0; i < recvKp.size(); i++) {
01474     if(recvKp[i] > rootNode) {      
01475       tmpRecvKp.push_back(recvKp[i]);
01476       kp2ActualKp.push_back(i);
01477     }
01478   }
01479 
01480   tmpRecvKs.push_back(rootNode);
01481   ks2ActualKs.push_back(recvKs.size());
01482   for(unsigned int i = 0; i < recvKs.size(); i++) {
01483     if(recvKs[i] > rootNode) {
01484       tmpRecvKs.push_back(recvKs[i]);
01485       ks2ActualKs.push_back(i);
01486     }
01487   }
01488 
01489   recvKp = tmpRecvKp;
01490   recvKs = tmpRecvKs;
01491 
01492   tmpRecvKp.clear();
01493   tmpRecvKs.clear();
01494 
01495 #ifdef __DEBUG_DA_NLIST__
01496   MPI_Barrier(m_mpiCommActive);
01497   assert(seq::test::isUniqueAndSorted(recvKp));
01498   assert(seq::test::isUniqueAndSorted(recvKs));
01499   if(!m_iRankActive) {
01500     std::cout<<std::endl;
01501     std::cout<<"Processing Results in Second Ring..."<<std::endl;
01502     std::cout<<std::endl;
01503   }
01504   MPI_Barrier(m_mpiCommActive);
01505 #endif
01506 
01507   unsigned int *oldToNewIdx = NULL;
01508   bool *fromPrimary = NULL;
01509   bool *fromSecondary = NULL;
01510   if(!extraAtEnd.empty()) {
01511     oldToNewIdx = new unsigned int[extraAtEnd.size()];
01512     fromPrimary = new bool[extraAtEnd.size()];
01513     fromSecondary = new bool[extraAtEnd.size()];
01514   }
01515   std::vector<seq::IndexHolder<ot::TreeNode> > extraIndices (extraAtEnd.size());
01516 
01517   for (unsigned int i = 0; i < extraAtEnd.size(); i++) {
01518     unsigned int idx;    
01519     bool found = seq::maxLowerBound<ot::TreeNode >(recvKp, primaryKeys[i], idx, NULL, NULL);
01520 
01521     if (found) {
01522 
01523       if (recvKp[idx] == rootNode) {
01524         found = false;
01525         fromPrimary[i] = false;
01526       } else {
01527 #ifdef __DEBUG_DA_NLIST__
01528         assert( recvKp[idx].getFlag() & ot::TreeNode::NODE );
01529 #endif
01530         if( recvKp[idx].getAnchor() != primaryKeys[i].getAnchor() ) {
01531           //Treat just like it was root.
01532           found = false;
01533           fromPrimary[i] = false;
01534         }else {
01535           oldToNewIdx[i] = idx; 
01536           fromPrimary[i] = true;
01537           fromSecondary[i] = false;
01538           TreeNode tmp = recvKp[idx];
01539           tmp.setWeight(extraAtEnd[i].getWeight());
01540           extraAtEnd[i] = tmp;
01541           extraIndices[i].value = &(extraAtEnd[i]);
01542           extraIndices[i].index = i;
01543         }
01544       }        
01545     }else {
01546       assert(false);
01547     }
01548 
01549     if (!found) {
01550       unsigned int idx;    
01551       found = seq::maxLowerBound<ot::TreeNode >(recvKs, secondaryKeys[i], idx, NULL, NULL);
01552 
01553 #ifdef __DEBUG_DA_NLIST__
01554       assert( found );
01555       assert( recvKs[idx] != rootNode );
01556       assert( recvKs[idx].getFlag() & ot::TreeNode::NODE );
01557       assert( recvKs[idx].getAnchor() == secondaryKeys[i].getAnchor() );
01558 #endif
01559       /*
01560          Handle Special Case: When a pre-ghost element did not find its primary key in the local search, the secondary key is never searched for. In such situations, when the primary result is not usable and we must pick the secondary result got from the explicit parallel search, we must first check if this secondary result is already present in the local buffer (either own or elements got through a-priori comm.) If so, we must use the one that is already present and discard the one got through the parallel search. This must be done in order to avoid duplicate octants in the local buffer.
01561          */
01562       unsigned int idxTmp;
01563       ot::TreeNode* inPtr = NULL;
01564       if(!in.empty()) {
01565         inPtr = &(*(in.begin()));
01566       }
01567       found = seq::BinarySearch<ot::TreeNode>(inPtr,
01568           static_cast<unsigned int>(in.size()), recvKs[idx], &idxTmp);
01569 
01570       if(found) {
01571         fromSecondary[i] = false;
01572         oldToNewIdx[i] = idxTmp;
01573         TreeNode tmp = in[idxTmp];
01574         tmp.setWeight(extraAtEnd[i].getWeight());
01575         extraAtEnd[i] = tmp;
01576         extraIndices[i].value = &(extraAtEnd[i]);
01577         extraIndices[i].index = i;
01578       }else {    
01579         fromSecondary[i] = true;
01580         oldToNewIdx[i] = idx; 
01581         TreeNode tmp = recvKs[idx];
01582         tmp.setWeight(extraAtEnd[i].getWeight());
01583         extraAtEnd[i] = tmp;
01584         extraIndices[i].value = &(extraAtEnd[i]);
01585         extraIndices[i].index = i;
01586       }
01587     }
01588   }//end for i
01589 
01590   primaryKeys.clear();
01591   secondaryKeys.clear();
01592   extraAtEnd.clear();
01593 
01594   std::sort(extraIndices.begin(),extraIndices.end());
01595 
01596   std::vector<ot::TreeNode> sortedUniqueExtras(extraIndices.size());    
01597   std::vector<unsigned int> chosenIndexIntoOld(extraIndices.size());
01598 
01599   //Old refers to the unsorted and non-unique extraAtEnd list 
01600   std::vector<std::vector< unsigned int> > indicesIntoOld(extraIndices.size());
01601 
01602   //LUT refers to localOcts (in)
01603   std::vector<std::vector< unsigned int> > indicesIntoLUT(extraIndices.size());
01604 
01605   if (!extraIndices.empty()) {
01606     sortedUniqueExtras[0] = *(extraIndices[0].value);
01607     chosenIndexIntoOld[0] = extraIndices[0].index;
01608 
01609     indicesIntoOld[0].push_back(extraIndices[0].index);
01610     indicesIntoLUT[0].push_back(extraIndices[0].value->getWeight());
01611   }
01612   //Make  Unique and concatenate ranks.
01613   if (extraIndices.size() >= 2) {
01614     unsigned int tmpSize=1;
01615     for (unsigned int i=1;i<extraIndices.size();i++) {
01616       if (sortedUniqueExtras[tmpSize-1] != *(extraIndices[i].value)) {
01617         //New entry
01618         sortedUniqueExtras[tmpSize] = *(extraIndices[i].value);
01619         chosenIndexIntoOld[tmpSize] =  extraIndices[i].index;
01620 
01621         indicesIntoOld[tmpSize].push_back(extraIndices[i].index);
01622         indicesIntoLUT[tmpSize].push_back(extraIndices[i].value->getWeight());
01623         tmpSize++;
01624       } else {
01625         indicesIntoOld[tmpSize-1].push_back(extraIndices[i].index);
01626         indicesIntoLUT[tmpSize-1].push_back(extraIndices[i].value->getWeight());
01627       }
01628     }//end for
01629 
01630     sortedUniqueExtras.resize(tmpSize);
01631 
01632 #ifdef __DEBUG_DA_NLIST__
01633     assert(seq::test::isUniqueAndSorted<TreeNode >(sortedUniqueExtras));
01634 #endif
01635 
01636     chosenIndexIntoOld.resize(tmpSize);
01637 
01638     indicesIntoOld.resize(tmpSize);
01639     indicesIntoLUT.resize(tmpSize);      
01640 
01641     for (unsigned int i=0;i<tmpSize;i++) {
01642       std::sort(indicesIntoOld[i].begin(),indicesIntoOld[i].end());
01643       std::sort(indicesIntoLUT[i].begin(),indicesIntoLUT[i].end());        
01644 #ifdef __DEBUG_DA_NLIST__
01645       //Ensuring that all the 8 vertices of an element map to different indices. 
01646       assert(seq::test::isUniqueAndSorted<unsigned int>(indicesIntoOld[i]));
01647       assert(seq::test::isUniqueAndSorted<unsigned int>(indicesIntoLUT[i]));
01648 #endif
01649     }//end for i      
01650 
01651   }
01652 
01653   extraIndices.clear();
01654 
01655   /*
01656      std::vector<bool> pickPrimary(sortedUniqueExtras.size());
01657      std::vector<unsigned int> oldIndex(sortedUniqueExtras.size());
01658 
01659   //The same extra key can come from primary or secondary and from multiple
01660   //pre-ghosts. So pick one.
01661   //Some arbit default rule is used to make this decision...
01662   //Here. The rule is simply if this key was some element's primary key use
01663   //that. In this case, the smallest element is chosen. Else, the last
01664   //element is chosen (secondary key).
01665   //Note: indicesIntoOld[i] is sorted and unique.
01666   for (unsigned int i=0; i < sortedUniqueExtras.size(); i++) {
01667   pickPrimary[i] = false;
01668   for (unsigned int j = 0; j < indicesIntoOld[i].size(); j++) {
01669   oldIndex[i] = indicesIntoOld[i][j];
01670   if (fromPrimary[indicesIntoOld[i][j]]) {
01671   pickPrimary[i] = true;         
01672   break;       
01673   }
01674   }//end for j
01675   }//end for i
01676   */
01677 
01678   unsigned int *recvCntsExtras = new unsigned int[m_iNpesActive];        
01679 
01680   std::vector<char> pickingP(actualRecvKpSz);
01681   std::vector<char> pickingS(actualRecvKsSz);
01682 
01683   std::vector<char> pickedP(sendKp.size());
01684   std::vector<char> pickedS(sendKs.size());
01685 
01686   for (unsigned int i = 0; i < actualRecvKpSz; i++) {
01687     pickingP[i] = 0;
01688   }
01689 
01690   for (unsigned int i = 0; i < actualRecvKsSz; i++) {
01691     pickingS[i] = 0;
01692   }
01693 
01694   for (unsigned int i = 0; i < m_iNpesActive; i++) {
01695     recvCntsExtras[i] = 0;
01696   }
01697 
01698   // std::cout << m_iRankActive << " Correcting LuTs" << std::endl;
01699 
01700   //Do not do an in-place update of nlist. Instead store the corrections. 
01701   std::vector< std::vector<unsigned int> > secondRingCorrections;
01702 
01703   for (unsigned int i=0;i<sortedUniqueExtras.size();i++) {
01704 
01705     /*
01706        if (pickPrimary[i]) {
01707 #ifdef __DEBUG_DA_NLIST__
01708 assert( recvKp[oldToNewIdx[oldIndex[i]]] == sortedUniqueExtras[i] );
01709 assert( recvKp[oldToNewIdx[oldIndex[i]]] != rootNode );
01710 #endif
01711 in.push_back(recvKp[oldToNewIdx[oldIndex[i]]]);        
01712 recvCntsExtras[recvKp[oldToNewIdx[oldIndex[i]]].getWeight()]++;
01713 pickingP[kp2ActualKp[oldToNewIdx[oldIndex[i]]]] = 1;
01714 } else {
01715 #ifdef __DEBUG_DA_NLIST__
01716 assert( recvKs[oldToNewIdx[oldIndex[i]]] == sortedUniqueExtras[i] );
01717 assert( recvKs[oldToNewIdx[oldIndex[i]]] != rootNode );
01718 #endif
01719 in.push_back(recvKs[oldToNewIdx[oldIndex[i]]]);        
01720 recvCntsExtras[recvKs[oldToNewIdx[oldIndex[i]]].getWeight()]++;
01721 pickingS[ks2ActualKs[oldToNewIdx[oldIndex[i]]]] = 1;
01722 }
01723 
01724 for (unsigned int j =0;j < indicesIntoLUT[i].size(); j++) {
01725 for (unsigned int k=0; k < 8; k++) {
01726 for (unsigned int l = 0;l < indicesIntoOld[i].size(); l++) {
01727 if (nlist[8*(indicesIntoLUT[i][j])+k] == (m_uiLocalBufferSize + indicesIntoOld[i][l])) {
01728 std::vector<unsigned int> ringCorrectionsTuple(3);
01729 ringCorrectionsTuple[0] = indicesIntoLUT[i][j];
01730 ringCorrectionsTuple[1] = k;
01731 ringCorrectionsTuple[2] = (in.size()-1);
01732 secondRingCorrections.push_back(ringCorrectionsTuple);
01733 break;
01734 } // if 
01735 } // for l                   
01736 }  // for k     
01737 } // for j
01738 */
01739 
01740   if (fromPrimary[chosenIndexIntoOld[i]]) {
01741 #ifdef __DEBUG_DA_NLIST__
01742     assert( recvKp[oldToNewIdx[chosenIndexIntoOld[i]]] == sortedUniqueExtras[i] );
01743     assert( recvKp[oldToNewIdx[chosenIndexIntoOld[i]]] != rootNode );
01744 #endif
01745     in.push_back(recvKp[oldToNewIdx[chosenIndexIntoOld[i]]]);        
01746     recvCntsExtras[recvKp[oldToNewIdx[chosenIndexIntoOld[i]]].getWeight()]++;
01747     pickingP[kp2ActualKp[oldToNewIdx[chosenIndexIntoOld[i]]]] = 1;
01748   } else if(fromSecondary[chosenIndexIntoOld[i]]) {
01749 #ifdef __DEBUG_DA_NLIST__
01750     assert( recvKs[oldToNewIdx[chosenIndexIntoOld[i]]] == sortedUniqueExtras[i] );
01751     assert( recvKs[oldToNewIdx[chosenIndexIntoOld[i]]] != rootNode );
01752 #endif
01753     in.push_back(recvKs[oldToNewIdx[chosenIndexIntoOld[i]]]);        
01754     recvCntsExtras[recvKs[oldToNewIdx[chosenIndexIntoOld[i]]].getWeight()]++;
01755     pickingS[ks2ActualKs[oldToNewIdx[chosenIndexIntoOld[i]]]] = 1;
01756   }
01757 
01758 //If the element is neither in primary nor in secondary,
01759 //then 'in' already has it. So no need to append it into 'in' again.
01760 //Note, that we will never select a copy of sortedUniqueExtras[i] from primary or secondary if the element already exists within in.
01761 
01762 for (unsigned int j =0;j < indicesIntoLUT[i].size(); j++) {
01763   for (unsigned int k=0; k < 8; k++) {
01764     for (unsigned int l = 0;l < indicesIntoOld[i].size(); l++) {
01765       if (nlist[8*(indicesIntoLUT[i][j])+k] == (m_uiLocalBufferSize + indicesIntoOld[i][l])) {
01766         std::vector<unsigned int> ringCorrectionsTuple(3);
01767         ringCorrectionsTuple[0] = indicesIntoLUT[i][j];
01768         ringCorrectionsTuple[1] = k;
01769 
01770         if( fromPrimary[chosenIndexIntoOld[i]] || fromSecondary[chosenIndexIntoOld[i]] ) {
01771           ringCorrectionsTuple[2] = (static_cast<unsigned int>(in.size()) - 1);
01772         }else {
01773           ringCorrectionsTuple[2] = oldToNewIdx[chosenIndexIntoOld[i]];
01774         }
01775 
01776         secondRingCorrections.push_back(ringCorrectionsTuple);
01777         break;
01778       } // if 
01779     } // for l                   
01780   }  // for k     
01781 } // for j
01782 
01783 } // for i
01784 
01785 for(unsigned int i = 0; i < secondRingCorrections.size(); i++) {
01786   //Actual Correction here...
01787   unsigned int elemId = secondRingCorrections[i][0];
01788   unsigned int vtxId = secondRingCorrections[i][1];
01789   unsigned int lutVal = secondRingCorrections[i][2];
01790   nlist[8*elemId+vtxId] = lutVal;
01791 
01792 #ifdef __DEBUG_DA_NLIST__
01793   assert( lutVal < in.size() );
01794 #endif
01795 
01796   // correct hanging node mask ...
01797   // check if any of the nodes is hanging ...
01798   unsigned int _x,_y,_z;
01799 
01800   unsigned int x = in[elemId].getX();
01801   unsigned int y = in[elemId].getY();
01802   unsigned int z = in[elemId].getZ();
01803   unsigned int d   = in[elemId].getLevel();
01804   unsigned int sz = (1u << (m_uiMaxDepth - d));
01805 
01806 #ifdef __DEBUG_DA_NLIST__
01807   //You might have to find the actual node via final corrections
01808   //(explicit parallel search) although you sent yourself apriori
01809   //(Second Ring). 
01810   if( (elemId >= m_uiElementBegin) && (elemId < m_uiPostGhostBegin) ) {
01811     if(in[lutVal] < in[m_uiElementBegin]) {
01812       std::cout<<m_iRankActive<<" Trying to send yourself as a Post Ghost ELEMENT for  elemId = "
01813         <<elemId<<" vtxId = "<<vtxId<<std::endl;
01814       assert(false);
01815     }
01816     if( (m_uiPostGhostBegin < in.size()) && (in[lutVal] >= in[m_uiPostGhostBegin]) ) {
01817       TreeNode tmpToSend = in[lutVal];
01818       tmpToSend.setWeight(elemId);
01819       checkSecondRing.push_back(tmpToSend);
01820     }
01821   }
01822 #endif
01823 
01824   _x = in[lutVal].getX(); 
01825   _y = in[lutVal].getY(); 
01826   _z = in[lutVal].getZ(); 
01827 
01828 #ifdef __DEBUG_DA_NLIST__
01829   if ( !(in[lutVal].getFlag() & ot::TreeNode::NODE ) ) {
01830     //Second ring must find a node only 
01831     assert(false);
01832   }
01833 #endif
01834 
01835   switch (vtxId) {
01836     case 0:
01837       assert(false);
01838       break;
01839     case 1:
01840       if ( ( (x+sz) != _x ) || ( y != _y ) || ( z != _z ) ) {
01841         m_ucpLutMasks[2*elemId+1] |= (1 << vtxId);
01842       }
01843       break;
01844     case 2:
01845       if ( ( x != _x ) || ( (y+sz) != _y ) || ( z != _z ) ) {
01846         m_ucpLutMasks[2*elemId+1] |= (1 << vtxId);
01847       }
01848       break;
01849     case 3:
01850       if ( ( (x+sz) != _x ) || ( (y+sz) != _y ) || ( z != _z ) ) {
01851         m_ucpLutMasks[2*elemId+1] |= (1 << vtxId);
01852       }
01853       break;
01854     case 4:
01855       if ( ( x != _x ) || ( y != _y ) || ( (z+sz) != _z ) ) {
01856         m_ucpLutMasks[2*elemId+1] |= (1 << vtxId);
01857       }
01858       break;
01859     case 5:
01860       if ( ( (x+sz) != _x ) || ( y != _y ) || ( (z+sz) != _z ) ) {
01861         m_ucpLutMasks[2*elemId+1] |= (1 << vtxId);
01862       }
01863       break;
01864     case 6:
01865       if ( ( x != _x ) || ( (y+sz) != _y ) || ( (z+sz) != _z ) ) {
01866         m_ucpLutMasks[2*elemId+1] |= (1 << vtxId);
01867       }
01868       break;
01869     case 7:
01870       if ( ( (x+sz) != _x ) || ( (y+sz) != _y ) || ( (z+sz) != _z ) ) {
01871         m_ucpLutMasks[2*elemId+1] |= (1 << vtxId);
01872       }
01873       break;
01874   } // switch (vtxId)
01875 
01876 }//end for i
01877 
01878 secondRingCorrections.clear();
01879 
01880 //pickPrimary.clear();
01881 //oldIndex.clear();
01882 
01883 sortedUniqueExtras.clear();
01884 chosenIndexIntoOld.clear();
01885 
01886 indicesIntoLUT.clear();
01887 indicesIntoOld.clear();
01888 
01889 kp2ActualKp.clear();
01890 ks2ActualKs.clear();
01891 
01892 sendKp.clear();
01893 sendKs.clear();
01894 recvKp.clear(); 
01895 recvKs.clear();  
01896 
01897 if(oldToNewIdx) {
01898   delete [] oldToNewIdx;
01899   oldToNewIdx = NULL;
01900 }
01901 
01902 if(fromPrimary) {
01903   delete [] fromPrimary;    
01904   fromPrimary = NULL;
01905 }
01906 
01907 if(fromSecondary) {
01908   delete [] fromSecondary;
01909   fromSecondary = NULL;
01910 }
01911 
01912 // Set secondary comm variables ...
01913 for (unsigned int i=0; i<m_iNpesActive; i++) {
01914   if (recvCntsExtras[i]) {
01915     ScndRecvProcs.push_back(i);
01916     ScndRecvCounts.push_back(recvCntsExtras[i]); 
01917 #ifdef __DEBUG_DA_NLIST__
01918     std::cout<<m_iRankActive<<" Scnd Recv P : "<<i<<" Scnd Recv C : "<< recvCntsExtras[i]<<std::endl;
01919     assert(i != m_iRankActive);
01920 #endif
01921   }
01922 }
01923 
01924 if(recvCntsExtras) {
01925   delete [] recvCntsExtras;    
01926   recvCntsExtras = NULL;
01927 }
01928 
01929 #ifdef __DEBUG_DA_NLIST__
01930 MPI_Barrier(m_mpiCommActive);
01931 if(!m_iRankActive) {
01932   std::cout<<std::endl;
01933   std::cout<<"Returning Selection in Second Ring..."<<std::endl;
01934   std::cout<<std::endl;
01935 }
01936 MPI_Barrier(m_mpiCommActive);
01937 #endif
01938 
01939 char* pickingPptr = NULL;
01940 char* pickedPptr = NULL;
01941 char* pickingSptr = NULL;
01942 char* pickedSptr = NULL;
01943 
01944 if(!pickingP.empty()) {
01945   pickingPptr = &(*(pickingP.begin()));
01946 }
01947 if(!pickingS.empty()) {
01948   pickingSptr = &(*(pickingS.begin()));
01949 }
01950 if(!pickedP.empty()) {
01951   pickedPptr = &(*(pickedP.begin()));
01952 }
01953 if(!pickedS.empty()) {
01954   pickedSptr = &(*(pickedS.begin()));
01955 }
01956 
01957 PROF_BUILD_NLIST_COMM_BEGIN
01958 
01959 par::Mpi_Alltoallv_sparse<char>(pickingPptr ,numKeysRecvP, recvOffsetsP , 
01960     pickedPptr, numKeysSendP, sendOffsetsP, m_mpiCommActive);
01961 
01962 par::Mpi_Alltoallv_sparse<char>(pickingSptr ,numKeysRecvS, recvOffsetsS, 
01963     pickedSptr, numKeysSendS, sendOffsetsS , m_mpiCommActive);
01964 
01965 PROF_BUILD_NLIST_COMM_END
01966 
01967 pickingP.clear();
01968 pickingS.clear();
01969 
01970 #ifdef __DEBUG_DA_NLIST__
01971 MPI_Barrier(m_mpiCommActive);
01972 if(!m_iRankActive) {
01973   std::cout<<std::endl;
01974   std::cout<<"Second ScatterMap..."<<std::endl;
01975   std::cout<<std::endl;
01976 }
01977 MPI_Barrier(m_mpiCommActive);
01978 #endif
01979 
01980 //Create ScatterMap here using sendKp, recvKp, pickedP...
01981 unsigned int cnt=0, pickedCnt=0;
01982 unsigned int pCnt=0, sCnt=0;
01983 for (unsigned int i=0; i < static_cast<unsigned int>(m_iNpesActive); i++) {
01984   cnt=0;
01985   pickedCnt=0;
01986   // merge and pick entries from both primary and secondary lists being sent to procs ...
01987   while (cnt < (numKeysSendP[i] + numKeysSendS[i]) ) {
01988     // sOver := sCnt >= (sendOffsetsS[i] + numKeysSendS[i]);
01989     // pOver := pCnt >= (sendOffsetsP[i] + numKeysSendP[i]);
01990     // sRemains = sCnt < (sendOffsetsS[i] + numKeysSendS[i]);
01991     // pRemains = pCnt < (sendOffsetsP[i] + numKeysSendP[i]);
01992     if ( (sCnt >= (sendOffsetsS[i] + numKeysSendS[i])) ||
01993         ( ( pCnt < (sendOffsetsP[i] + numKeysSendP[i]) ) &&
01994           ( idxSendKp[pCnt] <= idxSendKs[sCnt]) ) ) {
01995 #ifdef __DEBUG_DA_NLIST__
01996       if(idxSendKp[pCnt] == -1) {
01997         assert(!pickedP[pCnt]);
01998       }
01999 #endif
02000       if (pickedP[pCnt]) {
02001         ScndScatterMap.push_back(idxSendKp[pCnt]);
02002         pickedCnt++;
02003       }
02004       pCnt++;
02005     } else if ( ( pCnt >=
02006           static_cast<unsigned int>(sendOffsetsP[i] + numKeysSendP[i]) ) ||
02007         ( sCnt < static_cast<unsigned int>(sendOffsetsS[i] + numKeysSendS[i]) ) ) {
02008 #ifdef __DEBUG_DA_NLIST__
02009       if(idxSendKs[sCnt] == -1) {
02010         assert(!pickedS[sCnt]);
02011       }
02012 #endif
02013       if (pickedS[sCnt]) {
02014         ScndScatterMap.push_back(idxSendKs[sCnt]);
02015         pickedCnt++;
02016       }
02017       sCnt++;
02018     } else {
02019       std::cout << "Scnd skipped both" << std::endl;
02020     }
02021     cnt++;
02022   }//end while
02023   if (pickedCnt) {
02024     ScndSendProcs.push_back(i);  
02025     ScndSendCounts.push_back(pickedCnt);
02026 #ifdef __DEBUG_DA_NLIST__
02027     std::cout<<m_iRankActive<<" Scnd Send P : "<<i<<" Scnd Send C : "<< pickedCnt<<std::endl;
02028     assert(i != m_iRankActive);
02029 #endif
02030   }
02031 }//end for i
02032 #ifdef __DEBUG_DA_NLIST__
02033 for(unsigned int i = 0; i < m_iNpesActive; i++) {
02034   unsigned int numToSend_i = 0;
02035   for(unsigned int j = sendOffsetsP[i]; j < (sendOffsetsP[i] + numKeysSendP[i]); j++) {
02036     if(pickedP[j]) {
02037       numToSend_i++;
02038     }
02039   }//end for j
02040   for(unsigned int j = sendOffsetsS[i]; j < (sendOffsetsS[i] + numKeysSendS[i]); j++) {
02041     if(pickedS[j]) {
02042       numToSend_i++;
02043     }
02044   }//end for j
02045   if(numToSend_i) {
02046     std::cout<<m_iRankActive<<" Actually should send "<<numToSend_i<<" to "<<i<<std::endl;
02047   }
02048 }//end for i
02049 #endif
02050 
02051 //Reset LocalBuffer Size
02052 m_uiLocalBufferSize = static_cast<unsigned int>(in.size());
02053 
02054 idxSendKp.clear();
02055 idxSendKs.clear();
02056 
02057 delete [] sendOffsetsP;
02058 sendOffsetsP = NULL;
02059 
02060 delete [] sendOffsetsS;
02061 sendOffsetsS = NULL;
02062 
02063 delete [] numKeysSendP;
02064 numKeysSendP = NULL;
02065 
02066 delete [] numKeysSendS;
02067 numKeysSendS = NULL;
02068 
02069 delete [] numKeysRecvP;
02070 numKeysRecvP = NULL;
02071 
02072 delete [] recvOffsetsP;    
02073 recvOffsetsP = NULL;
02074 
02075 delete [] recvOffsetsS;
02076 recvOffsetsS = NULL;
02077 
02078 delete [] numKeysRecvS;
02079 numKeysRecvS = NULL;
02080 
02081 pickedP.clear();
02082 pickedS.clear();   
02083 
02084 #ifdef __DEBUG_DA_NLIST__
02085 MPI_Barrier(m_mpiCommActive);
02086 if(!m_iRankActive) {
02087   std::cout<<std::endl;
02088   std::cout<<"Finished Correction for Second Ring..."<<std::endl;
02089   std::cout<<std::endl;
02090 }
02091 MPI_Barrier(m_mpiCommActive);
02092 #endif
02093 
02094 //~~~~~~~~~~~~~~~~Correction for Second Ring Ends~~~~~~~~~~~~~~//
02095 
02096 #ifdef __DEBUG_DA_NLIST__
02097 MPI_Barrier(m_mpiCommActive);
02098 //CHECK if apriori comm for Second ring actually works.
02099 //Check if primary scattermap contains the right element and if it is
02100 //sent to the right processor
02101 //checkSecondRing[i] is a post-ghost element on the local proc, to which one of
02102 //my own elements are pointing to. The corresponding index of my own element is
02103 //stored in the weight of checkSecondRing[i]. 
02104 std::vector<std::vector<unsigned int> > secondRingExpectedScatterMap(m_iNpesActive);
02105 for(unsigned int i = 0; i < checkSecondRing.size(); i++) {
02106   unsigned int bId;
02107   //Find the processor that owns the node.
02108   bool bucketFound = seq::maxLowerBound<ot::TreeNode>(m_tnMinAllBlocks, checkSecondRing[i], bId, NULL, NULL);
02109   assert(bucketFound);
02110   assert(bId < m_iNpesActive);
02111   assert( m_tnMinAllBlocks[bId] <= checkSecondRing[i] );
02112   secondRingExpectedScatterMap[bId].push_back(checkSecondRing[i].getWeight());
02113 }
02114 
02115 assert(seq::test::isUniqueAndSorted(m_uipSendProcs));
02116 
02117 unsigned int toSendCnt = 0;
02118 unsigned int currOffset = 0;
02119 for(unsigned int i = 0; i < m_iNpesActive; i++) {
02120   seq::makeVectorUnique<unsigned int>(secondRingExpectedScatterMap[i], false) ;
02121   if( (toSendCnt < m_uipSendProcs.size()) && (m_uipSendProcs[toSendCnt] == i) ) {
02122     for(unsigned int j = currOffset; j < (currOffset + m_uipSendCounts[toSendCnt] - 1); j++ ) {
02123       assert(m_uipScatterMap[j] < m_uipScatterMap[j+1]);
02124     }
02125     unsigned int st = 0;    
02126     for(unsigned int j = 0; j < secondRingExpectedScatterMap[i].size(); j++) {
02127       bool foundIt = false;
02128       for(unsigned int k = st; k < m_uipSendCounts[toSendCnt]; k++) {
02129         assert( (currOffset + k) < m_uipScatterMap.size() );
02130         if( m_uipScatterMap[currOffset + k] == secondRingExpectedScatterMap[i][j] ) {
02131           foundIt = true;
02132           //secondRingExpectedScatterMap[i] is Sorted and Unique.
02133           //Primary ScatterMap is also sorted and Unique for each processor 
02134           st = (k+1); 
02135           break;
02136         }
02137       }//end for k
02138       if(!foundIt) {
02139         std::cout<<m_iRankActive<<" should have sent in["<<
02140           secondRingExpectedScatterMap[i][j]<<"]: "<<in[secondRingExpectedScatterMap[i][j]]
02141           <<"  to "<<i<<" in primary. But did not."<<std::endl; 
02142         assert(false);
02143       }
02144     }//end for j
02145     currOffset += m_uipSendCounts[toSendCnt];
02146     toSendCnt++;
02147   }else if( !(secondRingExpectedScatterMap[i].empty()) ) {
02148     std::cout<<m_iRankActive<<" should have sent some elements to "<<i<<" in primary."
02149       <<" But did not. toSendCnt: "<<toSendCnt<<" primarySz: "<<(m_uipSendProcs.size())<<std::endl; 
02150     std::cout<<m_iRankActive<<" For example, This element was not found in primary ScatterMap: "
02151       <<in[secondRingExpectedScatterMap[i][0]]<<std::endl;
02152     assert(false);
02153   }//end if ranks match
02154 }//end for i
02155 
02156 secondRingExpectedScatterMap.clear();
02157 checkSecondRing.clear();
02158 
02159 if(!m_iRankActive) {
02160   std::cout<<" Finished Checking Apriori send for Second Ring.."<<std::endl;
02161 }
02162 MPI_Barrier(m_mpiCommActive);
02163 #endif
02164 
02165 //~~~~~~~~~~~Sort In, Flag Corrections as FOREIGN, update Luts~~~~~~~~~~~~~~~~~
02166 
02167 // Now resort in, and correct nlist and scattermap.
02168 std::vector<seq::IndexHolder<ot::TreeNode> > inHolder(in.size());
02169 for (unsigned int i=0; i < in.size(); i++) {
02170   in[i].setWeight(i);
02171   inHolder[i].value = &in[i];
02172   if( (i >= nelem) || ( (i < iLoopEnd) &&
02173         (m_ucpLutMasks[(2*i) + 1] == ot::DA_FLAGS::FOREIGN) ) ) {
02174     //Mark old Foreign, new octants, postghosts and boundaries
02175     inHolder[i].index = 0;
02176   } else {
02177     inHolder[i].index = 1;
02178   }
02179 } // end i for resort set weights ...
02180 
02181 #ifdef __DEBUG_DA_NLIST__
02182 MPI_Barrier(m_mpiCommActive);
02183 if(!m_iRankActive) {
02184   std::cout<<std::endl;
02185   std::cout<<"created inHolder..."<<std::endl;
02186   std::cout<<std::endl;
02187 }
02188 MPI_Barrier(m_mpiCommActive);
02189 #endif
02190 
02191 // Now resort ...
02192 std::sort( inHolder.begin(), inHolder.end() );
02193 
02194 assert(m_uiElementBegin < in.size());
02195 TreeNode  myFirstOctant = in[m_uiElementBegin];
02196 assert( (m_uiPostGhostBegin - 1) < in.size() );
02197 TreeNode  myLastOctant = in[m_uiPostGhostBegin - 1];
02198 
02199 //Correct nelem...
02200 nelem = 0;
02201 if(m_uiElementSize) {
02202   while( (nelem < in.size()) &&
02203       ((*(inHolder[nelem].value)) <= in[m_uiElementEnd - 1]) ) {
02204     nelem++; 
02205   }
02206 } else {
02207   while( ( nelem < in.size() ) &&
02208       ( (*(inHolder[nelem].value)) < myFirstOctant ) &&
02209       ( !((inHolder[nelem].value)->getFlag() & ot::TreeNode::BOUNDARY) ) ) {
02210     nelem++; 
02211   }
02212 }
02213 
02214 #ifdef __DEBUG_DA_NLIST__
02215 MPI_Barrier(m_mpiCommActive);
02216 if(!m_iRankActive) {
02217   std::cout<<std::endl;
02218   std::cout<<"Corrected NELEM..."<<std::endl;
02219   std::cout<<std::endl;
02220 }
02221 MPI_Barrier(m_mpiCommActive);
02222 #endif
02223 
02224 // Wts of in gives us new->old mapping. However to correct
02225 // nlist we need old->new mapping. So we need to generate this.
02226 std::vector<unsigned int> oldToNew(in.size());
02227 
02228 for (unsigned int i = 0; i < in.size(); i++) {
02229   oldToNew[inHolder[i].value->getWeight()] = i;
02230 } // end i for remap indices
02231 
02232 #ifdef __DEBUG_DA_NLIST__
02233 MPI_Barrier(m_mpiCommActive);
02234 if(!m_iRankActive) {
02235   std::cout<<std::endl;
02236   std::cout<<"Computed Old2New..."<<std::endl;
02237   std::cout<<std::endl;
02238 }
02239 MPI_Barrier(m_mpiCommActive);
02240 #endif
02241 
02242 //Re-shuffle in.
02243 std::vector<ot::TreeNode> tmpIn(in.size());
02244 for(unsigned int i = 0;  i < in.size(); i++) {
02245   tmpIn[i] = *(inHolder[i].value);
02246   tmpIn[i].setWeight(inHolder[i].index);
02247 }//end for i
02248 
02249 in = tmpIn;
02250 tmpIn.clear();
02251 inHolder.clear();
02252 
02253 #ifdef __DEBUG_DA_NLIST__
02254 MPI_Barrier(m_mpiCommActive);
02255 if(!m_iRankActive) {
02256   std::cout<<std::endl;
02257   std::cout<<"Reshuffled In."<<std::endl;
02258   std::cout<<std::endl;
02259 }
02260 MPI_Barrier(m_mpiCommActive);
02261 #endif
02262 
02263 //Reset Counters...
02264 // PreGhosts ....
02265 m_uiPreGhostElementSize = 0;
02266 while( (m_uiPreGhostElementSize < in.size()) && (in[m_uiPreGhostElementSize] < myFirstOctant) ) {
02267   if ( !(in[m_uiPreGhostElementSize].getFlag() & ot::TreeNode::BOUNDARY) ) {
02268     m_uiPreGhostElementSize++;
02269   }else {
02270     break;
02271   }
02272 }//end while
02273 
02274 if(nelem != (m_uiPreGhostElementSize + m_uiElementSize) ) {
02275   std::cout<<"Processor "<<m_iRankAll<<" failing: nelem = "<<nelem
02276     <<" pgElemSz = "<<m_uiPreGhostElementSize<<" elemSz = "<<m_uiElementSize<<std::endl;
02277 }
02278 assert( nelem == (m_uiPreGhostElementSize + m_uiElementSize) );
02279 #ifdef __DEBUG_DA_NLIST__
02280 MPI_Barrier(m_mpiCommActive);
02281 if(!m_iRankActive) {
02282   std::cout<<std::endl;
02283   std::cout<<"PreGhost Counters Reset."<<std::endl;
02284   std::cout<<std::endl;
02285 }
02286 MPI_Barrier(m_mpiCommActive);
02287 #endif
02288 
02289 //Mine ...
02290 m_uiElementBegin = m_uiPreGhostElementSize;
02291 m_uiPostGhostBegin = m_uiPreGhostElementSize;
02292 while( (m_uiPostGhostBegin < in.size()) && (in[m_uiPostGhostBegin] <= myLastOctant) ) {
02293   if(in[m_uiPostGhostBegin] < myFirstOctant) {
02294     m_uiElementBegin++;
02295   }
02296   m_uiPostGhostBegin++;
02297 }//end while
02298 m_uiElementEnd = m_uiElementBegin + m_uiElementSize;
02299 
02300 #ifdef __DEBUG_DA_NLIST__
02301 MPI_Barrier(m_mpiCommActive);
02302 if(!m_iRankActive) {
02303   std::cout<<std::endl;
02304   std::cout<<"All Counters Reset."<<std::endl;
02305   std::cout<<std::endl;
02306 }
02307 MPI_Barrier(m_mpiCommActive);
02308 #endif
02309 
02310 std::vector<unsigned int> nlistNew(8*nelem);
02311 std::vector<unsigned char> hnMaskNew(nelem);
02312 
02313 #ifdef __DEBUG_DA_NLIST__
02314 MPI_Barrier(m_mpiCommActive);
02315 if(!m_iRankActive) {
02316   std::cout<<std::endl;
02317   std::cout<<"Allocated memory for the new nlist and masks."<<std::endl;
02318   std::cout<<std::endl;
02319 }
02320 MPI_Barrier(m_mpiCommActive);
02321 #endif
02322 
02323 // correct nLists ...
02324 // The loop is in the order of the old Indices ...
02325 //The first time only the nlist of preghosts are corrected.
02326 //Since, the nlist of own elements are not built in the first
02327 //outer-loop (numFullLoopCtr).
02328 //The second time the preghosts will be visited once again.
02329 //Skip correction of nlist for Foreigns 
02330 for (unsigned int i = 0; i < iLoopEnd; i++) {
02331 #ifdef __DEBUG_DA_NLIST__
02332   assert(i < oldToNew.size() );
02333   assert(oldToNew[i] < nelem);
02334   if( numFullLoopCtr == 0 ) {
02335     assert(oldToNew[i] < m_uiPreGhostElementSize);
02336   }
02337   assert( (2*i+1) < m_ucpLutMasks.size() );
02338 #endif
02339 
02340   hnMaskNew[oldToNew[i]] = m_ucpLutMasks[2*i + 1];
02341 
02342   if(m_ucpLutMasks[2*i+1] == ot::DA_FLAGS::FOREIGN) {
02343     continue;
02344   }
02345 
02346   unsigned int iiOld = 8*i;
02347   unsigned int iiNew = 8*oldToNew[i];
02348   for (unsigned int j=0; j<8; j++) {
02349     // remap ...
02350 #ifdef __DEBUG_DA_NLIST__
02351     assert( (iiNew + j) < nlistNew.size() );
02352     assert( (iiOld + j) < nlist.size()  );
02353     if( nlist[iiOld + j] >= oldToNew.size() ) {
02354       std::cout<<m_iRankActive<<": oldToNew.size(): "<<oldToNew.size()<<" nlist[8*"<<i<<"+"<<j<<"]: "<<nlist[iiOld+j]<<std::endl 
02355                                                        <<"oldToNew["<<i<<"] = "<<oldToNew[i]<<" elem: "<<in[oldToNew[i]]<<std::endl;                                        
02356       assert(false);
02357     }
02358 #endif
02359 
02360     nlistNew[iiNew + j] = oldToNew[ nlist[iiOld + j] ];
02361   }//end for j
02362 }//end for i
02363 
02364 #ifdef __DEBUG_DA_NLIST__
02365 MPI_Barrier(m_mpiCommActive);
02366 if(!m_iRankActive) {
02367   std::cout<<std::endl;
02368   std::cout<<"Corrected Nlist."<<std::endl;
02369   std::cout<<std::endl;
02370 }
02371 MPI_Barrier(m_mpiCommActive);
02372 #endif
02373 
02374 //Replace the old lists by the new lists and clear the new
02375 //lists...
02376 
02377 m_ucpLutMasks.resize(2*nelem);
02378 if(numFullLoopCtr) {
02379   for(unsigned int i = 0; i < nelem; i++) {
02380     if(in[i].getWeight() == 0) {
02381       m_ucpLutMasks[2*i + 1] = ot::DA_FLAGS::FOREIGN;
02382     }else {
02383       m_ucpLutMasks[2*i + 1] = hnMaskNew[i];
02384     }
02385   }//end for i
02386 }else {
02387   for(unsigned int i = 0; i < m_uiPreGhostElementSize; i++) {
02388     if(in[i].getWeight() == 0) {
02389       m_ucpLutMasks[2*i + 1] = ot::DA_FLAGS::FOREIGN;
02390     }else {
02391       m_ucpLutMasks[2*i + 1] = hnMaskNew[i];
02392     }
02393   }//end for i
02394 }
02395 hnMaskNew.clear();
02396 
02397 for(unsigned int i = 0; i < in.size(); i++) {
02398   in[i].setWeight(1);
02399 }
02400 
02401 nlist = nlistNew;
02402 nlistNew.clear();
02403 
02404 #ifdef __DEBUG_DA_NLIST__
02405 MPI_Barrier(m_mpiCommActive);
02406 if(!m_iRankActive) {
02407   std::cout<<std::endl;
02408   std::cout<<"Corrected HnMasks and Nlists."<<std::endl;
02409   std::cout<<std::endl;
02410 }
02411 MPI_Barrier(m_mpiCommActive);
02412 #endif
02413 
02414 // Merge scattermap and scnScatterMap
02415 std::vector<unsigned int>               tmpScatterMap;
02416 std::vector<unsigned int>               tmpSendProcs;
02417 std::vector<unsigned int>               tmpSendCounts;
02418 std::vector<unsigned int>               tmpRecvProcs;
02419 std::vector<unsigned int>               tmpRecvCounts;
02420 
02421 //Assumes primary and secondary lists to be sorted independently. the
02422 //result will be sorted.
02423 //There is a 1-1 mapping between counts and procs.
02424 unsigned int primaryCnt = 0;
02425 unsigned int secondaryCnt = 0;
02426 while( (primaryCnt < m_uipSendProcs.size()) && (secondaryCnt < ScndSendProcs.size()) ) {
02427   if( m_uipSendProcs[primaryCnt] < ScndSendProcs[secondaryCnt] ) {
02428     tmpSendProcs.push_back(m_uipSendProcs[primaryCnt]);
02429     tmpSendCounts.push_back(m_uipSendCounts[primaryCnt]);
02430     primaryCnt++;
02431   }else if( m_uipSendProcs[primaryCnt] > ScndSendProcs[secondaryCnt] ) {
02432     tmpSendProcs.push_back(ScndSendProcs[secondaryCnt]);
02433     tmpSendCounts.push_back(ScndSendCounts[secondaryCnt]);
02434     secondaryCnt++;
02435   }else {
02436     //if both are equal select p only from one. Arbitrarily, the default
02437     //is picked as primary
02438     tmpSendProcs.push_back(m_uipSendProcs[primaryCnt]);
02439     //Sum the counts from the primary and secondary...
02440     tmpSendCounts.push_back(m_uipSendCounts[primaryCnt] + ScndSendCounts[secondaryCnt]);
02441     primaryCnt++;
02442     //skip secondary
02443     secondaryCnt++;
02444   }
02445 }//end while
02446 
02447 //only primary remains
02448 while( primaryCnt < m_uipSendProcs.size() ) {
02449   tmpSendProcs.push_back(m_uipSendProcs[primaryCnt]);
02450   tmpSendCounts.push_back(m_uipSendCounts[primaryCnt]);
02451   primaryCnt++;
02452 }
02453 
02454 //only secondary remains
02455 while( secondaryCnt < ScndSendProcs.size() ) {
02456   tmpSendProcs.push_back(ScndSendProcs[secondaryCnt]);
02457   tmpSendCounts.push_back(ScndSendCounts[secondaryCnt]);
02458   secondaryCnt++;
02459 }
02460 
02461 #ifdef __DEBUG_DA_NLIST__
02462 MPI_Barrier(m_mpiCommActive);
02463 assert( seq::test::isUniqueAndSorted<unsigned int>(m_uipSendProcs) );
02464 assert( seq::test::isUniqueAndSorted<unsigned int>(ScndSendProcs) );
02465 assert( seq::test::isUniqueAndSorted<unsigned int>(tmpSendProcs) );
02466 for( unsigned int i = 0; i < m_uipSendProcs.size(); i++) {
02467   std::cout<<m_iRankActive<<" --> "<<m_uipSendProcs[i]<<" (P) "<<m_uipSendCounts[i]<<std::endl;
02468 }
02469 std::cout<<m_iRankActive<<" P-Scatter: "<<m_uipScatterMap.size()<<std::endl;
02470 assert( m_uipSendProcs.size() == m_uipSendCounts.size() );
02471 MPI_Barrier(m_mpiCommActive);
02472 for( unsigned int i = 0; i < ScndSendProcs.size(); i++) {
02473   std::cout<<m_iRankActive<<" --> "<<ScndSendProcs[i]<<" (S) "<<ScndSendCounts[i]<<std::endl;
02474 }
02475 std::cout<<m_iRankActive<<" S-Scatter: "<<ScndScatterMap.size()<<std::endl;
02476 assert( ScndSendProcs.size() == ScndSendCounts.size() );
02477 MPI_Barrier(m_mpiCommActive);
02478 for( unsigned int i = 0; i < tmpSendProcs.size(); i++) {
02479   std::cout<<m_iRankActive<<" --> "<<tmpSendProcs[i]<<" (T) "<<tmpSendCounts[i]<<std::endl;
02480 }
02481 assert( tmpSendProcs.size() == tmpSendCounts.size() );
02482 MPI_Barrier(m_mpiCommActive);
02483 if(!m_iRankActive) {
02484   std::cout<<std::endl;
02485   std::cout<<"Corrected SendCnts and SendProcs."<<std::endl;
02486   std::cout<<std::endl;
02487 }
02488 MPI_Barrier(m_mpiCommActive);
02489 #endif
02490 
02491 //Assumes primary and secondary Scattermaps to be sorted
02492 //independently for each processors portion. the
02493 //result will also be sorted in chunks.
02494 primaryCnt = 0;
02495 secondaryCnt = 0;
02496 unsigned int primarySz = 0;
02497 unsigned int secondarySz = 0;
02498 while( (primaryCnt < m_uipSendProcs.size()) && (secondaryCnt < ScndSendProcs.size()) ) {
02499   if( m_uipSendProcs[primaryCnt] < ScndSendProcs[secondaryCnt] ) {
02500     unsigned int numSent = 0;
02501     while( numSent < m_uipSendCounts[primaryCnt] ) {
02502       tmpScatterMap.push_back(oldToNew[m_uipScatterMap[primarySz++]]);
02503       numSent++;
02504     }
02505     primaryCnt++;
02506   }else if( m_uipSendProcs[primaryCnt] > ScndSendProcs[secondaryCnt] ) {
02507     unsigned int numSent = 0;
02508     while( numSent < ScndSendCounts[secondaryCnt] ) {
02509       tmpScatterMap.push_back(oldToNew[ScndScatterMap[secondarySz++]]);
02510       numSent++;
02511     }
02512     secondaryCnt++;
02513   }else {
02514     //Both primary and secondary are sending to the same processor so
02515     //merge.
02516     unsigned int nP = 0;
02517     unsigned int nS = 0;
02518 
02519     //Both are not over.
02520     while( ( nP < m_uipSendCounts[primaryCnt] ) && ( nS < ScndSendCounts[secondaryCnt] ) ) {
02521 #ifdef __DEBUG_DA_NLIST__
02522       //The same octant can not be sent to the same processor in both
02523       //the primary and secondary lists.
02524       if( m_uipScatterMap[primarySz] == ScndScatterMap[secondarySz] ) {
02525         std::cout<<m_iRankActive<<" is sending "<<
02526           in[oldToNew[m_uipScatterMap[primarySz]]]<<" to "
02527           <<m_uipSendProcs[primaryCnt]<<" in both primary and secondary."<<std::endl;
02528         assert(false);
02529       }
02530 #endif
02531       //Both primary and secondary can only send own elements, the
02532       //relative ordering of the own elements in the old and new
02533       //ordering should be the same.
02534       if( m_uipScatterMap[primarySz] < ScndScatterMap[secondarySz] ) {
02535 #ifdef __DEBUG_DA_NLIST__
02536         assert( oldToNew[m_uipScatterMap[primarySz]] < oldToNew[ScndScatterMap[secondarySz]] );
02537 #endif
02538         tmpScatterMap.push_back(oldToNew[m_uipScatterMap[primarySz++]]);
02539         nP++;
02540       }else {
02541 #ifdef __DEBUG_DA_NLIST__
02542         assert( oldToNew[m_uipScatterMap[primarySz]] > oldToNew[ScndScatterMap[secondarySz]] );
02543 #endif
02544         tmpScatterMap.push_back(oldToNew[ScndScatterMap[secondarySz++]]);
02545         nS++;
02546       }
02547     }
02548 
02549     //Only primary remains
02550     while( nP < m_uipSendCounts[primaryCnt] ) {
02551       tmpScatterMap.push_back(oldToNew[m_uipScatterMap[primarySz++]]);
02552       nP++;
02553     }
02554 
02555     //only secondary remains
02556     while( nS < ScndSendCounts[secondaryCnt] ) {
02557       tmpScatterMap.push_back(oldToNew[ScndScatterMap[secondarySz++]]);
02558       nS++;
02559     }
02560 
02561     primaryCnt++;
02562     secondaryCnt++;
02563   }//end if-else
02564 }//end while
02565 
02566 #ifdef __DEBUG_DA_NLIST__
02567 //ScatterMaps and Procs must finish together.
02568 if( primarySz < m_uipScatterMap.size() ) {
02569   assert(primaryCnt < m_uipSendProcs.size());
02570 }else {
02571   assert(primaryCnt == m_uipSendProcs.size());
02572 }
02573 
02574 if( secondarySz < ScndScatterMap.size() ) {
02575   assert(secondaryCnt < ScndSendProcs.size());
02576 }else {
02577   assert(secondaryCnt == ScndSendProcs.size());
02578 }
02579 #endif
02580 
02581 //Only primary remains.
02582 while( primarySz < m_uipScatterMap.size() ) {
02583   tmpScatterMap.push_back(oldToNew[m_uipScatterMap[primarySz++]]);
02584 }
02585 
02586 //Only secondary remains.
02587 while( secondarySz < ScndScatterMap.size() ) {
02588   tmpScatterMap.push_back(oldToNew[ScndScatterMap[secondarySz++]]);
02589 }
02590 
02591 #ifdef __DEBUG_DA_NLIST__
02592 MPI_Barrier(m_mpiCommActive);
02593 if(!m_iRankActive) {
02594   std::cout<<std::endl;
02595   std::cout<<"Merged Primary and Secondary Scatter Maps."<<std::endl;
02596   std::cout<<std::endl;
02597 }
02598 
02599 std::cout<<m_iRankActive<<" pScatterSz: "<<m_uipScatterMap.size()<<" sScatterSz: "
02600 <<ScndScatterMap.size()<<" tScatterSz: "<<tmpScatterMap.size()<<std::endl;
02601 
02602 unsigned int debug_pCnt = 0;
02603 unsigned int debug_sCnt = 0;
02604 unsigned int debug_tCnt = 0;
02605 unsigned int debug_pOff = 0;
02606 unsigned int debug_sOff = 0;
02607 unsigned int debug_tOff = 0;
02608 for(unsigned int i = 0; i< m_iNpesActive; i++) {
02609   std::vector<unsigned int>  pSctList;
02610   std::vector<unsigned int>  sSctList;
02611   std::vector<unsigned int>  tSctList;
02612   if( (debug_pCnt < m_uipSendProcs.size()) && (m_uipSendProcs[debug_pCnt] == i) ) {
02613     for(unsigned int j = 0; j < m_uipSendCounts[debug_pCnt]; j++) {
02614       pSctList.push_back(m_uipScatterMap[debug_pOff + j]);
02615     }
02616     debug_pOff += m_uipSendCounts[debug_pCnt];
02617     debug_pCnt++;
02618   }
02619   if( (debug_sCnt < ScndSendProcs.size()) && (ScndSendProcs[debug_sCnt] == i) ) {
02620     for(unsigned int j = 0; j < ScndSendCounts[debug_sCnt]; j++) {
02621       sSctList.push_back(ScndScatterMap[debug_sOff + j]);
02622     }
02623     debug_sOff += ScndSendCounts[debug_sCnt];
02624     debug_sCnt++;
02625   }
02626   if( (debug_tCnt < tmpSendProcs.size()) && (tmpSendProcs[debug_tCnt] == i) ) {
02627     for(unsigned int j = 0; j < tmpSendCounts[debug_tCnt]; j++) {
02628       tSctList.push_back(tmpScatterMap[debug_tOff + j]);
02629     }
02630     debug_tOff += tmpSendCounts[debug_tCnt];
02631     debug_tCnt++;
02632   }
02633 
02634   assert( tSctList.size() == (pSctList.size() + sSctList.size()) );
02635 
02636   assert( seq::test::isUniqueAndSorted<unsigned int>(pSctList) );
02637   assert( seq::test::isUniqueAndSorted<unsigned int>(sSctList) );
02638 
02639   //Explicitly merge p and s into tt.
02640   std::vector<unsigned int> ttList;
02641   for(unsigned int j=0; j < sSctList.size(); j++) {
02642     ttList.push_back(oldToNew[sSctList[j]]);
02643   }
02644   for(unsigned int j=0; j < pSctList.size(); j++) {
02645     ttList.push_back(oldToNew[pSctList[j]]);
02646   }
02647 
02648   sort(ttList.begin(),ttList.end());
02649   assert( seq::test::isUniqueAndSorted<unsigned int>(ttList) );
02650 
02651   // was assert(ttList == tSctList);
02652   assert(ttList.size() == tSctList.size());
02653   for (unsigned int j = 0; j < ttList.size(); j++) {
02654     if (ttList[j] != tSctList[j]) {
02655       std::cout << m_iRankActive << ": MergeFailed at " << j << std::endl;
02656       std::cout << ttList[j] << " != "  << tSctList[j] << std::endl;
02657       for (unsigned int k = 0; k < pSctList.size(); k++) {
02658         std::cout << ttList[k] << ", " << tSctList[k]
02659           << ", " << pSctList[k] << std::endl;
02660       }
02661       for (unsigned int k = 0; k < sSctList.size(); k++) {
02662         std::cout << sSctList[k] << std::endl;
02663       }
02664       assert(false);
02665     }
02666   }//end for j
02667 
02668   ttList.clear();
02669   pSctList.clear();
02670   sSctList.clear();
02671   tSctList.clear();
02672 }//end for i
02673 
02674 assert( debug_pCnt == m_uipSendProcs.size() );
02675 assert( debug_sCnt == ScndSendProcs.size() );
02676 assert( debug_tCnt == tmpSendProcs.size() );
02677 
02678 assert( tmpScatterMap.size() == (m_uipScatterMap.size() + ScndScatterMap.size()) );
02679 MPI_Barrier(m_mpiCommActive);
02680 #endif
02681 
02682 oldToNew.clear();
02683 
02684 primaryCnt = 0;
02685 secondaryCnt = 0;
02686 while( (primaryCnt < m_uipRecvProcs.size()) && (secondaryCnt < ScndRecvProcs.size()) ) {
02687   if( m_uipRecvProcs[primaryCnt] < ScndRecvProcs[secondaryCnt] ) {
02688     tmpRecvProcs.push_back(m_uipRecvProcs[primaryCnt]);
02689     tmpRecvCounts.push_back(m_uipRecvCounts[primaryCnt]);
02690     primaryCnt++;
02691   }else if( m_uipRecvProcs[primaryCnt] > ScndRecvProcs[secondaryCnt] ) {
02692     tmpRecvProcs.push_back(ScndRecvProcs[secondaryCnt]);
02693     tmpRecvCounts.push_back(ScndRecvCounts[secondaryCnt]);
02694     secondaryCnt++;
02695   }else {
02696     //if both are equal select only from primary
02697     tmpRecvProcs.push_back(m_uipRecvProcs[primaryCnt]);
02698     tmpRecvCounts.push_back(m_uipRecvCounts[primaryCnt] + ScndRecvCounts[secondaryCnt]);
02699     primaryCnt++;
02700     //skip secondary
02701     secondaryCnt++;
02702   }
02703 }
02704 
02705 while( primaryCnt < m_uipRecvProcs.size() ) {
02706   tmpRecvProcs.push_back(m_uipRecvProcs[primaryCnt]);
02707   tmpRecvCounts.push_back(m_uipRecvCounts[primaryCnt]);
02708   primaryCnt++;
02709 }
02710 
02711 while( secondaryCnt < ScndRecvProcs.size() ) {
02712   tmpRecvProcs.push_back(ScndRecvProcs[secondaryCnt]);
02713   tmpRecvCounts.push_back(ScndRecvCounts[secondaryCnt]);
02714   secondaryCnt++;
02715 }
02716 
02717 #ifdef __DEBUG_DA_NLIST__
02718 MPI_Barrier(m_mpiCommActive);
02719 assert( seq::test::isUniqueAndSorted<unsigned int>(m_uipRecvProcs) );
02720 assert( seq::test::isUniqueAndSorted<unsigned int>(ScndRecvProcs) );
02721 assert( seq::test::isUniqueAndSorted<unsigned int>(tmpRecvProcs) );
02722 for( unsigned int i = 0; i < m_uipRecvProcs.size(); i++) {
02723   std::cout<<m_iRankActive<<" <-- "<<m_uipRecvProcs[i]<<" (P) "<<m_uipRecvCounts[i]<<std::endl;
02724 }
02725 assert( m_uipRecvProcs.size() == m_uipRecvCounts.size() );
02726 MPI_Barrier(m_mpiCommActive);
02727 for( unsigned int i = 0; i < ScndRecvProcs.size(); i++) {
02728   std::cout<<m_iRankActive<<" <-- "<<ScndRecvProcs[i]<<" (S) "<<ScndRecvCounts[i]<<std::endl;
02729 }
02730 assert( ScndRecvProcs.size() == ScndRecvCounts.size() );
02731 MPI_Barrier(m_mpiCommActive);
02732 for( unsigned int i = 0; i < tmpRecvProcs.size(); i++) {
02733   std::cout<<m_iRankActive<<" <-- "<<tmpRecvProcs[i]<<" (T) "<<tmpRecvCounts[i]<<std::endl;
02734 }
02735 assert( tmpRecvProcs.size() == tmpRecvCounts.size() );
02736 MPI_Barrier(m_mpiCommActive);
02737 #endif
02738 
02739 ScndScatterMap.clear();
02740 ScndSendProcs.clear();
02741 ScndSendCounts.clear();
02742 ScndRecvProcs.clear();
02743 ScndRecvCounts.clear();
02744 
02745 m_uipScatterMap  =  tmpScatterMap;
02746 m_uipSendProcs   =  tmpSendProcs;
02747 m_uipSendCounts  =  tmpSendCounts;
02748 m_uipRecvProcs   =  tmpRecvProcs;
02749 m_uipRecvCounts  =  tmpRecvCounts;
02750 
02751 tmpScatterMap.clear();
02752 tmpSendProcs.clear();
02753 tmpSendCounts.clear();
02754 tmpRecvProcs.clear();
02755 tmpRecvCounts.clear();
02756 
02757 #ifdef __DEBUG_DA_NLIST__
02758 MPI_Barrier(m_mpiCommActive);
02759 if(!m_iRankActive) {
02760   std::cout<<std::endl;
02761   std::cout<<"Corrected RecvCounts and RecvProcs."<<std::endl;
02762   std::cout<<std::endl;
02763 }
02764 MPI_Barrier(m_mpiCommActive);
02765 #endif
02766 
02767 }//end for numFullLoopCtr
02768 
02769 // Compute and store the new offsets ...
02770 m_uipSendOffsets.resize(m_uipSendCounts.size());
02771 m_uipRecvOffsets.resize(m_uipRecvCounts.size());
02772 
02773 if ( m_uipSendCounts.size() ) {
02774   m_uipSendOffsets[0] = 0;
02775   for (unsigned int i=1; i < m_uipSendCounts.size(); i++) {
02776     m_uipSendOffsets[i] = (m_uipSendCounts[i-1] + m_uipSendOffsets[i-1]);
02777   }
02778 }
02779 
02780 if ( m_uipRecvCounts.size() ) {
02781   bool adjustedAlready = false;
02782   if(m_uipRecvProcs[0] < static_cast<unsigned int>(m_iRankActive)) {
02783     m_uipRecvOffsets[0] = 0;
02784   }else {
02785     m_uipRecvOffsets[0] = m_uiPostGhostBegin;
02786     adjustedAlready = true;
02787   }
02788   for (unsigned int i=1; i < m_uipRecvCounts.size(); i++) {
02789     if( (m_uipRecvProcs[i] < m_iRankActive) || adjustedAlready ) {
02790       m_uipRecvOffsets[i] = (m_uipRecvCounts[i-1] + m_uipRecvOffsets[i-1]);
02791     }else {
02792       m_uipRecvOffsets[i] = m_uiPostGhostBegin;
02793       adjustedAlready = true;
02794     }
02795   }//end for i
02796 }
02797 
02798 //Store a copy of the original scattermap first. This will be required for
02799 //communicating ghost elements. Since there are no post-ghost element, we only
02800 //need to send to processors with ranks greater than my rank.
02801 m_uipElemScatterMap.clear();
02802 m_uipElemSendOffsets.clear();
02803 m_uipElemSendProcs.clear();
02804 m_uipElemSendCounts.clear();
02805 for(unsigned int i = 0; i < m_uipSendProcs.size(); i++) {
02806   if(m_uipSendProcs[i] > static_cast<unsigned int>(m_iRankActive)) {
02807     unsigned int numElemProcs = static_cast<unsigned int>(m_uipElemSendOffsets.size());
02808     m_uipElemSendOffsets.push_back(m_uipElemScatterMap.size());
02809     for(unsigned int j = m_uipSendOffsets[i]; j < (m_uipSendCounts[i] + m_uipSendOffsets[i]); j++) {
02810       if(m_uipScatterMap[j] < m_uiElementEnd) {
02811         m_uipElemScatterMap.push_back(m_uipScatterMap[j]);
02812       }else {
02813         break;
02814       }
02815     }//end for j    
02816     unsigned int currCount = 
02817       (static_cast<unsigned int>(m_uipElemScatterMap.size())
02818        - m_uipElemSendOffsets[numElemProcs]);
02819     if(currCount) {
02820       m_uipElemSendProcs.push_back(m_uipSendProcs[i]);
02821       m_uipElemSendCounts.push_back(currCount);
02822     }else {
02823       m_uipElemSendOffsets.resize(numElemProcs);
02824     }
02825   }
02826 }//end for i
02827 
02828 m_uipElemRecvOffsets.clear();
02829 m_uipElemRecvProcs.clear();
02830 m_uipElemRecvCounts.clear();
02831 for(unsigned int i = 0; i < m_uipRecvProcs.size(); i++) {
02832   if(m_uipRecvOffsets[i] < m_uiPreGhostElementSize) {
02833     //Planning to recieve at least 1 pre-ghost element from this processor
02834     m_uipElemRecvProcs.push_back(m_uipRecvProcs[i]);
02835     m_uipElemRecvOffsets.push_back(m_uipRecvOffsets[i]);
02836     if( (m_uipRecvOffsets[i] + m_uipRecvCounts[i]) <= m_uiPreGhostElementSize) {
02837       m_uipElemRecvCounts.push_back(m_uipRecvCounts[i]);
02838     } else {
02839       m_uipElemRecvCounts.push_back(m_uiPreGhostElementSize - m_uipRecvOffsets[i]);
02840     }
02841   }
02842 }//end for i
02843 
02844 // Correct the scatter-map so that hanging nodes get correct info ...
02845 //  Logic is to find if any entry in the scatter map is hanging, and if
02846 //  so, we simply correct the scatter map so that it points to the
02847 //  correct anchor instead.
02848 
02849 for ( unsigned int i = 0; i < m_uipScatterMap.size(); i++) {
02850   unsigned int idx = m_uipScatterMap[i];
02851 #ifdef __DEBUG_DA_NLIST__
02852   assert( (idx >= m_uiElementBegin) && (idx < m_uiPostGhostBegin) );
02853 #endif
02854   // if idx is an elem ... use nlist ...
02855   if ( idx < nelem) {
02856     if (m_ucpLutMasks[2*idx + 1] & 1) {
02857 #ifdef __DEBUG_DA_NLIST__
02858       assert( !(in[idx].getFlag() & ot::TreeNode::NODE) );
02859 #endif
02860       // anchor is hanging, so let us correct this ...
02861       m_uipScatterMap[i] = nlist[8*idx];
02862     }
02863   } else {
02864     // if idx points to a boundary node,
02865     // get the parent of in[idx] ...
02866     if ( !(in[idx].getFlag() & ot::TreeNode::NODE)) {
02867       ot::TreeNode tn = in[idx].getParent();
02868       while ( in[idx] > tn) {
02869         idx--;
02870       }
02871       //At the end of the above while loop, in[idx] will point to 1 element
02872       //before the first child of tn.
02873       idx++;
02874       m_uipScatterMap[i] = idx;
02875 #ifdef __DEBUG_DA_NLIST__
02876       assert( in[m_uipScatterMap[i]].getAnchor() == tn.getAnchor() );
02877       assert( (m_uipScatterMap[i] >= m_uiElementBegin) && (m_uipScatterMap[i] < m_uiPostGhostBegin) );
02878       assert( in[m_uipScatterMap[i]].getFlag() & ot::TreeNode::NODE );
02879 #endif
02880     }
02881   }
02882 }//end for i
02883 
02884 #ifdef __DEBUG_DA_NLIST__
02885 MPI_Barrier(m_mpiCommActive);
02886 if(!m_iRankActive) {
02887   std::cout<<std::endl;
02888   std::cout<<"Corrected New Scatter Map for hanging anchors."<<std::endl;
02889   std::cout<<std::endl;
02890 }
02891 MPI_Barrier(m_mpiCommActive);
02892 #endif
02893 
02894 //Correct nlist of all FOREIGNs....
02895 for(unsigned int i=0;i<nelem;i++) {
02896   if(m_ucpLutMasks[2*i+1] == ot::DA_FLAGS::FOREIGN) {
02897     for(unsigned int j=0;j<8;j++) {
02898       nlist[8*i + j] = i;
02899     }
02900   }
02901 }//end for i
02902 
02903 //~~~~~~~~~~~~~~~~~~~~Mark NODES~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
02904 bool *isNode = NULL;
02905 if(m_uiLocalBufferSize) {
02906   isNode = new bool[m_uiLocalBufferSize];
02907 }
02908 for (unsigned int i = 0; i < m_uiLocalBufferSize; i++) {
02909   isNode[i] = false;
02910 }
02911 // loop through the LUT, and tag everybody as being nodes or not ...
02912 for ( unsigned int i = 0; i < nelem; i++) {
02913   if(m_ucpLutMasks[2*i+1] == ot::DA_FLAGS::FOREIGN) {
02914     continue;
02915   }
02916   for (unsigned int j = 0; j < 8; j++) {
02917 #ifdef __DEBUG_DA_NLIST__
02918     assert(nlist[8*i + j] < m_uiLocalBufferSize);
02919 #endif
02920     isNode[nlist[8*i + j]] = true;
02921   }
02922 }
02923 
02924 for (unsigned int i = 0; i < m_uiLocalBufferSize; i++) {
02925   if ( isNode[i] ) {
02926 #ifdef __DEBUG_DA_NLIST__
02927     if ( (i >= m_uiElementBegin) && (i < m_uiPostGhostBegin) ) {
02928       assert( (in[i].getFlag() & ot::TreeNode::NODE) );
02929     }
02930 #endif
02931     in[i].orFlag( ot::TreeNode::NODE );
02932   }
02933 }//end for i
02934 
02935 #ifdef __DEBUG_DA_NLIST__
02936 MPI_Barrier(m_mpiCommActive);
02937 if(!m_iRankActive) {
02938   std::cout<<std::endl;
02939   std::cout<<"Finished is NODE..."<<std::endl;
02940   std::cout<<std::endl;
02941 }
02942 MPI_Barrier(m_mpiCommActive);
02943 #endif
02944 
02945 // Now compute the elem node size, and boundary node sizes ...
02946 unsigned int elemNodeSz = 0;
02947 unsigned int bndNodeSz = 0;
02948 unsigned int preGhostNodeSz = 0;
02949 unsigned int preBndNodeSz = 0;
02950 unsigned int postGhostNodeSz = 0;
02951 unsigned int postBndNodeSz = 0;
02952 
02953 for (unsigned int i = 0; i < m_uiElementBegin; i++) {
02954   if ( isNode[i]  && (!(in[i].getFlag() & ot::TreeNode::BOUNDARY)) ) {
02955     preGhostNodeSz++;                    
02956   } else if (isNode[i]) {
02957     preBndNodeSz++;
02958   }
02959 }
02960 
02961 //My Elements which are also nodes.
02962 for (unsigned int i = m_uiElementBegin; i < m_uiElementEnd; i++) {
02963   if (isNode[i]) {
02964     elemNodeSz++;        
02965   }
02966 }
02967 
02968 for (unsigned int i = m_uiElementEnd; i < m_uiPostGhostBegin; i++) {
02969   if (isNode[i]) {
02970     bndNodeSz++;          
02971   }
02972 }
02973 
02974 for (unsigned int i = m_uiPostGhostBegin; i < m_uiLocalBufferSize; i++) {
02975   if (isNode[i] && (!(in[i].getFlag() & ot::TreeNode::BOUNDARY)) ) {
02976     postGhostNodeSz++;
02977   } else if (isNode[i]) {
02978     postBndNodeSz++;
02979   }
02980 }
02981 
02982 #ifdef __DEBUG_DA_NLIST__
02983 MPI_Barrier(m_mpiCommActive);
02984 if(!m_iRankActive) {
02985   std::cout<<std::endl;
02986   std::cout<<"Finished setting sizes..."<<std::endl;
02987   std::cout<<std::endl;
02988 }
02989 MPI_Barrier(m_mpiCommActive);
02990 #endif
02991 
02992 if(isNode) {
02993   delete [] isNode;
02994   isNode = NULL;
02995 }
02996 
02997 //Store the sizes.
02998 m_uiPreGhostNodeSize = preGhostNodeSz;
02999 m_uiPreGhostBoundaryNodeSize = preBndNodeSz;
03000 m_uiPostGhostNodeSize = postGhostNodeSz + postBndNodeSz;
03001 m_uiNodeSize = elemNodeSz;
03002 m_uiBoundaryNodeSize = bndNodeSz;
03003 
03004 #ifdef __DEBUG_DA_NLIST__
03005 MPI_Barrier(m_mpiCommActive);
03006 std::cout<<m_iRankActive<<" Just Before Compression...."<<std::endl<<std::endl;
03007 std::cout<<m_iRankActive<<" "<<m_uiPreGhostNodeSize<<" "<<m_uiPreGhostBoundaryNodeSize
03008 <<" "<<m_uiPostGhostNodeSize<<" "<<m_uiNodeSize<<" "
03009 <<m_uiBoundaryNodeSize<<std::endl<<std::endl;
03010 MPI_Barrier(m_mpiCommActive);
03011 #endif
03012 
03013 // All done ... Now COMPRESS
03014 // Now compress the node list ...
03015 
03016 // allocate a small buffer to unsort ...
03017 if(m_bCompressLut) {
03018   m_ucpLutRemainders.resize(8*nelem);
03019   m_ucpSortOrders.resize(nelem);
03020 }
03021 
03022 bool foundBeg = false;
03023 bool foundEnd = false;
03024 
03025 m_uiIndependentElementSize = 0;
03026 for (unsigned int i = 0; i < nelem; i++) {
03027   // get basic info ...
03028   unsigned int ii = 8*i;
03029 
03030   unsigned int x = in[i].getX();
03031   unsigned int y = in[i].getY();
03032   unsigned int z = in[i].getZ();
03033 
03034   if(m_bCompressLut) {
03035     unsigned int d   = in[i].getLevel();
03036     unsigned int sz = 1u << (m_uiMaxDepth - d);
03037     //compute and store the sort order for the non-hanging case.
03038     if(!(m_ucpLutMasks[2*i+1])){
03039       unsigned int xp = x + sz;
03040       unsigned int yp = y + sz;
03041       unsigned int zp = z + sz;
03042 
03043       unsigned int _x = x^xp;
03044       unsigned int _y = y^yp;
03045       unsigned int _z = z^zp;
03046 
03047       if (_x > _y) {
03048         if ( _y > _z) {
03049           m_ucpSortOrders[i] = ot::DA_FLAGS::ZYX;
03050         } else if ( _x > _z ) {
03051           m_ucpSortOrders[i] = ot::DA_FLAGS::YZX;
03052         } else {
03053           m_ucpSortOrders[i] = ot::DA_FLAGS::YXZ;
03054         }
03055       } else {
03056         if ( _x > _z) {
03057           m_ucpSortOrders[i] = ot::DA_FLAGS::ZXY;
03058         } else if ( _y > _z ) {
03059           m_ucpSortOrders[i] = ot::DA_FLAGS::XZY;
03060         } else {
03061           m_ucpSortOrders[i] = ot::DA_FLAGS::XYZ;
03062         }
03063       }
03064     }else {
03065       //store the childnumber instead for the hanging cases.
03066       unsigned int len_par = (unsigned int)(1u << ( m_uiMaxDepth  - d +1 ) );
03067 
03068       unsigned int a = x % len_par;
03069       unsigned int b = y % len_par;
03070       unsigned int c = z % len_par;
03071 
03072       a /= sz;
03073       b /= sz;
03074       c /= sz;
03075 
03076       m_ucpSortOrders[i]  = (4*c + 2*b + a);
03077     }//end sortOrderRegular block
03078   }//end if Lut compressed
03079 
03080   // use this loop to also detect the begining and end of dependent.
03081   if ( !foundBeg && !(in[i].getFlag() & ot::DA_FLAGS::DEP_ELEM)
03082       && (m_ucpLutMasks[2*i+1] != ot::DA_FLAGS::FOREIGN) ) {
03083     //std::cout << GRN"FOUND BEGINING OF INDEPENDENT "NRM << i << std::endl;
03084     foundBeg = true;
03085     m_uiIndependentElementBegin = i;
03086     m_ptIndependentOffset = Point(x,y,z);
03087   }
03088 
03089   // reverse loop to find end ...
03090   if ( !foundEnd && !(in[nelem-i-1].getFlag() & ot::DA_FLAGS::DEP_ELEM)
03091       && (m_ucpLutMasks[2*(nelem-i-1)+1] != ot::DA_FLAGS::FOREIGN) ) {
03092     //std::cout << GRN"FOUND END OF INDEPENDENT "NRM << nelem - i << std::endl;
03093     foundEnd = true;
03094     m_uiIndependentElementEnd = nelem - i;
03095   }
03096 
03097   //Actual number of Independent elements. In between IndependentElementBegin
03098   //and IndependentElementEnd, we can also have dependent elements and so
03099   //simply taking the difference of end and begin will not work
03100   if( (!(in[i].getFlag() & ot::DA_FLAGS::DEP_ELEM)) &&
03101       (m_ucpLutMasks[2*i+1] != ot::DA_FLAGS::FOREIGN) ) {
03102     m_uiIndependentElementSize++;
03103   }
03104 
03105   if ( i == m_uiElementBegin ) {
03106     m_uiElementQuotient = static_cast<unsigned int>(m_uspLutQuotients.size());
03107   }
03108 
03109   if ( i == m_uiIndependentElementBegin ) {
03110     m_uiIndependentElementQuotient = static_cast<unsigned int>(m_uspLutQuotients.size());
03111   }
03112 
03113   //First initialize Masks...
03114   m_ucpLutMasks[2*i] = 0;
03115 
03116   // locally sort ...
03117   if(m_bCompressLut) {
03118     // Perform Golomb-Rice encoding
03119     //Assumes that the highest 8 bits in offset are not significant, i.e. they will be all 0.
03120     //This means offset can not be more than (2^24-1) = 16,777,215.
03121     //Since the number of octants on a processor will not be more than 16M,
03122     //this does not pose any kind of difficulty.
03123 
03124     unsigned short q;
03125     unsigned int offset;
03126     std::vector<unsigned int> nl(8);
03127     for (unsigned int ij = 0; ij < 8; ij++) {
03128       nl[ij] = nlist[ii+ij];
03129     }
03130     std::sort(nl.begin(), nl.end());
03131 
03132     // 0 is special, it computes the offset wrt i.
03133     // 0, we have a negative offset .. or 0
03134     offset = i - nl[0];
03135     q = (offset >> 8);
03136     m_ucpLutRemainders[8*i] = (offset%256);
03137     if (q) {
03138       m_ucpLutMasks[2*i] |= 1;
03139       m_uspLutQuotients.push_back(q);
03140     }
03141 
03142     for (unsigned int j = 1; j < 8; j++) {
03143       offset =  nl[j] - nl[j-1];
03144       q = (offset >> 8);
03145       m_ucpLutRemainders[8*i + j] = (offset%256);
03146       if (q) {
03147         m_ucpLutMasks[2*i] |= (1 << j);
03148         m_uspLutQuotients.push_back(q);
03149       }
03150     } // for j ...
03151     nl.clear();
03152   }//end if compress
03153 
03154 } // for i
03155 
03156 //Store Nlist if you are not compressing...
03157 if(!m_bCompressLut) {
03158   m_uiNlist = nlist; 
03159 }
03160 
03161 // free up
03162 nlist.clear();
03163 
03164 #ifdef __DEBUG_DA_NLIST__
03165 MPI_Barrier(m_mpiCommActive);
03166 std::cout<<m_iRankActive<<" Just After Compression...."<<std::endl<<std::endl;
03167 std::cout<<m_iRankActive<<" "<<m_uiPreGhostNodeSize<<" "
03168 <<m_uiPreGhostBoundaryNodeSize<<" "<<m_uiPostGhostNodeSize
03169 <<" "<<m_uiNodeSize<<" "<<m_uiBoundaryNodeSize<<std::endl<<std::endl;
03170 MPI_Barrier(m_mpiCommActive);
03171 std::cout << m_iRankActive << ": Leaving " << __func__ << std::endl;
03172 MPI_Barrier(m_mpiCommActive);
03173 #endif
03174 
03175 PROF_BUILD_NLIST_END
03176 
03177 }//end 4-way BuildNode List
03178 }//end namespace ot
03179 
03180 

Generated on Tue Mar 24 16:14:06 2009 for DENDRO by  doxygen 1.3.9.1