DENDRO: /net/www/csela/dendro/releases/Dendro/src/oda/odaPartition.C Source File

00001 
00009 #include "oda.h"
00010 #include "parUtils.h"
00011 #include "seqUtils.h"
00012 #include "testUtils.h"
00013 
00014 #ifdef __DEBUG__
00015 #ifndef __DEBUG_DA__
00016 #define __DEBUG_DA__
00017 #endif
00018 #endif
00019 
00020 namespace ot {
00021 
00022   //If block Part equals Morton part, then we only correct singular blocks with
00023   //hanging anchors. Unlike in the general Block Part, the blocks and the
00024   //nodes are already aligned in Morton Part
00025 #ifdef __BLOCK_PART_EQUALS_MORTON_PART__
00026 
00027   int DA_blockPartStage2(std::vector<TreeNode> &nodes, std::vector<TreeNode> &globalCoarse,
00028       unsigned int dim, unsigned int maxDepth, MPI_Comm commActive) {
00029 #ifdef __PROF_WITH_BARRIER__
00030     MPI_Barrier(commActive);
00031 #endif
00032     PROF_BLKPART2_BEGIN
00033 
00034       assert(!globalCoarse.empty());
00035     assert(!nodes.empty());
00036     assert(globalCoarse[0] == nodes[0]);
00037     assert(globalCoarse[globalCoarse.size() - 1] == nodes[nodes.size() - 1]);
00038 
00039     int npesActive, rankActive;
00040 
00041     MPI_Comm_rank(commActive, &rankActive);
00042     MPI_Comm_size(commActive, &npesActive);
00043 
00044     //Reset weights
00045     std::vector<bool> isSingular(globalCoarse.size());
00046     unsigned int nodeCnt = 0;
00047     for (int i = 0; i < globalCoarse.size(); i++) {
00048       globalCoarse[i].setWeight(1);
00049       isSingular[i] = false;
00050       while(nodes[nodeCnt] < globalCoarse[i]) {
00051         nodeCnt++;
00052       }
00053       if( (nodes[nodeCnt] == globalCoarse[i]) && 
00054           (!(nodes[nodeCnt].getFlag() & ot::TreeNode::NODE))  ) {
00055         isSingular[i] = true;
00056       }
00057     }//end for i 
00058 
00059     if(npesActive > 1) {
00060       //For DA only.....
00061       //Pick singular blocks on this processor...
00062       std::vector<ot::TreeNode> singularBlocks;
00063       for(unsigned int i = 0; i < globalCoarse.size(); i++) {
00064         if(isSingular[i]) {
00065           singularBlocks.push_back(globalCoarse[i]);
00066         }
00067       }//end for i
00068 
00069       //Gather all singular blocks on all processors.
00070       std::vector<int> numSingular(npesActive);
00071       std::vector<int> singularDisps(npesActive);
00072 
00073       int singularSz = singularBlocks.size();
00074 
00075       par::Mpi_Allgather<int>(&singularSz, &(*numSingular.begin()), 1, commActive);
00076 
00077       unsigned int totSingular = 0;
00078       for(int i = 0; i < npesActive; i++) {
00079         totSingular += numSingular[i];
00080       }
00081 
00082       std::vector<TreeNode> allSingular(totSingular);
00083 
00084       singularDisps[0] = 0;
00085       for (unsigned int i=1; i < npesActive; i++) {
00086         singularDisps[i] = singularDisps[i-1] + numSingular[i-1];
00087       }
00088 
00089       ot::TreeNode* singularBlocksPtr = NULL;
00090       ot::TreeNode* allSingularPtr = NULL;
00091       if(!singularBlocks.empty()) {
00092         singularBlocksPtr = &(*(singularBlocks.begin()));
00093       }
00094       if(!allSingular.empty()) {
00095         allSingularPtr = &(*(allSingular.begin()));
00096       }
00097       par::Mpi_Allgatherv<ot::TreeNode>(singularBlocksPtr, singularSz,
00098           allSingularPtr, &(*numSingular.begin()), &(*singularDisps.begin()), commActive);
00099 
00100       singularBlocks.clear();
00101       numSingular.clear();
00102       singularDisps.clear();
00103 
00104 #ifdef __DEBUG_DA__
00105       MPI_Barrier(commActive);
00106       assert(seq::test::isUniqueAndSorted(allSingular));
00107       assert(par::test::isUniqueAndSorted(globalCoarse, commActive));
00108       MPI_Barrier(commActive);
00109 #endif
00110 
00111       //Loop through globalCoarse and set wts of all elements in between some
00112       //singular Block's parent and the singular Block to 0. So that the global
00113       //scan of all these elements in partW is the same and hence they will be
00114       //sent to the same processor...
00115       unsigned int lastIdxFound = (globalCoarse.size() -1);
00116       for(int singCnt = (allSingular.size()-1); singCnt >= 0; singCnt--) {
00117         unsigned int idxMLB;          
00118         bool foundMLB = seq::maxLowerBound<ot::TreeNode>(globalCoarse, 
00119             allSingular[singCnt], idxMLB, NULL, &lastIdxFound);
00120         if(foundMLB) {
00121           ot::TreeNode requiredOct = allSingular[singCnt].getParent().getDFD().
00122             getAncestor(allSingular[singCnt].getLevel());
00123           while(globalCoarse[idxMLB] > requiredOct) {
00124             globalCoarse[idxMLB].setWeight(0);
00125             if(idxMLB > 0) {
00126               idxMLB--;
00127             }else {
00128               break;
00129             }              
00130           }
00131           lastIdxFound = idxMLB;
00132           while( (singCnt >= 0) && (allSingular[singCnt] > requiredOct) ) {
00133             singCnt--;
00134           }
00135           singCnt++;
00136         }else {
00137           break;
00138         }//end if found
00139       }//end for i
00140 
00141       allSingular.clear();
00142     }//end if npes > 1
00143 
00144     isSingular.clear();
00145 
00146     par::partitionW<ot::TreeNode>(globalCoarse, getNodeWeight, commActive);
00147 
00148     //Reset weights
00149     for (unsigned int i = 0; i < globalCoarse.size(); i++) {
00150       globalCoarse[i].setWeight(1);
00151     }
00152 
00153     PROF_BLKPART2_END
00154   } // end blockPart
00155 
00156 #else
00157 
00158   int DA_blockPartStage2(std::vector<TreeNode> &nodes, std::vector<TreeNode> &globalCoarse,
00159       unsigned int dim, unsigned int maxDepth, MPI_Comm commActive) {
00160 #ifdef __PROF_WITH_BARRIER__
00161     MPI_Barrier(commActive);
00162 #endif
00163     PROF_BLKPART2_BEGIN
00164 
00165       int npesActive, rankActive;
00166 
00167     MPI_Comm_rank(commActive, &rankActive);
00168     MPI_Comm_size(commActive, &npesActive);
00169 
00170     int *sendCnt = new int[npesActive];
00171     int *recvCnt = new int[npesActive];
00172     int *sendOffsets = new int[npesActive];
00173     int *recvOffsets = new int[npesActive];
00174 
00175     //1. Now compute the wts of these cells ...
00176     //    a. Get the min and max nodes at each processor.
00177     std::vector<TreeNode> _mins_maxs(2*npesActive);
00178 
00179     // communicate ...
00180     TreeNode sendMinMax[2];
00181     TreeNode rootNode (dim,maxDepth);
00182 
00183     if (!nodes.empty()) {
00184       sendMinMax[0] =  nodes[0];
00185       sendMinMax[1] =  nodes[nodes.size()-1];
00186     } else {
00187       sendMinMax[0] = rootNode;
00188       sendMinMax[1] = rootNode;
00189     }
00190 
00191     par::Mpi_Allgather<ot::TreeNode>(sendMinMax, &(*_mins_maxs.begin()), 2, commActive);
00192 
00193     std::vector<std::vector<TreeNode> > sendNodes(npesActive);
00194     std::vector<std::vector<unsigned int> > keymap(npesActive);
00195 
00196     for (int i = 0; i < npesActive; i++) {
00197       sendCnt[i] = 0;
00198       sendNodes[i].clear();
00199       keymap[i].clear();        
00200     }
00201 
00202     //    b. Now compute which cells go to which cells ...
00203     //       logic is that if the coarse cell is between the min and max at a
00204     //       processor or if it is an ancestor of min, then it is sent to that
00205     //       processor.
00206     //Naive Logic:
00207     for (unsigned int i = 0; i < globalCoarse.size(); i++) {
00208       for (int p = 0; p < npesActive; p++) {
00209         if ( (globalCoarse[i].isAncestor(_mins_maxs[2*p])) ||
00210             ( (globalCoarse[i] >= _mins_maxs[2*p]) &&
00211               (globalCoarse[i] <=_mins_maxs[(2*p)+1]) ) ) {
00212           sendNodes[p].push_back(globalCoarse[i]);
00213           // save keymap so that we can assign weights back to globalCoarse.
00214           keymap[p].push_back(i);    
00215           sendCnt[p]++;
00216         }//end if
00217       }//end for
00218     }//end for
00219 
00220     _mins_maxs.clear();
00221 
00222     //2. Send nodes to all cells to compute the wts ... locally ...
00223 
00224     //    a. Communicate how many you'll be sending and how many will be
00225     //       received.
00226 
00227     // Now do an All2All to get numKeysRecv
00228     par::Mpi_Alltoall<int>( sendCnt, recvCnt, 1, commActive);
00229 
00230     //    b. Concatenate all nodes into one single Carray ...
00231     unsigned int totalSend = 0;
00232     unsigned int totalRecv = 0;
00233     for (unsigned int i = 0; i < npesActive; i++) {
00234       totalSend+= sendCnt[i];
00235       totalRecv+= recvCnt[i];
00236     }
00237 
00238     // create the send and recv buffers ...
00239     std::vector<ot::TreeNode> sendK (totalSend);
00240     std::vector<ot::TreeNode> recvK (totalRecv);
00241 
00242     // Now create sendK
00243     sendOffsets[0] = 0;
00244     recvOffsets[0] = 0;
00245 
00246     // compute offsets ...
00247     for (int i = 1; i < npesActive; i++) {
00248       sendOffsets[i] = sendOffsets[i-1] + sendCnt[i-1];
00249       recvOffsets[i] = recvOffsets[i-1] + recvCnt[i-1];
00250     }
00251 
00252     for (int i = 0; i < npesActive; i++) {
00253 #ifdef __DEBUG_DA__
00254       assert( sendCnt[i]  == sendNodes[i].size() );
00255 #endif
00256       for (unsigned int j=0; j<sendCnt[i]; j++) {
00257 #ifdef __DEBUG_DA__
00258         assert( (sendOffsets[i] + j) < totalSend);
00259 #endif
00260         sendK[sendOffsets[i] + j] = sendNodes[i][j];
00261       }//end for j
00262     }//end for i
00263 
00264     //3. send and receive all keys ...
00265 
00266     ot::TreeNode* sendKptr = NULL;
00267     ot::TreeNode* recvKptr = NULL;
00268     if(!sendK.empty()) {
00269       sendKptr = &(*(sendK.begin()));
00270     }
00271     if(!recvK.empty()) {
00272       recvKptr = &(*(recvK.begin()));
00273     }
00274     par::Mpi_Alltoallv_sparse<ot::TreeNode>( sendKptr, sendCnt, sendOffsets,
00275         recvKptr, recvCnt, recvOffsets, commActive);
00276 
00277     sendK.clear();
00278 
00279     //4. Now compute the wts of the locally received nodes ...
00280     //    a. loop through nodes and update the wts of the local chunks ...
00281     unsigned int *wts = NULL;
00282     char * isAnchorHanging = NULL;
00283     if(totalRecv) {
00284       wts = new unsigned int [totalRecv];
00285       isAnchorHanging = new char [totalRecv];
00286     }
00287 
00288     for (unsigned int i = 0; i < totalRecv; i++) {
00289       wts[i] = 0;
00290       isAnchorHanging[i] = 0;
00291     }
00292 
00293     //decendants and chunks are both sorted at this point.
00294     unsigned int nextPt = 0;
00295     unsigned int nextNode = 0;
00296     //Every element in nodes is inside some element in recvK.
00297     while (nextPt < nodes.size()) {
00298       //The first pt. lies in some block.
00299 #ifdef __DEBUG_DA__
00300       assert(nextNode < recvK.size());
00301 #endif
00302       if ((recvK[nextNode].isAncestor(nodes[nextPt])) ||
00303           (recvK[nextNode] == nodes[nextPt])) {
00304         wts[nextNode]++;
00305         if( (recvK[nextNode].getAnchor() == nodes[nextPt].getAnchor()) &&
00306             (!(nodes[nextPt].getFlag() & ot::TreeNode::NODE)) ) {
00307           isAnchorHanging[nextNode] = 1;
00308 #ifdef __DEBUG_DA__
00309           //Only singular blocks can have hanging anchors
00310           assert(recvK[nextNode] == nodes[nextPt]);
00311 #endif
00312         }
00313         nextPt++;
00314       } else {
00315         nextNode++;
00316         if (nextNode >= totalRecv) {
00317           //If this fails then either recvK and nodes are not sorted or
00318           //Some pt in nodes is not in any of recvK
00319           assert(false);
00320         }
00321       }//end if-else
00322     }//end while
00323 
00324     recvK.clear();
00325 
00326     //5. Now communicate the wts back to the procs ...
00327     unsigned int *recvWts = NULL;
00328     char *recvChars = NULL;
00329     if(totalSend) {
00330       recvWts = new unsigned int[totalSend];
00331       recvChars = new char[totalSend];
00332     }
00333 
00334     par::Mpi_Alltoallv_sparse<unsigned int>( wts, recvCnt, recvOffsets, 
00335         recvWts, sendCnt, sendOffsets,  commActive);
00336 
00337     par::Mpi_Alltoallv_sparse<char>( isAnchorHanging, recvCnt, recvOffsets, 
00338         recvChars, sendCnt, sendOffsets, commActive);
00339 
00340     //6. Now map them back to the blocks ...
00341     std::vector<bool> isSingular(globalCoarse.size());
00342     for (int i = 0; i < globalCoarse.size(); i++) {
00343       globalCoarse[i].setWeight(0);
00344       isSingular[i] = false;
00345     }
00346 
00347     for (int i = 0; i < npesActive; i++) {
00348       for (int j = 0; j < sendCnt[i]; j++) {
00349 #ifdef __DEBUG_DA__
00350         assert(j < keymap[i].size());
00351         assert(keymap[i][j] < globalCoarse.size());
00352         assert( (sendOffsets[i] + j) < totalSend );
00353 #endif
00354         globalCoarse[keymap[i][j]].addWeight(recvWts[sendOffsets[i] + j]);
00355         isSingular[keymap[i][j]] = ( isSingular[keymap[i][j]] ||
00356             recvChars[sendOffsets[i] + j]  );
00357       }//end for j
00358     }//end for i
00359 
00360     for (unsigned int i = 0; i < npesActive; i++) {
00361       keymap[i].clear();
00362       sendNodes[i].clear();
00363     }//end for i
00364 
00365     sendNodes.clear();
00366     keymap.clear();
00367 
00368     if(recvWts) {
00369       delete [] recvWts;
00370       recvWts = NULL;
00371     }
00372 
00373     if(recvChars) {
00374       delete [] recvChars;
00375       recvChars = NULL;
00376     }
00377 
00378     if(wts) {
00379       delete [] wts;
00380       wts = NULL;
00381     }
00382 
00383     if(isAnchorHanging) {
00384       delete [] isAnchorHanging;
00385       isAnchorHanging = NULL;
00386     }
00387 
00388     if(npesActive > 1) {
00389       //For DA only.....
00390       //Pick singular blocks on this processor...
00391       std::vector<ot::TreeNode> singularBlocks;
00392       for(unsigned int i=0;i<globalCoarse.size(); i++) {
00393         if(isSingular[i]) {
00394           singularBlocks.push_back(globalCoarse[i]);
00395         }
00396       }//end for i
00397 
00398       //Gather all singular blocks on all processors.
00399       std::vector<int> numSingular(npesActive);
00400       std::vector<int> singularDisps(npesActive);
00401 
00402       int singularSz = singularBlocks.size();
00403 
00404       par::Mpi_Allgather<int>(&singularSz, &(*numSingular.begin()), 1, commActive);
00405 
00406       unsigned int totSingular = 0;
00407       for(int i = 0; i < npesActive; i++) {
00408         totSingular += numSingular[i];
00409       }
00410 
00411       std::vector<TreeNode> allSingular(totSingular);
00412 
00413       singularDisps[0] = 0;
00414       for (unsigned int i=1; i < npesActive; i++) {
00415         singularDisps[i] = singularDisps[i-1] + numSingular[i-1];
00416       }
00417 
00418       ot::TreeNode* singularBlocksPtr = NULL;
00419       ot::TreeNode* allSingularPtr = NULL;
00420       if(!singularBlocks.empty()) {
00421         singularBlocksPtr = &(*(singularBlocks.begin()));
00422       }
00423       if(!allSingular.empty()) {
00424         allSingularPtr = &(*(allSingular.begin()));
00425       }
00426       par::Mpi_Allgatherv<ot::TreeNode>(singularBlocksPtr, singularSz,
00427           allSingularPtr, &(*numSingular.begin()), &(*singularDisps.begin()), commActive);
00428 
00429       singularBlocks.clear();
00430       numSingular.clear();
00431       singularDisps.clear();
00432 
00433 #ifdef __DEBUG_DA__
00434       MPI_Barrier(commActive);
00435       assert(seq::test::isUniqueAndSorted(allSingular));
00436       assert(par::test::isUniqueAndSorted(globalCoarse, commActive));
00437       MPI_Barrier(commActive);
00438 #endif
00439 
00440       //Loop through globalCoarse and set wts of all elements in between some
00441       //singular Block's parent and the singular Block to 0. So that the global
00442       //scan of all these elements in partW is the same and hence they will be
00443       //sent to the same processor...
00444       unsigned int lastIdxFound = (globalCoarse.size() -1);
00445       for(int singCnt = (allSingular.size()-1); singCnt >= 0; singCnt--) {
00446         unsigned int idxMLB;          
00447         bool foundMLB = seq::maxLowerBound<ot::TreeNode>(globalCoarse, 
00448             allSingular[singCnt], idxMLB, NULL, &lastIdxFound);
00449         if(foundMLB) {
00450           ot::TreeNode requiredOct = allSingular[singCnt].getParent().getDFD().
00451             getAncestor(allSingular[singCnt].getLevel());
00452           while(globalCoarse[idxMLB] > requiredOct) {
00453             globalCoarse[idxMLB].setWeight(0);
00454             if(idxMLB > 0) {
00455               idxMLB--;
00456             }else {
00457               break;
00458             }              
00459           }
00460           lastIdxFound = idxMLB;
00461           while( (singCnt >= 0) && (allSingular[singCnt] > requiredOct) ) {
00462             singCnt--;
00463           }
00464           singCnt++;
00465         }else {
00466           break;
00467         }//end if found
00468       }//end for i
00469 
00470       allSingular.clear();
00471     }//end if npes > 1
00472 
00473     isSingular.clear();
00474 
00475     par::partitionW<ot::TreeNode>(globalCoarse, getNodeWeight, commActive);
00476 
00477     //Reset weights
00478     for (unsigned int i=0;i<globalCoarse.size(); i++) {
00479       globalCoarse[i].setWeight(1);
00480     }
00481 
00482     // clean up ...
00483     delete [] sendCnt;
00484     sendCnt = NULL;
00485 
00486     delete [] recvCnt;
00487     recvCnt = NULL;
00488 
00489     delete [] sendOffsets;
00490     sendOffsets = NULL;
00491 
00492     delete [] recvOffsets;
00493     recvOffsets = NULL;
00494 
00495     PROF_BLKPART2_END
00496   } // end blockPart
00497 
00498 #endif
00499 
00500   int DA_blockPartStage3(std::vector<TreeNode> &nodes, std::vector<TreeNode>& globalCoarse,
00501       std::vector<ot::TreeNode>& minsAllBlocks, unsigned int dim,
00502       unsigned int maxDepth, MPI_Comm commActive) {
00503 #ifdef __PROF_WITH_BARRIER__
00504     MPI_Barrier(commActive);
00505 #endif
00506     PROF_BLKPART3_BEGIN
00507 
00508       int npesActive, rankActive;
00509 
00510     MPI_Comm_rank(commActive, &rankActive);
00511     MPI_Comm_size(commActive, &npesActive);
00512 
00513     int *sendCnt = new int[npesActive];
00514     int *recvCnt = new int[npesActive];
00515     int *sendOffsets = new int[npesActive];
00516     int *recvOffsets = new int[npesActive];
00517 
00518     TreeNode rootNode (dim,maxDepth);
00519 
00520     // Now communicate the nodes ...
00521 
00522     //7. Determine locally which keys to send to which proc ...
00523     //Compute Dist on globalCoarse....
00524 
00525     TreeNode *sendMin;
00526     std::vector<ot::TreeNode> vtkDist(npesActive);
00527     if (!globalCoarse.empty()) {
00528       sendMin = (TreeNode *)&(*(globalCoarse.begin()));
00529     } else {
00530       sendMin = &(rootNode);
00531     }
00532 
00533     par::Mpi_Allgather<ot::TreeNode>(sendMin, &(* vtkDist.begin()), 1, commActive);
00534 
00535     minsAllBlocks.clear();
00536     for(int j = 0; j < npesActive; j++) {
00537       if(vtkDist[j] != rootNode) {
00538         minsAllBlocks.push_back(vtkDist[j]);
00539       }
00540     }//end for j
00541 
00542     for (unsigned int j = 1; j < npesActive ; j++) {
00543       if (vtkDist[j] == rootNode) {
00544         vtkDist[j] = vtkDist[j-1];
00545       }
00546     }//end for j
00547 
00548     // correct dist ...
00549     if (npesActive > 1) {
00550       if (vtkDist[npesActive - 1] == vtkDist[npesActive - 2]) {
00551         vtkDist[npesActive - 1] = rootNode;
00552       }//end if
00553 
00554       for (int i = npesActive - 2; i > 0; i--) {
00555         if (vtkDist[i] == vtkDist[i-1]) {
00556           vtkDist[i] = vtkDist[i+1];
00557         }//end if
00558       }//end for
00559     }//end if npes > 1
00560 
00561     unsigned int *part = NULL;
00562     if(!nodes.empty()) {
00563       part = new unsigned int[nodes.size()];
00564     }
00565 
00566     if (npesActive > 1) {
00567       unsigned int pCnt=0;
00568       for (unsigned int i=0; i< nodes.size(); i++) {
00569 #ifdef __DEBUG_DA__
00570         assert(pCnt < npesActive);
00571 #endif
00572         if ( (nodes[i] >= vtkDist[pCnt]) && ( (pCnt == (npesActive - 1)) ||
00573               ( nodes[i] < vtkDist[pCnt+1] ) || (vtkDist[pCnt+1] == rootNode) ) ) {
00574           part[i] = pCnt;
00575         } else {
00576           while ( (pCnt < (npesActive -1)) && (nodes[i] >= vtkDist[pCnt+1])
00577               && (vtkDist[pCnt+1] != rootNode)  ) {
00578             pCnt++;
00579           }//end while
00580           part[i] = pCnt;
00581         }//end if-else
00582       }//end for i
00583     }//end if np>1
00584 
00585     vtkDist.clear();
00586     //_________________________________________________________________________
00587     // Now the partitions should be contiguous since the two lists are globally
00588     // sorted ... and it's simply a shift between the two.
00589     //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
00590 
00591     //compute the total number of nodes being sent to each proc ...
00592     for (int i = 0; i < npesActive; i++) {
00593       sendCnt[i]=0;
00594       recvCnt[i]=0;
00595     }
00596 
00597     if (npesActive > 1) {
00598       for (unsigned int i=0; i<nodes.size(); i++) {
00599 #ifdef __DEBUG_DA__
00600         assert(part[i] < npesActive);
00601 #endif
00602         sendCnt[part[i]]++;
00603       }//end for i
00604     } else {
00605       sendCnt[0] += (nodes.size());
00606     }//end if-else
00607 
00608     if(part) {
00609       delete [] part;
00610       part = NULL;
00611     }
00612 
00613     // communicate with other procs how many you shall be sending and get how
00614     // many to recieve from whom.
00615 
00616     par::Mpi_Alltoall<int>( sendCnt, recvCnt, 1, commActive);
00617 
00618     unsigned int totalRecv = 0;
00619     for (unsigned int i = 0; i < npesActive; i++) {
00620       totalRecv += recvCnt[i];
00621     }//end for i
00622 
00623     sendOffsets[0] = 0;
00624     recvOffsets[0] = 0;
00625 
00626     // compute offsets ...
00627     for (int i=1; i < npesActive; i++) {
00628       sendOffsets[i] = sendOffsets[i-1] + sendCnt[i-1];
00629       recvOffsets[i] = recvOffsets[i-1] + recvCnt[i-1];
00630     }//end for i
00631 
00632     // Allocate for new array ...
00633     std::vector<ot::TreeNode > newNodes(totalRecv);
00634 
00635     // perform All2Allv
00636     ot::TreeNode* nodesPtr = NULL;
00637     ot::TreeNode* newNodesPtr = NULL;
00638     if(!nodes.empty()) {
00639       nodesPtr = &(*(nodes.begin()));
00640     }
00641     if(!newNodes.empty()) {
00642       newNodesPtr = &(*(newNodes.begin()));
00643     }
00644     par::Mpi_Alltoallv_sparse<ot::TreeNode>( nodesPtr, sendCnt, sendOffsets,
00645         newNodesPtr, recvCnt, recvOffsets, commActive);
00646 
00647     // reset the pointer ...
00648     nodes = newNodes;
00649 
00650     // clean up ...
00651     delete [] sendCnt;
00652     sendCnt = NULL;
00653 
00654     delete [] recvCnt;
00655     recvCnt = NULL;
00656 
00657     delete [] sendOffsets;
00658     sendOffsets = NULL;
00659 
00660     delete [] recvOffsets;
00661     recvOffsets = NULL;
00662 
00663     newNodes.clear();
00664 
00665     PROF_BLKPART3_END
00666   } // end blockPart
00667 
00668   //Assumptions: Blocks are sorted and unique and linear, nodes (Volume) is sorted and unique and linear.
00669   //Every element in nodes is a decendant of or equal to some block.
00670   //Some Blocks may be empty.
00671   //Assumes nodes is balanced.
00672   void pickGhostCandidates(const std::vector<ot::TreeNode> & blocks, const std::vector<ot::TreeNode> &nodes,
00673       std::vector<ot::TreeNode>& res, unsigned int dim, unsigned int maxDepth) {        
00674 
00675     PROF_PICK_GHOSTS_BEGIN 
00676       std::vector<unsigned int> firstLayer;
00677     //1. First Pick true inter-processor boundary octants 
00678     //(any octant that touches the inter-processor bdy.)
00679 
00680     pickInterProcessorBoundaryNodes(nodes, firstLayer, blocks[0], blocks[blocks.size() - 1]);
00681 
00682     std::vector<TreeNode> secondLayerBlocks(26*firstLayer.size());
00683     unsigned int secondLayerCount = 0;
00684     for(unsigned int i = 0; i < firstLayer.size(); i++) {
00685 #ifdef __DEBUG_DA__
00686       assert( firstLayer[i] < nodes.size() );
00687 #endif
00688       std::vector<TreeNode> myNh = nodes[firstLayer[i]].getAllNeighbours();
00689       for(unsigned int j = 0; j < myNh.size(); j++) {
00690         if( (myNh[j] >= blocks[0]) && (myNh[j] <= blocks[blocks.size()-1].getDLD()) ) {
00691           //myNh[j] lies in the domain controlled by my processor so add it.
00692           secondLayerBlocks[secondLayerCount] = myNh[j];
00693           secondLayerCount++;
00694         }
00695       }//end for j
00696     }//end for i
00697 
00698     secondLayerBlocks.resize(secondLayerCount);
00699 
00700     seq::makeVectorUnique<ot::TreeNode>(secondLayerBlocks, false);
00701 
00702     //2 Generate Second Ring Candidates depending on the way the first layer touches the block boundaries.
00703     //secondRing is sorted, unique and exists in nodes. (linearity follows directly)
00704     std::vector<unsigned int> secondRing;
00705 
00706     addSecondRing(nodes,firstLayer,blocks,secondRing);
00707 
00708     //2.c. Compare with the Second Ring Candidates and eliminate unwanted octants.
00709     //secondLayer is sorted, unique and not linear.
00710     //secondRing is sorted, unique and linear.
00711     unsigned int layerCnt = 0;
00712     unsigned int ringCnt = 0;
00713     res.clear();     
00714     while( (layerCnt < secondLayerBlocks.size()) && (ringCnt < secondRing.size()) ) {
00715       if( secondLayerBlocks[layerCnt] < nodes[secondRing[ringCnt]] ) {          
00716         if(secondLayerBlocks[layerCnt].isAncestor(nodes[secondRing[ringCnt]])) {
00717           unsigned int tmpCnt = ringCnt;
00718           while ( (tmpCnt < secondRing.size()) && 
00719               (secondLayerBlocks[layerCnt].isAncestor(nodes[secondRing[tmpCnt]]) ) ) {
00720             if( secondLayerBlocks[layerCnt] == nodes[secondRing[tmpCnt]].getParent() ) {
00721               res.push_back(nodes[secondRing[tmpCnt]]);
00722             }
00723             tmpCnt++;
00724           }
00725         }
00726         layerCnt++;
00727       }else {
00728         ringCnt++;
00729       }
00730     }
00731 
00732     //3. Add the first layer and makeVectorUnique.
00733     for(unsigned int i=0; i < firstLayer.size(); i++) {
00734       res.push_back(nodes[firstLayer[i]]);
00735     }
00736     seq::makeVectorUnique<ot::TreeNode>(res,false);
00737 
00738     firstLayer.clear();
00739     secondRing.clear();
00740     secondLayerBlocks.clear();
00741 
00742     PROF_PICK_GHOSTS_END 
00743   }// end fn.
00744 
00745   //secondRing is sorted, unique and exists in nodes. (linearity follows directly)
00746   //blocks is sorted, unique and linear.
00747   //firstLayer is sorted, unique and linear.
00748   int addSecondRing(const std::vector<ot::TreeNode> & nodes, const std::vector<unsigned int> & firstLayer,
00749       const std::vector<ot::TreeNode> & blocks , std::vector<unsigned int> & secondRing) {
00750     unsigned int blkCnt =0;
00751     unsigned int firstCnt = 0;
00752     std::vector<ot::TreeNode> tmpLayer;
00753 
00754     while( (blkCnt < blocks.size()) && (firstCnt < firstLayer.size()) ) {
00755       if( blocks[blkCnt] <= nodes[firstLayer[firstCnt]] ) {
00756         if( (blocks[blkCnt].isAncestor(nodes[firstLayer[firstCnt]])) ||
00757             (blocks[blkCnt] == nodes[firstLayer[firstCnt]]) ) {
00758           unsigned char flags;
00759           bool isBnd = nodes[firstLayer[firstCnt]].isBoundaryOctant(blocks[blkCnt],
00760               ((ot::TreeNode::NEGATIVE) | (ot::TreeNode::POSITIVE)),&flags);                                            
00761 
00762           if(isBnd) {
00763             bool negX = (flags & 1);
00764             bool negY = (flags & 2);
00765             bool negZ = (flags & 4);
00766             bool posX = (flags & 32);
00767             bool posY = (flags & 64);
00768             bool posZ = (flags & 128);
00769             bool isX = (negX || posX);
00770             bool isY = (negY || posY);
00771             bool isZ = (negZ || posZ);
00772 
00773             //Faces...
00774             if(isX) {
00775               if(negX) {
00776                 tmpLayer.push_back(nodes[firstLayer[firstCnt]].getRight());                                     
00777               }
00778               if(posX) {
00779                 tmpLayer.push_back(nodes[firstLayer[firstCnt]].getLeft());
00780               }         
00781             }
00782             if(isY) {
00783               if(negY) {
00784                 tmpLayer.push_back(nodes[firstLayer[firstCnt]].getBack()); 
00785               }
00786               if(posY) {
00787                 tmpLayer.push_back(nodes[firstLayer[firstCnt]].getFront());
00788               }
00789             }
00790             if(isZ) {
00791               if(negZ) {
00792                 tmpLayer.push_back(nodes[firstLayer[firstCnt]].getTop());
00793               }
00794               if(posZ) {
00795                 tmpLayer.push_back(nodes[firstLayer[firstCnt]].getBottom());
00796               }
00797             }
00798 
00799             //Edges...                                          
00800             if(isX && isY) {                                            
00801               if(negX && negY) {
00802                 tmpLayer.push_back(nodes[firstLayer[firstCnt]].getRightBack());                                         
00803               }
00804               if(negX && posY) {
00805                 tmpLayer.push_back(nodes[firstLayer[firstCnt]].getRightFront());
00806               }
00807               if(posX && negY) {
00808                 tmpLayer.push_back(nodes[firstLayer[firstCnt]].getLeftBack());
00809               }                 
00810               if(posX && posY) {
00811                 tmpLayer.push_back(nodes[firstLayer[firstCnt]].getLeftFront());
00812               }
00813             }
00814 
00815             if(isZ && isY) {                                                                    
00816               if(negZ && negY) {
00817                 tmpLayer.push_back(nodes[firstLayer[firstCnt]].getTopBack());
00818               }
00819               if(negZ && posY) {
00820                 tmpLayer.push_back(nodes[firstLayer[firstCnt]].getTopFront());                                  
00821               }
00822               if(posZ && negY) {
00823                 tmpLayer.push_back(nodes[firstLayer[firstCnt]].getBottomBack());
00824               }                 
00825               if(posZ && posY) {
00826                 tmpLayer.push_back(nodes[firstLayer[firstCnt]].getBottomFront());
00827               }
00828             }
00829 
00830             if(isX && isZ) {
00831               if(negX && negZ) {
00832                 tmpLayer.push_back(nodes[firstLayer[firstCnt]].getTopRight());
00833               }
00834               if(negX && posZ) {
00835                 tmpLayer.push_back(nodes[firstLayer[firstCnt]].getBottomRight());
00836               }
00837               if(posX && negZ) {
00838                 tmpLayer.push_back(nodes[firstLayer[firstCnt]].getTopLeft());
00839               }                 
00840               if(posX && posZ) {
00841                 tmpLayer.push_back(nodes[firstLayer[firstCnt]].getBottomLeft()) ; 
00842               }
00843             }
00844 
00845             //Corners...                                                                 
00846             if(isX && isY && isZ) {
00847               if( negX && negY && negZ ) {
00848                 tmpLayer.push_back(nodes[firstLayer[firstCnt]].getTopRightBack());
00849               }
00850               if( posX && negY && negZ ) {
00851                 tmpLayer.push_back(nodes[firstLayer[firstCnt]].getTopLeftBack());
00852               }
00853               if( posX && negY && posZ ) {
00854                 tmpLayer.push_back(nodes[firstLayer[firstCnt]].getBottomLeftBack());
00855               }
00856               if( negX && negY && posZ ) {
00857                 tmpLayer.push_back(nodes[firstLayer[firstCnt]].getBottomRightBack());
00858               }
00859               if( posX && posY && negZ ) {
00860                 tmpLayer.push_back(nodes[firstLayer[firstCnt]].getTopLeftFront());
00861               }
00862               if( negX && posY && negZ ) {
00863                 tmpLayer.push_back(nodes[firstLayer[firstCnt]].getTopRightFront());
00864               }
00865               if( posX && posY && posZ ) {
00866                 tmpLayer.push_back(nodes[firstLayer[firstCnt]].getBottomLeftFront());
00867               }
00868               if( negX && posY && posZ ) {
00869                 tmpLayer.push_back(nodes[firstLayer[firstCnt]].getBottomRightFront());
00870               }
00871             }
00872           }
00873           //The same block must be compared with other possible decendants.
00874           firstCnt++;
00875         }else {
00876           blkCnt++;
00877         }
00878       }else {
00879         firstCnt++;
00880       }
00881     }   
00882 
00883     seq::makeVectorUnique<ot::TreeNode>(tmpLayer,false);
00884 
00885     //tmpLayer is sorted and unique, but not linear.
00886     unsigned int tmpCnt = 0;
00887     unsigned int nodeCnt = 0;
00888     while( (tmpCnt < tmpLayer.size()) && (nodeCnt < nodes.size()) ) {
00889       if( nodes[nodeCnt] <= tmpLayer[tmpCnt] ) {
00890         if(nodes[nodeCnt] == tmpLayer[tmpCnt]) {
00891           secondRing.push_back(nodeCnt);
00892         }
00893         nodeCnt++;                                      
00894       }else {
00895         tmpCnt++;
00896       }
00897     }
00898     tmpLayer.clear();
00899 
00900     return 1;
00901   }//end fn.
00902 
00903 
00904 } // end namespace ot
00905 
00906