Main Page | Namespace List | Class Hierarchy | Alphabetical List | Class List | Directories | File List | Namespace Members | Class Members | File Members | Related Pages

odaFactory.C

Go to the documentation of this file.
00001 
00007 #include "oda.h"
00008 #include "parUtils.h"
00009 #include "colors.h"
00010 #include "testUtils.h"
00011 #include "dendro.h"
00012 
00013 #ifdef __DEBUG__
00014 #ifndef __DEBUG_DA__
00015 #define __DEBUG_DA__
00016 #endif
00017 #endif
00018 
00019 #ifdef __DEBUG_DA__
00020 #ifndef __DEBUG_DA_PUBLIC__
00021 #define __DEBUG_DA_PUBLIC__
00022 #endif
00023 #endif
00024 
00025 #ifdef __DEBUG_DA_PUBLIC__
00026 #ifndef __MEASURE_DA__
00027 #define __MEASURE_DA__
00028 #endif
00029 #endif
00030 
00031 namespace ot {
00032 
/* Resets every member of the DA object to a pristine/empty state and
 * (re)binds the "all" communicator. This is a macro rather than a member
 * function because it reads the names 'compressLut' and 'comm' from the
 * expansion site (see m_bCompressLut / m_mpiCommAll assignments below). It is
 * expanded at the start of DA_FactoryPart0 and again in DA_FactoryPart3
 * whenever a processor becomes inactive and must discard all state. */
#define RESET_DA_BLOCK {\
  /*Initialize Member Data*/\
  m_ucpOctLevels = NULL;\
  m_bComputedLocalToGlobal = false;\
  m_bComputedLocalToGlobalElems = false;\
  m_tnBlocks.clear();\
  m_tnMinAllBlocks.clear();\
  m_uiNlist.clear();\
  m_uiNlistPtr = NULL;\
  m_dilpLocalToGlobal = NULL;\
  m_dilpLocalToGlobalElems = NULL;\
  m_ucpLutRemainders.clear();\
  m_ucpLutRemaindersPtr = NULL;\
  m_ucpSortOrders.clear();\
  m_ucpSortOrdersPtr = NULL;\
  m_uspLutQuotients.clear();\
  m_uspLutQuotientsPtr = NULL;\
  m_ucpLutMasks.clear();\
  m_ucpLutMasksPtr = NULL;\
  m_ucpPreGhostConnectivity.clear();\
  m_ucpPreGhostConnectivityPtr = NULL;\
  m_ptsPreGhostOffsets.clear();\
  m_ptsPreGhostOffsetsPtr = NULL;\
  PreGhostAnchors.clear();\
  /* Counters and buffer sizes */\
  m_uiQuotientCounter = 0;\
  m_uiPreGhostQuotientCnt = 0;\
  m_uiElementQuotient = 0;\
  m_uiIndependentElementQuotient = 0;\
  m_uiNodeSize = 0;\
  m_uiBoundaryNodeSize = 0;\
  m_uiElementSize = 0;\
  m_uiIndependentElementSize = 0;\
  m_uiPreGhostElementSize = 0;\
  m_uiPreGhostNodeSize = 0;\
  m_uiPreGhostBoundaryNodeSize = 0;\
  m_uiPostGhostNodeSize = 0;\
  m_uiLocalBufferSize = 0;\
  m_uiElementBegin = 0;\
  m_uiElementEnd = 0;\
  m_uiPostGhostBegin = 0;\
  m_uiIndependentElementBegin = 0;\
  m_uiIndependentElementEnd = 0;\
  m_uiCurrent = 0;\
  m_uiDimension = 0;\
  m_uiMaxDepth = 0;\
  /* Primary (node) and element scatter-map state */\
  m_uipScatterMap.clear();\
  m_uipSendProcs.clear();\
  m_uipSendOffsets.clear();\
  m_uipSendCounts.clear();\
  m_uipElemScatterMap.clear();\
  m_uipElemSendProcs.clear();\
  m_uipElemSendOffsets.clear();\
  m_uipElemSendCounts.clear();\
  m_uipRecvProcs.clear();\
  m_uipRecvOffsets.clear();\
  m_uipRecvCounts.clear();\
  m_uipElemRecvProcs.clear();\
  m_uipElemRecvOffsets.clear();\
  m_uipElemRecvCounts.clear();\
  m_mpiContexts.clear();\
  /* These two names come from the expansion site: */\
  m_bCompressLut = compressLut;\
  m_uiCommTag = 1;\
  m_mpiCommAll = comm;\
  MPI_Comm_size(m_mpiCommAll,&m_iNpesAll);\
  MPI_Comm_rank(m_mpiCommAll,&m_iRankAll);\
}
00099 
00100 void DA::DA_FactoryPart0(std::vector<ot::TreeNode>& in, MPI_Comm comm,
00101     MPI_Comm activeInputComm, bool compressLut, bool* iAmActive) {
00102   RESET_DA_BLOCK
00103     m_uiInputSize = static_cast<unsigned int>(in.size());
00104 
00105   //The default is NULL. 
00106   if(iAmActive != NULL) {
00107     (*iAmActive) = (!(in.empty())); 
00108     m_bIamActive = (*iAmActive);    
00109   }else {
00110     //Don't care for active state.
00111     m_bIamActive = (!(in.empty()));
00112   }
00113 
00114 #ifdef __DEBUG_DA_PUBLIC__
00115   MPI_Comm expectedActiveInputComm;
00116   int commCompareResult;
00117   par::splitComm2way(m_bIamActive, &expectedActiveInputComm, comm);
00118   if(m_bIamActive) {
00119     MPI_Comm_compare(activeInputComm, expectedActiveInputComm, &commCompareResult);
00120     assert( (commCompareResult == MPI_CONGRUENT) || (commCompareResult == MPI_IDENT) );
00121   }
00122 #endif
00123 
00124   m_mpiCommActive = activeInputComm;
00125 
00126   if(m_bIamActive) {
00127     MPI_Comm_size(m_mpiCommActive, &m_iNpesActive);
00128     MPI_Comm_rank(m_mpiCommActive, &m_iRankActive);
00129   } else {
00130     m_iNpesActive = 0;
00131     m_iRankActive = 0;
00132   }
00133 
00134   if(!m_bIamActive) {
00135     return;
00136   } else {
00137     assert(!in.empty());
00138     m_uiDimension = in[0].getDim();
00139     m_uiMaxDepth  = in[0].getMaxDepth();
00140   }
00141 }//end function
00142 
00143 void DA::DA_FactoryPart1(std::vector<ot::TreeNode>& in) {
00144 #ifdef __PROF_WITH_BARRIER__
00145   MPI_Barrier(m_mpiCommActive);
00146 #endif
00147   PROF_BUILD_DA_STAGE1_BEGIN
00148 
00149     assert(!in.empty());
00150 
00151   // first generate the boundary nodes ...
00152   std::vector<ot::TreeNode> positiveBoundaryOctants;
00153   //Assumption: in is globally sorted to begin with. 
00154   //Guarantee: in remains globally sorted in the end.
00155   //positiveBoundaryOctants need not be globally sorted, in fact I don't think
00156   //it will be sorted even on 1 processor.
00157   addBoundaryNodesType1(in, positiveBoundaryOctants, m_uiDimension, m_uiMaxDepth);
00158 
00159   // Update the maxDepth ...
00160   m_uiMaxDepth = m_uiMaxDepth + 1;
00161 
00162   //Most processors will not add any positive boundaries. So, there is no need
00163   //to unnecessarily distribute positive boundaries on all procs just for
00164   //sorting. So only the few processors touching the positive boundary will
00165   //participate in the parallel sort
00166   MPI_Comm bdyComm;
00167   par::splitComm2way((positiveBoundaryOctants.empty()), &bdyComm, m_mpiCommActive);
00168 
00169   if(!(positiveBoundaryOctants.empty())) {
00170     //Call Sample Sort  
00171     std::vector<ot::TreeNode > tmpVecTN;
00172     par::sampleSort<ot::TreeNode>(positiveBoundaryOctants, tmpVecTN, bdyComm);
00173     positiveBoundaryOctants = tmpVecTN;
00174     tmpVecTN.clear();
00175   }
00176 
00177   par::concatenate<ot::TreeNode>(in, positiveBoundaryOctants, m_mpiCommActive);
00178   positiveBoundaryOctants.clear();
00179 
00180   PROF_BUILD_DA_STAGE1_END
00181 }//end function
00182 
00183 void DA::DA_FactoryPart2(std::vector<ot::TreeNode>& in) {
00184 #ifdef __PROF_WITH_BARRIER__
00185   MPI_Barrier(m_mpiCommActive);
00186 #endif
00187   PROF_BUILD_DA_STAGE2_BEGIN
00188 
00189     assert(!in.empty());
00190   //Marks regular nodes. MUST be called before DA_blockPartStage2
00191   //Assumption: in is globally sorted at this point, including positive
00192   //boundaries 
00193 
00194   flagNodesType3(in, m_mpiCommActive);
00195 
00196   PROF_BUILD_DA_STAGE2_END
00197 }//end function
00198 
// Stage 3 onward of DA construction:
//   1. Repartition 'in' via block partitioning, shrinking the active
//      communicator when the input is too small or when the partition leaves
//      trailing processors empty.
//   2. Exchange inter-processor boundary octants (a-priori ghost exchange)
//      and build the primary scatter map.
//   3. Merge own octants with received ghosts, build the node lookup table,
//      record pre-ghost connectivity, and compress the octree to level bytes.
// NOTE(review): 'comm' and 'compressLut' are not referenced directly in this
// body; they are consumed by the RESET_DA_BLOCK macro expansions below.
// 'blocksPtr' (default NULL) lets a caller supply precomputed, globally
// sorted blocks and skip block-partition stages 1 and 2.
void DA::DA_FactoryPart3(std::vector<ot::TreeNode>& in, MPI_Comm comm, bool compressLut,
    const std::vector<ot::TreeNode>* blocksPtr, bool* iAmActive) {
#ifdef __PROF_WITH_BARRIER__
  MPI_Barrier(m_mpiCommActive);
#endif
  PROF_BUILD_DA_STAGE3_BEGIN

    assert(!in.empty());
  //  1. First repartition  

  // Global octant count, kept so the debug build can verify that block
  // partitioning neither loses nor duplicates octants.
  DendroIntL localSizeBefore = in.size();
  DendroIntL globalSizeBefore; 
  par::Mpi_Allreduce<DendroIntL>(&localSizeBefore, &globalSizeBefore, 1, MPI_SUM, m_mpiCommActive);

  //Partition in and create blocks (blocks must be globally sorted).
  std::vector<ot::TreeNode> blocks;
  if(blocksPtr == NULL) {

    //min grain size = 1000
    const DendroIntL THOUSAND = 1000;
    if (globalSizeBefore < (THOUSAND*m_iNpesActive)) {
      // Too few octants per processor: shrink the job onto the first
      // 'splittingSize' ranks (at least one) so each keeps ~1000 octants.
      int splittingSize = (globalSizeBefore/THOUSAND); 
      if(splittingSize == 0) {
        splittingSize = 1; 
      }

      unsigned int avgLoad = (globalSizeBefore / splittingSize);
      int leftOvers = (globalSizeBefore % splittingSize);

      // Ranks >= splittingSize give away everything; the first 'leftOvers'
      // surviving ranks receive one extra octant each.
      std::vector<TreeNode> tmpIn;
      if(m_iRankActive >= splittingSize) {
        par::scatterValues<ot::TreeNode>(in, tmpIn, 0, m_mpiCommActive);
      }else if(m_iRankActive < leftOvers) {
        par::scatterValues<ot::TreeNode>(in, tmpIn, (avgLoad+1), m_mpiCommActive);
      }else {
        par::scatterValues<ot::TreeNode>(in, tmpIn, avgLoad, m_mpiCommActive);
      }
      in = tmpIn;
      tmpIn.clear();

      MPI_Comm newComm;
      par::splitCommUsingSplittingRank(splittingSize, &newComm, m_mpiCommActive);
      m_mpiCommActive = newComm;
      MPI_Comm_size(m_mpiCommActive,&m_iNpesActive);
      MPI_Comm_rank(m_mpiCommActive,&m_iRankActive);

#ifndef __SILENT_MODE__
      if(!m_iRankActive) {
        std::cout<<" input to DA constructor is small("<<globalSizeBefore
          <<") npes = "<<m_iNpesActive<<" splitting comm."<<std::endl;
      }
#endif

      m_bIamActive = (!in.empty());   
      if(iAmActive != NULL) {   
        //Want the active state returned
        (*iAmActive) = m_bIamActive;
      }
      if(!m_bIamActive) {
        // This processor was dropped by the comm split: wipe state and leave.
        RESET_DA_BLOCK
          PROF_BUILD_DA_STAGE3_END
          return;
      }
    }//end check if total size is too small

    PROF_DA_BPART1_BEGIN

      ot::blockPartStage1(in, blocks, m_uiDimension, m_uiMaxDepth, m_mpiCommActive);    

    PROF_DA_BPART1_END

      PROF_DA_BPART2_BEGIN

      DA_blockPartStage2(in, blocks, m_uiDimension, m_uiMaxDepth, m_mpiCommActive);

    PROF_DA_BPART2_END
  } else {
    // Caller supplied precomputed blocks; skip partition stages 1 and 2.
    blocks = *blocksPtr;
  }

  PROF_DA_BPART3_BEGIN

    DA_blockPartStage3(in, blocks, m_tnMinAllBlocks, m_uiDimension, m_uiMaxDepth, m_mpiCommActive);

  PROF_DA_BPART3_END

    //Store Blocks. 
    m_tnBlocks = blocks;

  //we must split comm if new empty procs were created in blockPartStage2
  //empty procs are created using partW and 0 wts and so all empty procs should
  //be contiguous and only the last few procs should be empty
  if(m_tnMinAllBlocks.size() != m_iNpesActive) {
    m_bIamActive = (!in.empty());   
    if(iAmActive != NULL) {     
      //Want the active state returned
      (*iAmActive) = m_bIamActive;
    }
    assert( m_bIamActive == (m_iRankActive < m_tnMinAllBlocks.size()) );

    MPI_Comm tmpComm;
    par::splitCommUsingSplittingRank(static_cast<int>(m_tnMinAllBlocks.size()),
        &tmpComm, m_mpiCommActive);
    m_mpiCommActive = tmpComm;

#ifndef __SILENT_MODE__
    if(!m_iRankActive) {
      std::cout<<" splitComm: "<<m_iNpesActive<<" -> "<<
        (m_tnMinAllBlocks.size())<<" after bPart in DA constructor. "<<std::endl;
    }
#endif

    if(m_bIamActive) {
      MPI_Comm_size(m_mpiCommActive,&m_iNpesActive);
      MPI_Comm_rank(m_mpiCommActive,&m_iRankActive);
    } else {
      m_iNpesActive = 0;
      m_iRankActive = 0;
    }

    if(!m_bIamActive) {
      // Dropped after block partitioning: wipe state and leave.
      RESET_DA_BLOCK
        PROF_BUILD_DA_STAGE3_END
        return;
    }
  }//end if new empty procs after bPart

  PROF_BUILD_DA_STAGE3_END
#ifdef __PROF_WITH_BARRIER__
    MPI_Barrier(m_mpiCommActive);
#endif
  PROF_BUILD_DA_STAGE4_BEGIN

    // Set the Local offset
    assert(!in.empty());
  assert(m_tnMinAllBlocks.size() == m_iNpesActive);

  m_ptOffset = in[0].getAnchor();

#ifdef __DEBUG_DA_PUBLIC__
  MPI_Barrier(m_mpiCommActive);
  //Check that block Part did the right job.
  //1. Ensure that in is globally sorted and unique.
  assert(par::test::isUniqueAndSorted(in, m_mpiCommActive));

  MPI_Barrier(m_mpiCommActive);
  //2. Ensure that the global size before and after block part has not
  //changed. So we don't miss out octants
  DendroIntL localSizeAfter = in.size();
  DendroIntL globalSizeAfter; 
  par::Mpi_Allreduce<DendroIntL>(&localSizeAfter, &globalSizeAfter, 1,
      MPI_SUM, m_mpiCommActive);

  assert(globalSizeAfter == globalSizeBefore);

  MPI_Barrier(m_mpiCommActive);

  //3. Locally check that if an anchor is hanging then the parent's first
  // child exists.
  for(unsigned int i=0; i< in.size(); i++) {
    if( !(in[i].getFlag() & ot::TreeNode::NODE) ) {
      ot::TreeNode anchorOfParent = in[i].getParent().getDFD();
      unsigned int idxOfParent;
      bool foundAnchorOfParent = seq::maxLowerBound<ot::TreeNode>(in, anchorOfParent, idxOfParent, NULL, NULL);
      if(!foundAnchorOfParent) {
        std::cout<<m_iRankActive<<" failing for: "<<in[i]<<std::endl<<
          "Expected to find: "<<
          anchorOfParent.getAncestor(in[i].getLevel())<<std::endl;
        assert(false);
      }
      if( anchorOfParent.getAnchor() != in[idxOfParent].getAnchor() ) {
        std::cout<<m_iRankActive<<" failing for: "<<in[i]<<std::endl<<
          "Expected to find: "<<
          anchorOfParent.getAncestor(in[i].getLevel())<<std::endl;
        assert(false);
      }
      if( in[idxOfParent].getParent() != in[i].getParent() ) {
        std::cout<<m_iRankActive<<" failing for: "<<in[i]<<std::endl<<
          "Expected to find: "<<
          anchorOfParent.getAncestor(in[i].getLevel())<<std::endl;
        assert(false);
      }
    }
  }

  MPI_Barrier(m_mpiCommActive);

  if(!m_iRankActive) {
    std::cout<<"Partition is correct. Using "<<m_iNpesActive<<" active processors."<<std::endl;
  }

  MPI_Barrier(m_mpiCommActive);
#endif

  //  2. Obtain all ghost octants.

#ifdef __DEBUG_DA_PUBLIC__
  MPI_Barrier(m_mpiCommActive);
  if(!m_iRankActive) {
    std::cout<<"Pick Inter-Processor Boundary Nodes "<<std::endl;
  }
  MPI_Barrier(m_mpiCommActive);
#endif

  // first let's pick boundary nodes on each proc.
  std::vector<ot::TreeNode> allBoundaryLeaves;

  pickInterProcessorBoundaryNodes(in, allBoundaryLeaves, blocks[0], blocks[blocks.size() - 1]);

  ot::TreeNode myFirstOctant = in[0];
  ot::TreeNode myLastOctant = in[in.size() - 1];

  includeSiblingsOfBoundary(allBoundaryLeaves, myFirstOctant, myLastOctant);

  PROF_BUILD_DA_STAGE4_END
#ifdef __PROF_WITH_BARRIER__
    MPI_Barrier(m_mpiCommActive);
#endif
  PROF_BUILD_DA_STAGE5_BEGIN

#ifdef __DEBUG_DA_PUBLIC__
    MPI_Barrier(m_mpiCommActive);
  if(!m_iRankActive) {
    std::cout<<"Selected Extra Ghost Candidates. Now Checking "<<std::endl;      
  }

  assert(seq::test::isUniqueAndSorted<ot::TreeNode>(allBoundaryLeaves));

  unsigned int allBoundaryLeavesSize = allBoundaryLeaves.size();
  unsigned int maxBndSz;
  par::Mpi_Reduce<unsigned int>(&allBoundaryLeavesSize,&maxBndSz,1,MPI_MAX,0,m_mpiCommActive);

  if(!m_iRankActive) {
    std::cout<<"Max Bnd Size:  "<<maxBndSz<<std::endl;      
  }

  MPI_Barrier(m_mpiCommActive);
#endif

  PROF_BUILD_DA_STAGE5_END
#ifdef __PROF_WITH_BARRIER__
    MPI_Barrier(m_mpiCommActive);
#endif
  PROF_BUILD_DA_STAGE6_BEGIN

    //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

    // 4. Loop through all local boundary nodes and determine which processors
    // need to be aware of those nodes. Create lists that shall be sent.
    //

    int *sendCnt = new int[m_iNpesActive];
  int *recvCnt = new int[m_iNpesActive];
  int *sendOffsets = new int[m_iNpesActive];
  int *recvOffsets = new int[m_iNpesActive];

  // sendNodes[p][j] is the local index (into 'in') of the j-th octant to be
  // sent to processor p.
  std::vector< std::vector<unsigned int> > sendNodes;

  prepareAprioriCommMessagesInDAtype2(in, allBoundaryLeaves, blocks, m_tnMinAllBlocks,
      m_iRankActive, m_iNpesActive, sendCnt, sendNodes);

  allBoundaryLeaves.clear();
  blocks.clear();

#ifdef __DEBUG_DA_PUBLIC__
  MPI_Barrier(m_mpiCommActive);
  if(!m_iRankActive) {
    std::cout<<"Picked Octants for Apriori Comm. "<<std::endl;
  }
  MPI_Barrier(m_mpiCommActive);
#endif

  // 5. Actual send/recv. to exchange nodes.
  //
  // 5a.
  // Now do an All2All to get numKeysRecv
  par::Mpi_Alltoall<int>( sendCnt, recvCnt, 1, m_mpiCommActive);

  // 5b. Concatenate all nodes into one single Carray ...
  unsigned int totalSend=0;
  unsigned int totalRecv=0;
  for (unsigned int i=0; i < m_iNpesActive; i++) {
    totalSend+= sendCnt[i];
    totalRecv+= recvCnt[i];
  }

  // create the send and recv buffers ...
  std::vector<ot::TreeNode> sendK (totalSend);
  std::vector<ot::TreeNode> recvK (totalRecv);

  // Now create sendK
  sendOffsets[0] = 0;
  recvOffsets[0] = 0;   

  // compute offsets ...
  for (int i=1; i < m_iNpesActive; i++) {
    sendOffsets[i] = sendOffsets[i-1] + sendCnt[i-1];
    recvOffsets[i] = recvOffsets[i-1] + recvCnt[i-1];
  }

  // Offset of my own octants in the ghosted ordering: everything received
  // from lower-ranked processors (the pre-ghosts) comes first.
  int myOff = recvOffsets[m_iRankActive];

#ifdef __DEBUG_DA_PUBLIC__
  assert(sendCnt[m_iRankActive] == 0);
#endif

  // Pack the send buffer and, at the same time, record the primary scatter
  // map: indices are into the ghosted (local buffer) ordering, hence +myOff.
  for (int i=0; i < m_iNpesActive; i++) {
    for (unsigned int j=0; j<sendCnt[i]; j++) {
      sendK[sendOffsets[i] + j] = in[sendNodes[i][j]];
      m_uipScatterMap.push_back(sendNodes[i][j] + myOff);
    }
  }

#ifdef __DEBUG_DA_PUBLIC__
  MPI_Barrier(m_mpiCommActive);
  if(!m_iRankActive) {
    std::cout<<"Built Primary ScatterMap. "<<std::endl;
  }
  MPI_Barrier(m_mpiCommActive);
#endif

  // 5c. Perform SendRecv to send and receive all keys ...

  ot::TreeNode* sendKptr = NULL;
  ot::TreeNode* recvKptr = NULL;
  if(!sendK.empty()) {
    sendKptr = &(*(sendK.begin()));
  }
  if(!recvK.empty()) {
    recvKptr = &(*(recvK.begin()));
  }

  par::Mpi_Alltoallv_sparse<ot::TreeNode>( sendKptr, sendCnt, sendOffsets, 
      recvKptr, recvCnt, recvOffsets, m_mpiCommActive);

  sendK.clear();
  for (unsigned int i=0; i < m_iNpesActive; i++) {
    sendNodes[i].clear();
  }
  sendNodes.clear();

  // Let's store the counts for later synchronizations  ...
  //The offsets will be computed just before compression, After primary and
  //secondary scatterMaps are merged.
  for ( unsigned int i=0; i < m_iNpesActive; i++) {
    if ( sendCnt[i] ) {
      m_uipSendProcs.push_back(i);
      m_uipSendCounts.push_back(sendCnt[i]);
#ifdef __DEBUG_DA_PUBLIC__
      assert(i != m_iRankActive);
#endif
    }
    if ( recvCnt[i] ) {
      m_uipRecvProcs.push_back(i);
      m_uipRecvCounts.push_back(recvCnt[i]);
#ifdef __DEBUG_DA_PUBLIC__
      assert(i != m_iRankActive);
#endif
    }
  }

  //Free some memory...
  delete [] sendCnt;
  delete [] recvCnt;
  delete [] sendOffsets;
  delete [] recvOffsets;

  PROF_BUILD_DA_STAGE6_END
#ifdef __PROF_WITH_BARRIER__
    MPI_Barrier(m_mpiCommActive);
#endif
  PROF_BUILD_DA_STAGE7_BEGIN

    //Now that we have the ghost nodes (in recvK), we can merge this with the
    //input nodes, and all tag all of these as elements. While merging we shall
    //also set the right offsets into the global vector.

#ifdef __DEBUG_DA_PUBLIC__        
    MPI_Barrier(m_mpiCommActive);
  //std::cout << "Now Merging recv ghosts with own octants, recvK size is " << recvK.size() << std::endl;
  //std::cout << "In size is " << in.size() << std::endl;
  assert(seq::test::isSorted<ot::TreeNode> (recvK));
  MPI_Barrier(m_mpiCommActive);
#endif        

  // Lets store offsets demarcating the global domain spanned by this
  // processor.

  ot::TreeNode globalMin, globalMax;
  globalMin = in[0];


  // Since recvK is sorted, and so are the 'global' nodes, we can merge
  // effectively. We also mark them as being elements.  

  //Merge (In-place) recieved octants with local octants....
  // Ghosted layout: [pre-ghosts (myOff) | own octants (in.size()) | post-ghosts]
  std::vector < ot::TreeNode > localOcts(recvK.size() + in.size() );

#ifdef __DEBUG_DA_PUBLIC__
  MPI_Barrier(m_mpiCommActive);
  if(!m_iRankActive) {
    std::cout<<"Merging ghosts and own octants into the buffer. "<<std::endl;
  }
  MPI_Barrier(m_mpiCommActive);
#endif


  //Note this is an inplace insertion. This is done to preserve the sorted
  //order. Each recvK[i] is independently sorted. Also, since the intial
  //blocks were globally sorted and since the elements on each processor are
  //only decendants of these blocks, hence recvK[i][j] < recvK[i+][k] for all i,j
  //and k.
  // PreGhost ....
  m_uiPreGhostElementSize = 0;
  for (int i=0; i<myOff; i++) {
    localOcts[i] = recvK[i];
    if (!(localOcts[i].getFlag() & ot::TreeNode::BOUNDARY) ) {
      m_uiPreGhostElementSize++;
    }
  }

  //Mine ...
  // m_uiElementEnd advances once per non-boundary own octant, so at the end
  // [m_uiElementBegin, m_uiElementEnd) spans the own (element) octants.
  m_uiElementBegin = myOff;
  m_uiElementEnd = myOff;
  for (int i=myOff; i<myOff+in.size(); i++) {
    localOcts[i] = in[i-myOff];
    if (!(localOcts[i].getFlag() & ot::TreeNode::BOUNDARY) ) {
      m_uiElementEnd++;
    } 
  }
  m_uiElementSize = (m_uiElementEnd - m_uiElementBegin);

  //PostGhosts....
  // Post-ghosts follow own octants in recvK at index (i - in.size()).
  m_uiPostGhostBegin = myOff + static_cast<unsigned int>(in.size());
  for (int i=myOff+in.size(); i<localOcts.size(); i++) {
    localOcts[i] = recvK[i-in.size()];
  }

  // all indices should be set at this stage ...
  // free up some memory.
  in.clear();
  recvK.clear();

  m_uiLocalBufferSize = static_cast<unsigned int>(localOcts.size());

  PROF_BUILD_DA_STAGE7_END
#ifdef __PROF_WITH_BARRIER__
    MPI_Barrier(m_mpiCommActive);
#endif
  PROF_BUILD_DA_STAGE8_BEGIN

    // Now for the big step. Perform searches to build the look-up table.
    // LocalOcts will be modified inside buildNodeList.
    // All counters will be reset inside the function.
    // The list will continue to remain sorted.


#ifdef __DEBUG_DA_PUBLIC__
    MPI_Barrier(m_mpiCommActive);
  assert(seq::test::isUniqueAndSorted(localOcts));
  if(!m_iRankActive) {
    std::cout<<" Entering BuildNodeList. "<<std::endl;
  }    
  MPI_Barrier(m_mpiCommActive);
#endif

  buildNodeList(localOcts);

#ifdef __DEBUG_DA_PUBLIC__
  MPI_Barrier(m_mpiCommActive);
  if(!m_iRankActive) {
    std::cout<<" Leaving BuildNodeList. "<<std::endl;
  }
  assert(seq::test::isUniqueAndSorted(localOcts));
  MPI_Barrier(m_mpiCommActive);
#endif

  PROF_BUILD_DA_STAGE8_END
#ifdef __PROF_WITH_BARRIER__
    MPI_Barrier(m_mpiCommActive);
#endif
  PROF_BUILD_DA_STAGE9_BEGIN

    // Set the global offset 
    m_ptGhostedOffset = localOcts[0].getAnchor();

  //Compute pre-ghost offsets
#ifdef __DEBUG_DA_PUBLIC__
  if ( m_uiElementBegin) {
    PreGhostAnchors.push_back( localOcts[0].getAnchor());
  }
#endif

  // For consecutive pre-ghosts record how they touch; a zero touch-config
  // means a gap, so a fresh anchor offset must be stored.
  for (unsigned int i = 1; i < m_uiElementBegin; i++) {
    unsigned char touchConfig = getTouchConfig(localOcts[i-1],
        localOcts[i], m_uiMaxDepth);
    m_ucpPreGhostConnectivity.push_back(touchConfig);
    if (!touchConfig) {
      m_ptsPreGhostOffsets.push_back( localOcts[i].getAnchor() );
    }
#ifdef __DEBUG_DA_PUBLIC__
    PreGhostAnchors.push_back( localOcts[i].getAnchor());
#endif
  }

  // Finally compress the octree, clean up and we are all set.

  // we simply retain the oct levels ...
  if(!localOcts.empty()) {
    m_ucpOctLevels = new unsigned char [localOcts.size()];
  } else {
    m_ucpOctLevels = NULL;
  }

  for (unsigned int i = 0; i < localOcts.size(); i++) {
    m_ucpOctLevels[i] = localOcts[i].getFlag();
  }

  // clean up ...
  localOcts.clear();

  // Set pointers ....
  if(!m_ucpLutRemainders.empty()) {
    m_ucpLutRemaindersPtr = &(*m_ucpLutRemainders.begin());
  } else {
    m_ucpLutRemaindersPtr = NULL;
  }

  if(!m_uspLutQuotients.empty()) {
    m_uspLutQuotientsPtr = &(*m_uspLutQuotients.begin());
  } else {
    m_uspLutQuotientsPtr = NULL;
  }

  if(!m_ucpLutMasks.empty()) {
    m_ucpLutMasksPtr = &(*m_ucpLutMasks.begin());
  } else {
    m_ucpLutMasksPtr = NULL;
  }

  if(!m_ucpSortOrders.empty()) {
    m_ucpSortOrdersPtr = &(*m_ucpSortOrders.begin());
  } else {
    m_ucpSortOrdersPtr = NULL;
  }

  if(!m_uiNlist.empty()) {
    m_uiNlistPtr = &(*m_uiNlist.begin());
  } else {
    m_uiNlistPtr = NULL;
  }

  PROF_BUILD_DA_STAGE9_END

#ifdef __DEBUG_DA_PUBLIC__
    //Check Loops
    MPI_Barrier(m_mpiCommActive);
  for( init<ot::DA_FLAGS::WRITABLE>(); curr() < end<ot::DA_FLAGS::WRITABLE>();
      next<ot::DA_FLAGS::WRITABLE>() ) {
    assert(curr() >= m_uiElementBegin);
    assert(curr() < m_uiElementEnd);
    unsigned int indices[8];
    getNodeIndices(indices);
    for(unsigned int vtxId = 0; vtxId < 8; vtxId++) {
      assert(indices[vtxId] >= m_uiElementBegin);
      assert(indices[vtxId] < m_uiLocalBufferSize);             
    }
  }
  MPI_Barrier(m_mpiCommActive);
  if(!m_iRankActive) {
    std::cout<<" Finished checking WRITABLE."<<std::endl;
  }
  MPI_Barrier(m_mpiCommActive);

  for( init<ot::DA_FLAGS::INDEPENDENT>(); curr() < end<ot::DA_FLAGS::INDEPENDENT>();
      next<ot::DA_FLAGS::INDEPENDENT>() ) {
    assert(curr() >= m_uiElementBegin);
    assert(curr() < m_uiElementEnd);
    unsigned int indices[8];
    getNodeIndices(indices);
    for(unsigned int vtxId = 0; vtxId < 8; vtxId++) {
      assert(indices[vtxId] >= m_uiElementBegin);       
      assert(indices[vtxId] < m_uiPostGhostBegin);              
    }
  }

  MPI_Barrier(m_mpiCommActive);

  if(!m_iRankActive) {
    std::cout<<" Finished checking INDEPENDENT."<<std::endl;
  }

  MPI_Barrier(m_mpiCommActive);

  for( init<ot::DA_FLAGS::DEPENDENT>(); curr() < end<ot::DA_FLAGS::DEPENDENT>();
      next<ot::DA_FLAGS::DEPENDENT>() ) {
    assert(curr() < m_uiElementEnd);
    unsigned int indices[8];
    getNodeIndices(indices);
    unsigned int numLocal = 0;
    for(unsigned int vtxId = 0; vtxId < 8; vtxId++) {
      assert(indices[vtxId] < m_uiLocalBufferSize);
      if( (indices[vtxId] >= m_uiElementBegin) &&  (indices[vtxId] < m_uiPostGhostBegin) ) {
        numLocal++;
      }         
    }
    assert(numLocal > 0);
    assert(numLocal < 8);
  }

  MPI_Barrier(m_mpiCommActive);

  if(!m_iRankActive) {
    std::cout<<" Finished checking DEPENDENT."<<std::endl;
  }
  MPI_Barrier(m_mpiCommActive);

  for( init<ot::DA_FLAGS::W_DEPENDENT>(); curr() < end<ot::DA_FLAGS::W_DEPENDENT>();
      next<ot::DA_FLAGS::W_DEPENDENT>() ) {
    assert(curr() >= m_uiElementBegin);
    assert(curr() < m_uiElementEnd);
    unsigned int indices[8];
    getNodeIndices(indices);
    unsigned int numLocal = 0;
    for(unsigned int vtxId = 0; vtxId < 8; vtxId++) {
      assert(indices[vtxId] >= m_uiElementBegin);       
      assert(indices[vtxId] < m_uiLocalBufferSize);             
      if( (indices[vtxId] >= m_uiElementBegin) &&  (indices[vtxId] < m_uiPostGhostBegin) ) {
        numLocal++;
      }         
    }
    assert(numLocal > 0);
    assert(numLocal < 8);
  }

  MPI_Barrier(m_mpiCommActive);

  if(!m_iRankActive) {
    std::cout<<" Finished checking W_DEPENDENT."<<std::endl;
  }

  MPI_Barrier(m_mpiCommActive);

  for( init<ot::DA_FLAGS::ALL>(); 
      curr() < end<ot::DA_FLAGS::ALL>();
      next<ot::DA_FLAGS::ALL>() ) {
    assert(curr() < m_uiElementEnd);
    unsigned int indices[8];
    getNodeIndices(indices);
    for(unsigned int vtxId = 0; vtxId < 8; vtxId++) {
      assert(indices[vtxId] < m_uiLocalBufferSize);             
    }
  }

  MPI_Barrier(m_mpiCommActive);

  if(!m_iRankActive) {
    std::cout<<" Finished checking ALL."<<std::endl;
  }

  MPI_Barrier(m_mpiCommActive);

#endif

#ifdef __DEBUG_DA_PUBLIC__
  MPI_Barrier(m_mpiCommActive);
  if(!m_iRankActive) {
    std::cout<<" Just At End of DA...."<<std::endl<<std::endl;
  }
  //std::cout<<m_iRankActive<<" "<<m_uiPreGhostNodeSize<<" "<<m_uiPreGhostBoundaryNodeSize<<" "<<m_uiPostGhostNodeSize<<" "<<m_uiNodeSize<<" "<<m_uiBoundaryNodeSize<<std::endl<<std::endl;
  MPI_Barrier(m_mpiCommActive);
#endif

}//end function
00873 
00874 #undef RESET_DA_BLOCK
00875 
00876 }//end namespace
00877 
00878 
00879 

Generated on Tue Mar 24 16:14:06 2009 for DENDRO by  doxygen 1.3.9.1