Main Page | Namespace List | Class Hierarchy | Alphabetical List | Class List | Directories | File List | Namespace Members | Class Members | File Members | Related Pages

Construct.C

Go to the documentation of this file.
00001 
00009 #include "parUtils.h"
00010 #include "TreeNode.h"
00011 #include <cassert>
00012 #include "nodeAndValues.h"
00013 #include "binUtils.h"
00014 #include "dendro.h"
00015 
00016 #ifdef __DEBUG__
00017 #ifndef __DEBUG_OCT__
00018 #define __DEBUG_OCT__
00019 #endif
00020 #endif
00021 
00022 namespace ot {
00023 
00024   int regularGrid2Octree(const std::vector<double>& elementValues,
00025       unsigned int N, unsigned int nx, unsigned int ny, unsigned int nz,
00026       unsigned int xs, unsigned int ys, unsigned int zs, std::vector<TreeNode>& linOct,
00027       unsigned int dim, unsigned int maxDepth, double thresholdFac, MPI_Comm comm) {
00028     PROF_RG2O_BEGIN
00029 
00030       int rank;
00031     int npes;
00032     MPI_Comm_rank(comm, &rank);
00033     MPI_Comm_size(comm, &npes);
00034 
00035     const int MIN_GRAIN_SIZE = 10;
00036 
00037     bool isNvalid = binOp::isPowerOfTwo(N);
00038 
00039     assert(isNvalid);
00040 
00041     unsigned int rgLevel = binOp::fastLog2(N);
00042     unsigned int elemLen = (1u << (maxDepth - rgLevel));
00043 
00044     //1. Convert input to regular octree.
00045     std::vector<ot::NodeAndValues<double, 1> > tmpList(nx*ny*nz);
00046 
00047     for(int k = 0; k < nz; k++) {
00048       for(int j = 0; j < ny; j++) {
00049         for(int i = 0; i < nx; i++) {
00050           unsigned int currX = ((i + xs)*elemLen); 
00051           unsigned int currY = ((j + ys)*elemLen); 
00052           unsigned int currZ = ((k + zs)*elemLen);
00053           unsigned int idx = ((nx*(j + (ny*k))) + i);
00054           tmpList[idx].node = ot::TreeNode(currX, currY, currZ, rgLevel, dim, maxDepth);
00055           tmpList[idx].values[0] = elementValues[idx];
00056         }//end for i
00057       }//end for j
00058     }//end for k
00059 
00060     //2. Convert to Morton Ordering
00061     std::vector<ot::NodeAndValues<double, 1> > tnAndValsList;
00062     par::sampleSort<ot::NodeAndValues<double, 1> >(tmpList, tnAndValsList, comm);
00063     tmpList.clear();
00064 
00065     DendroIntL inSz = tnAndValsList.size();
00066     DendroIntL globInSize;
00067     par::Mpi_Allreduce<DendroIntL>(&inSz, &globInSize, 1, MPI_SUM, comm);
00068 
00069     bool repeatLoop = true;
00070     int npesCurr = npes;
00071     MPI_Comm commCurr = comm;
00072     if(globInSize < (MIN_GRAIN_SIZE*npes)) {
00073       int splittingSize = (globInSize/MIN_GRAIN_SIZE); 
00074       if(splittingSize == 0) {
00075         splittingSize = 1; 
00076       }
00077 
00078       unsigned int avgLoad = (globInSize/splittingSize);
00079       int leftOvers = (globInSize - (splittingSize*avgLoad));
00080 
00081       if(rank >= splittingSize) {
00082         par::scatterValues<ot::NodeAndValues<double, 1> >(tnAndValsList, tmpList, 0, commCurr);
00083       }else if(rank < leftOvers) {
00084         par::scatterValues<ot::NodeAndValues<double, 1> >(tnAndValsList, tmpList, (avgLoad + 1), commCurr);
00085       }else {
00086         par::scatterValues<ot::NodeAndValues<double, 1> >(tnAndValsList, tmpList, avgLoad, commCurr);
00087       }
00088       tnAndValsList = tmpList;
00089       tmpList.clear();
00090 
00091       MPI_Comm newComm;
00092       par::splitCommUsingSplittingRank(splittingSize, &newComm, commCurr);
00093       commCurr = newComm;
00094       npesCurr = splittingSize;
00095 
00096       if(rank >= splittingSize) {
00097         repeatLoop = false;
00098       }
00099     } else {
00100       par::partitionW<ot::NodeAndValues<double, 1> >(tnAndValsList, NULL, comm);
00101     }
00102 
00103     if(globInSize < MIN_GRAIN_SIZE) {
00104       repeatLoop = false; 
00105     }
00106 
00107     while(repeatLoop) {
00108 
00109       inSz = tnAndValsList.size();
00110 
00111       assert(inSz >= MIN_GRAIN_SIZE);
00112 
00113       MPI_Request requests[4];
00114 
00115       //Send first 7 to previous processor
00116       //Send last 7 to next processor
00117       ot::NodeAndValues<double, 1> prevOcts[7];
00118       if( rank ) {
00119         par::Mpi_Irecv<ot::NodeAndValues<double, 1> >(prevOcts, 7, (rank - 1), 
00120             1, commCurr, &(requests[0]));
00121       }
00122 
00123       ot::NodeAndValues<double, 1> nextOcts[7];
00124       if( rank < (npesCurr - 1) ) {
00125         par::Mpi_Irecv<ot::NodeAndValues<double, 1> >(nextOcts, 7, (rank + 1),
00126             1, commCurr, &(requests[1]));
00127       }
00128 
00129       if( rank ) {
00130         par::Mpi_Issend<ot::NodeAndValues<double, 1> >((&(*(tnAndValsList.begin()))), 7,
00131             (rank - 1), 1, commCurr, &(requests[2]));
00132       }
00133 
00134       if( rank < (npesCurr - 1) ) {
00135         par::Mpi_Issend<ot::NodeAndValues<double, 1> >((&(*(tnAndValsList.end() - 7))), 7,
00136             (rank + 1), 1, commCurr, &(requests[3]));
00137       }
00138 
00139       MPI_Status statuses[4];
00140 
00141       if( rank ) {
00142         MPI_Wait(&(requests[0]), &(statuses[0]));
00143         MPI_Wait(&(requests[2]), &(statuses[2]));
00144       }
00145 
00146       if( rank < (npesCurr - 1) ) {
00147         MPI_Wait(&(requests[1]), &(statuses[1]));
00148         MPI_Wait(&(requests[3]), &(statuses[3]));
00149       }
00150 
00151       //Check and coarsen
00152       int idxOfPrevFC = -1;
00153       if( rank ) {
00154         for(int i = 0; i < 7; i++) {
00155           //There may be more than 1 FC, we only need the last one here
00156           if(prevOcts[i].node.getChildNumber() == 0) {
00157             idxOfPrevFC = i;
00158           }
00159         }//end for i
00160       }
00161 
00162       int idxOfNextFC = -1;
00163       if( rank < (npesCurr - 1) ) {
00164         for(int i = 0; i < 7; i++) {
00165           //There may be more than 1 FC, we only need the first one here
00166           if(nextOcts[i].node.getChildNumber() == 0) {
00167             idxOfNextFC = i;
00168             break;
00169           }
00170         }//end for i
00171       }
00172 
00173       int myFirstFC = -1;
00174       for(int i = 0; i < inSz; i++) {
00175         if(tnAndValsList[i].node.getChildNumber() == 0) {
00176           myFirstFC = i;
00177           break;
00178         }
00179       }//end for i
00180 
00181       int myLastFC = -1;
00182       if( myFirstFC >= 0 ) {
00183         for(int i = (inSz - 1); i >= 0; i--) {
00184           if(tnAndValsList[i].node.getChildNumber() == 0) {
00185             myLastFC = i;
00186             break;
00187           }
00188         }//end for i
00189       }
00190 
00191       //Process upto myFirstFC (exclusive)
00192       //Note, we check the threshold on both the processors so that the
00193       //partition is not changed unnecessarily. 
00194       if( (myFirstFC >= 0) && (idxOfPrevFC >= 0) ) {
00195         int fcGap = (myFirstFC + 7 - idxOfPrevFC);
00196         if( fcGap < 8 ) {
00197           //Can not coarsen
00198           if(myFirstFC) {
00199             tmpList.insert(tmpList.end(), tnAndValsList.begin(),
00200                 tnAndValsList.begin() + myFirstFC);
00201           }
00202         } else {
00203           //fcGap >= 8
00204           //Can coarsen upto (excluding) (1 + idxOfPrevFC) 
00205           double minVal = prevOcts[idxOfPrevFC].values[0];
00206           double maxVal = prevOcts[idxOfPrevFC].values[0];
00207           for(int i = idxOfPrevFC; i < 7; i++){
00208             double currVal = prevOcts[i].values[0];
00209             if(currVal < minVal) {
00210               minVal = currVal;
00211             }
00212             if(currVal > maxVal) {
00213               maxVal = currVal;
00214             }
00215           }//end for i
00216           for(int i = 0; i < (1 + idxOfPrevFC); i++){
00217             double currVal = tnAndValsList[i].values[0];
00218             if(currVal < minVal) {
00219               minVal = currVal;
00220             }
00221             if(currVal > maxVal) {
00222               maxVal = currVal;
00223             }
00224           }//end for i
00225           if((maxVal - minVal) >= thresholdFac) {
00226             //Can't coarsen
00227             if(myFirstFC) {
00228               tmpList.insert(tmpList.end(), tnAndValsList.begin(),
00229                   tnAndValsList.begin() + myFirstFC);
00230             }
00231           } else {
00232             //Can coarsen. The previous processor will add the parent
00233             if((1 + idxOfPrevFC) < myFirstFC) {
00234               tmpList.insert(tmpList.end(), tnAndValsList.begin() + (1 + idxOfPrevFC), 
00235                   tnAndValsList.begin() + myFirstFC);
00236             }//if to skip equality
00237           }//end check threshold
00238         }//end check fcGap
00239       } else {
00240         if(myFirstFC >= 0) {
00241           //Can not coarsen
00242           //The prev processor has no FC. Since the prev processor has more
00243           //than 8 elements, our elements will remain.  
00244           if(myFirstFC) {
00245             tmpList.insert(tmpList.end(), tnAndValsList.begin(),
00246                 tnAndValsList.begin() + myFirstFC);
00247           }
00248         } else if (idxOfPrevFC >= 0) {
00249           //Can coarsen upto (excluding) (1 + idxOfPrevFC) 
00250           double minVal = prevOcts[idxOfPrevFC].values[0];
00251           double maxVal = prevOcts[idxOfPrevFC].values[0];
00252           for(int i = idxOfPrevFC; i < 7; i++){
00253             double currVal = prevOcts[i].values[0];
00254             if(currVal < minVal) {
00255               minVal = currVal;
00256             }
00257             if(currVal > maxVal) {
00258               maxVal = currVal;
00259             }
00260           }//end for i
00261           for(int i = 0; i < (1 + idxOfPrevFC); i++){
00262             double currVal = tnAndValsList[i].values[0];
00263             if(currVal < minVal) {
00264               minVal = currVal;
00265             }
00266             if(currVal > maxVal) {
00267               maxVal = currVal;
00268             }
00269           }//end for i
00270           if((maxVal - minVal) >= thresholdFac) {
00271             //Can't coarsen.
00272             tmpList = tnAndValsList;
00273           } else {
00274             //Can coarsen. The previous processor will add the parent 
00275             tmpList.insert(tmpList.end(), tnAndValsList.begin() + (1 + idxOfPrevFC), 
00276                 tnAndValsList.end());
00277           }
00278         } else {
00279           //Can not coarsen
00280           tmpList = tnAndValsList;
00281         }
00282       }
00283 
00284       //Process myFirstFC (inclusive) to myLastFC (exclusive)
00285       int prevFCidx = myFirstFC;
00286       for(int idx = myFirstFC + 1; idx <= myLastFC; idx++) {
00287         if(tnAndValsList[idx].node.getChildNumber() == 0) {
00288           int fcGap = (idx - prevFCidx);
00289           if(fcGap < 8) {
00290             //Can't coarsen
00291             tmpList.insert(tmpList.end(), tnAndValsList.begin() + prevFCidx,
00292                 tnAndValsList.begin() + idx);
00293           } else {
00294             //fcGap >= 8
00295             //Can coarsen upto (excluding) (8 + prevFCidx)
00296             double minVal = tnAndValsList[prevFCidx].values[0];
00297             double maxVal = tnAndValsList[prevFCidx].values[0];
00298             double sumVal = 0;
00299             for(int j = prevFCidx; j < (8 + prevFCidx); j++) {
00300               double currVal = tnAndValsList[j].values[0];
00301               if(currVal < minVal) {
00302                 minVal = currVal;
00303               }
00304               if(currVal > maxVal) {
00305                 maxVal = currVal;
00306               }
00307               sumVal += currVal;
00308             }//end for j 
00309             if((maxVal - minVal) >= thresholdFac) {
00310               //Can't coarsen.
00311               tmpList.insert(tmpList.end(), tnAndValsList.begin() + prevFCidx,
00312                   tnAndValsList.begin() + idx);
00313             } else {
00314               //Can coarsen. 
00315               ot::NodeAndValues<double, 1> tmpObj;
00316               tmpObj.node = tnAndValsList[prevFCidx].node.getParent();
00317               tmpObj.values[0] = (sumVal/8.0);
00318               tmpList.push_back(tmpObj);
00319               if((prevFCidx + 8) < idx) {
00320                 tmpList.insert(tmpList.end(), tnAndValsList.begin() + prevFCidx + 8,
00321                     tnAndValsList.begin() + idx);
00322               }//if to skip equality
00323             }
00324           }//end check fcGap
00325           prevFCidx = idx;
00326         }//end if found FC
00327       }//end for idx
00328 
00329       //Process myLastFC (inclusive) to end
00330       if( (myLastFC >= 0) && (idxOfNextFC >= 0) ) {
00331         int fcGap = inSz + idxOfNextFC - myLastFC;
00332         if(fcGap < 8) {
00333           //Can't coarsen
00334           tmpList.insert(tmpList.end(), tnAndValsList.begin() + myLastFC,
00335               tnAndValsList.end());
00336         } else {
00337           //fcGap >= 8
00338           //Can coarsen upto (excluding) (myLastFC + 8)
00339           double minVal = tnAndValsList[myLastFC].values[0];
00340           double maxVal = tnAndValsList[myLastFC].values[0];
00341           double sumVal = 0;
00342           if((myLastFC + 8) > inSz) {
00343             for(int i = myLastFC; i < inSz; i++){
00344               double currVal = tnAndValsList[i].values[0];
00345               if(currVal < minVal) {
00346                 minVal = currVal;
00347               }
00348               if(currVal > maxVal) {
00349                 maxVal = currVal;
00350               }
00351               sumVal += currVal;
00352             }//end for i
00353             for(int i = 0; i < (myLastFC + 8 - inSz); i++){
00354               double currVal = nextOcts[i].values[0];
00355               if(currVal < minVal) {
00356                 minVal = currVal;
00357               }
00358               if(currVal > maxVal) {
00359                 maxVal = currVal;
00360               }
00361               sumVal += currVal;
00362             }//end for i         
00363           } else {
00364             for(int i = myLastFC; i < (myLastFC + 8); i++){
00365               double currVal = tnAndValsList[i].values[0];
00366               if(currVal < minVal) {
00367                 minVal = currVal;
00368               }
00369               if(currVal > maxVal) {
00370                 maxVal = currVal;
00371               }
00372               sumVal += currVal;
00373             }//end for i
00374           }
00375           if((maxVal - minVal) >= thresholdFac) {
00376             //Can't coarsen
00377             tmpList.insert(tmpList.end(), tnAndValsList.begin() + myLastFC,
00378                 tnAndValsList.end());
00379           } else {
00380             //Can coarsen
00381             ot::NodeAndValues<double, 1> tmpObj;
00382             tmpObj.node = tnAndValsList[myLastFC].node.getParent();
00383             tmpObj.values[0] = (sumVal/8.0);
00384             tmpList.push_back(tmpObj);
00385             if((myLastFC + 8) < inSz) {
00386               tmpList.insert(tmpList.end(), tnAndValsList.begin() + myLastFC + 8,
00387                   tnAndValsList.end());
00388             }
00389           }//end check threshold
00390         }//end check fcGap
00391       } else {
00392         if(myLastFC >= 0) {
00393           //Can coarsen upto (excluding) (myLastFC + 8)
00394           double minVal = tnAndValsList[myLastFC].values[0];
00395           double maxVal = tnAndValsList[myLastFC].values[0];
00396           double sumVal = 0;
00397           if((myLastFC + 8) > inSz) {
00398             for(int i = myLastFC; i < inSz; i++){
00399               double currVal = tnAndValsList[i].values[0];
00400               if(currVal < minVal) {
00401                 minVal = currVal;
00402               }
00403               if(currVal > maxVal) {
00404                 maxVal = currVal;
00405               }
00406               sumVal += currVal;
00407             }//end for i
00408             if(rank < (npesCurr - 1)) {
00409               for(int i = 0; i < (myLastFC + 8 - inSz); i++){
00410                 double currVal = nextOcts[i].values[0];
00411                 if(currVal < minVal) {
00412                   minVal = currVal;
00413                 }
00414                 if(currVal > maxVal) {
00415                   maxVal = currVal;
00416                 }
00417                 sumVal += currVal;
00418               }//end for i         
00419             }//end if last proc
00420           } else {
00421             for(int i = myLastFC; i < (myLastFC + 8); i++){
00422               double currVal = tnAndValsList[i].values[0];
00423               if(currVal < minVal) {
00424                 minVal = currVal;
00425               }
00426               if(currVal > maxVal) {
00427                 maxVal = currVal;
00428               }
00429               sumVal += currVal;
00430             }//end for i
00431           } 
00432           if((maxVal - minVal) >= thresholdFac) {
00433             //Can't coarsen
00434             tmpList.insert(tmpList.end(), tnAndValsList.begin() + myLastFC,
00435                 tnAndValsList.end());
00436           } else {
00437             //Can coarsen
00438             ot::NodeAndValues<double, 1> tmpObj;
00439             tmpObj.node = tnAndValsList[myLastFC].node.getParent();
00440             tmpObj.values[0] = (sumVal/8.0);
00441             tmpList.push_back(tmpObj);
00442             if((myLastFC + 8) < inSz) {
00443               tmpList.insert(tmpList.end(), tnAndValsList.begin() + myLastFC + 8,
00444                   tnAndValsList.end());
00445             }
00446           }
00447         } else {
00448           //This case is the same as myFirstFC < 0 and it has been taken care
00449           //of earlier. 
00450         }
00451       }
00452 
00453       DendroIntL outSz = tmpList.size();
00454       DendroIntL globOutSize;
00455       par::Mpi_Allreduce<DendroIntL>(&outSz, &globOutSize, 1, MPI_SUM, commCurr);
00456 
00457       //TESTING
00458       if(!rank) {
00459         std::cout<<"In RG2O: globInSize: "<<globInSize<<" globOutSize: "
00460           <<globOutSize<<" npesCurr: "<<npesCurr<<std::endl;
00461       }
00462 
00463       if(globOutSize == globInSize) {
00464         repeatLoop = false;
00465       }
00466 
00467       if(globOutSize < MIN_GRAIN_SIZE) {
00468         repeatLoop = false; 
00469       }
00470 
00471       //prepare for next iteration...
00472       globInSize = globOutSize;
00473       tnAndValsList = tmpList;
00474       tmpList.clear();
00475 
00476       if(globInSize < (MIN_GRAIN_SIZE*npesCurr)) {
00477         int splittingSize = (globInSize/MIN_GRAIN_SIZE); 
00478         if(splittingSize == 0) {
00479           splittingSize = 1; 
00480         }
00481 
00482         unsigned int avgLoad = (globInSize/splittingSize);
00483         int leftOvers = (globInSize - (splittingSize*avgLoad));
00484 
00485         if(rank >= splittingSize) {
00486           par::scatterValues<ot::NodeAndValues<double, 1> >(tnAndValsList, tmpList, 0, commCurr);
00487         }else if(rank < leftOvers) {
00488           par::scatterValues<ot::NodeAndValues<double, 1> >(tnAndValsList, tmpList, (avgLoad + 1), commCurr);
00489         }else {
00490           par::scatterValues<ot::NodeAndValues<double, 1> >(tnAndValsList, tmpList, avgLoad, commCurr);
00491         }
00492         tnAndValsList = tmpList;
00493         tmpList.clear();
00494 
00495         MPI_Comm newComm;
00496         par::splitCommUsingSplittingRank(splittingSize, &newComm, commCurr);
00497         commCurr = newComm;
00498         npesCurr = splittingSize;
00499 
00500         if(rank >= splittingSize) {
00501           repeatLoop = false;
00502         }
00503       } else {
00504         par::partitionW<ot::NodeAndValues<double, 1> >(tnAndValsList, NULL, commCurr);
00505       } //end if splitting comm
00506 
00507     }//end while
00508 
00509     linOct.clear();
00510     for(int i = 0; i < tnAndValsList.size(); i++) {
00511       linOct.push_back(tnAndValsList[i].node);
00512     }//end for i
00513 
00514     PROF_RG2O_END
00515   }//end function
00516 
00517   //New Implementation. Written on April 20, 2008
00518   int completeOctree(const std::vector<TreeNode > & inp,
00519       std::vector<TreeNode > & out, 
00520       unsigned int dim, unsigned int maxDepth, bool isUnique,
00521       bool isSorted, bool assertNoEmptyProcs, MPI_Comm comm) {
00522 
00523     PROF_N2O_BEGIN
00524 
00525       TreeNode root (dim, maxDepth);
00526 
00527     int size;
00528     MPI_Comm_size(comm, &size);
00529 
00530     if(size == 1) {
00531       completeSubtree(root, inp, out, dim, maxDepth, isUnique, isSorted);
00532       PROF_N2O_END
00533     }//end single proc case     
00534 
00535     out = inp;
00536 
00537     //Sort and remove duplicate leaves.
00538     if (!isUnique) {
00539       par::removeDuplicates<ot::TreeNode>(out, isSorted, comm);
00540     } else if (!isSorted) {
00541       std::vector<TreeNode> tmpOut;
00542       par::sampleSort<ot::TreeNode>(out, tmpOut,comm); 
00543       out = tmpOut;
00544       tmpOut.clear();
00545     }
00546 
00547     //Remove empty processors...    
00548     MPI_Comm   new_comm;
00549     if(assertNoEmptyProcs) {
00550       new_comm = comm;
00551       assert(!out.empty());
00552     } else {
00553       par::splitComm2way(out.empty(), &new_comm, comm);
00554     }
00555 
00556     if(!(out.empty())) {        
00557       int new_rank, new_size; 
00558 
00559       MPI_Comm_rank (new_comm, &new_rank);
00560       MPI_Comm_size (new_comm, &new_size);
00561 
00562       MPI_Request requestSend;    
00563       MPI_Request requestRecv;
00564 
00565       ot::TreeNode begBuf;
00566       ot::TreeNode lastElem;
00567 
00568       if(new_rank) {
00569         //Recv
00570         par::Mpi_Irecv<ot::TreeNode>(&begBuf, 1, (new_rank-1) , 1, new_comm, &requestRecv);
00571       }//end if not P0
00572 
00573       if (new_rank < (new_size-1)) {
00574         lastElem = out[out.size() - 1];
00575         //Send
00576         par::Mpi_Issend<ot::TreeNode>(&lastElem, 1, (new_rank+1), 1, new_comm, &requestSend);
00577       }//end if not PN 
00578 
00579 
00580       //Add missing corners to complete the region.
00581       //Add the first corner leaf on the first processor.
00582       if (new_rank == 0) {
00583         ot::TreeNode minCorner(0,0,0,maxDepth,dim,maxDepth);
00584 #ifdef __DEBUG_OCT__
00585         assert(areComparable(out[0], minCorner));
00586 #endif
00587         if ( (out[0] != minCorner) && (!out[0].isAncestor(minCorner)) ) {
00588           ot::TreeNode ncaTmp = getNCA(out[0],minCorner);
00589           std::vector<ot::TreeNode>  kids;
00590           ncaTmp.addChildren(kids);
00591           out.insert(out.begin(),kids[0]);
00592           kids.clear();
00593         }//end if
00594       }//end if
00595 
00596       //Add the last corner leaf on the last processor.
00597       if (new_rank == (new_size-1) ) {
00598         ot::TreeNode maxCorner(((1u << maxDepth)-1),
00599             ((1u << maxDepth)-1),((1u << maxDepth)-1),maxDepth,dim,maxDepth);
00600 #ifdef __DEBUG_OCT__
00601         assert(areComparable(out[out.size()-1], maxCorner));
00602 #endif
00603         if ( (out[out.size()-1] != maxCorner) && 
00604             (!out[out.size()-1].isAncestor(maxCorner)) ) {
00605           ot::TreeNode ncaTmp =  getNCA(out[out.size()-1],maxCorner);
00606           std::vector<ot::TreeNode> kids;
00607           ncaTmp.addChildren(kids);
00608           out.insert(out.end(),kids[(1 << dim)-1]);
00609           kids.clear();
00610         }//end if
00611       }//end if
00612 
00613       std::vector<ot::TreeNode> tmpList;
00614       for(unsigned int i = 0; i < (out.size()-1); i++) {
00615 #ifdef __DEBUG_OCT__
00616         assert(areComparable(out[i], out[i+1]));
00617 #endif
00618         if(out[i].isAncestor(out[i+1])) {
00619           appendCompleteRegion(out[i], out[i+1], tmpList, false, false);
00620         } else {
00621           appendCompleteRegion(out[i], out[i+1], tmpList, true, false);
00622         }
00623       }//end for i
00624 
00625       //Only the last processor adds the last element. All the other processors would have 
00626       //sent it to the next processor, which will add it if it is not an ancestor of 
00627       //the first element on that processor
00628       if(new_rank == (new_size - 1) ) {
00629         tmpList.push_back(out[out.size() - 1]);
00630       }
00631 
00632       if(new_rank) {
00633         MPI_Status statusWait;
00634         MPI_Wait(&requestRecv, &statusWait);
00635 
00636         std::vector<ot::TreeNode> begList;
00637 #ifdef __DEBUG_OCT__
00638         assert(areComparable(begBuf, out[0]));
00639 #endif
00640         if(begBuf.isAncestor(out[0])) {
00641           appendCompleteRegion(begBuf, out[0], begList, false, false);
00642         } else {
00643           appendCompleteRegion(begBuf, out[0], begList, true, false);
00644         }
00645         out = begList;
00646         begList.clear();
00647         out.insert(out.end(), tmpList.begin(), tmpList.end());
00648       } else {
00649         out = tmpList;
00650       }
00651 
00652       tmpList.clear();
00653 
00654       if(new_rank < (new_size-1)) {
00655         MPI_Status statusWait;
00656         MPI_Wait(&requestSend, &statusWait);
00657       }
00658 
00659     } //out not empty
00660 
00661 
00662     PROF_N2O_END
00663 
00664   }//end function
00665 
00666   //New Implementation. Written on April 19, 2008
00667   int completeSubtree(TreeNode block, const std::vector<TreeNode > & inp, std::vector<TreeNode > & out,
00668       unsigned int dim, unsigned int maxDepth, bool isUnique, bool isSorted) {
00669 
00670     PROF_N2O_SEQ_BEGIN
00671 
00672       out = inp;
00673     //Sort and remove duplicate leaves.
00674     if (!isUnique) {
00675       seq::makeVectorUnique<TreeNode>(out, isSorted) ;
00676     } else if (!isSorted) {
00677       sort(out.begin(),out.end());
00678     }
00679 
00680     if (!out.empty()) {
00681       //Add missing corners to complete the region.
00682       //Add the first corner .
00683       ot::TreeNode minCorner = block.getDFD();
00684 #ifdef __DEBUG_OCT__
00685       assert(areComparable(out[0], minCorner));
00686 #endif
00687       if ( (out[0] != minCorner) && (!out[0].isAncestor(minCorner)) ) {
00688         ot::TreeNode ncaTmp = getNCA(out[0],minCorner);
00689         std::vector<ot::TreeNode> kids;
00690         ncaTmp.addChildren(kids);
00691         out.insert(out.begin(),kids[0]);
00692         kids.clear();
00693       }//end if
00694 
00695       //Add the last corner.
00696       ot::TreeNode maxCorner = block.getDLD();
00697 #ifdef __DEBUG_OCT__
00698       assert(areComparable(out[out.size()-1], maxCorner));
00699 #endif
00700       if ( (out[out.size()-1] != maxCorner) && (!out[out.size()-1].isAncestor(maxCorner)) ) {
00701         ot::TreeNode ncaTmp = getNCA(out[out.size()-1],maxCorner);
00702         std::vector<ot::TreeNode> kids;
00703         ncaTmp.addChildren(kids);
00704         out.insert(out.end(),kids[(1 << dim)-1]);
00705         kids.clear();
00706       }//end if
00707 
00708       std::vector<ot::TreeNode> tmpList;
00709       for(unsigned int i = 0; i < (out.size()-1); i++) {
00710 #ifdef __DEBUG_OCT__
00711         assert(areComparable(out[i], out[i+1]));
00712 #endif
00713         if(out[i].isAncestor(out[i+1])) {
00714           appendCompleteRegion(out[i], out[i+1], tmpList, false, false);
00715         } else {
00716           appendCompleteRegion(out[i], out[i+1], tmpList, true, false);
00717         }
00718       }//end for i
00719 
00720       tmpList.push_back(out[out.size() - 1]);
00721 
00722       out = tmpList;
00723       tmpList.clear();
00724     }//end if out empty
00725 
00726     PROF_N2O_SEQ_END
00727 
00728   }//end function
00729 
00730   int points2Octree(std::vector<double>& pts, double * gLens, std::vector<TreeNode> & nodes,
00731       unsigned int dim, unsigned int maxDepth, unsigned int maxNumPts, MPI_Comm comm ) {
00732 
00733     PROF_P2O_BEGIN
00734 
00735       int size;
00736     MPI_Comm_size(comm, &size);
00737 
00738     //Sequential...
00739     if (size == 1) {
00740       points2OctreeSeq(pts, gLens, nodes, dim, maxDepth, maxNumPts); 
00741       PROF_P2O_END
00742     }//end if sequential
00743 
00744     int rank;
00745     MPI_Comm_rank(comm, &rank);
00746 
00747     TreeNode root (dim, maxDepth);
00748 
00749     if (maxDepth == 0) {
00750       if (!rank) {
00751         nodes.resize(1);
00752         nodes[0] = root;
00753       } else {
00754         nodes.resize(0);
00755       }
00756       PROF_P2O_END
00757     }
00758 
00759     unsigned int ptsLen = pts.size();
00760     assert( (dim == 1) || (dim == 2) || (dim == 3 ));
00761     assert( (ptsLen%dim) == 0);
00762 
00763     DendroIntL numNodes = (ptsLen/dim);
00764 
00765     DendroIntL totSize;
00766     par::Mpi_Allreduce<DendroIntL>(&numNodes, &totSize, 1, MPI_SUM, comm);
00767 
00768     if (totSize <= static_cast<DendroIntL>(maxNumPts)) {
00769       if (!rank) {
00770         nodes.resize(1);
00771         nodes[0] = root;
00772       } else {
00773         nodes.resize(0);
00774       }//end if-else
00775       PROF_P2O_END
00776     }//end if
00777 
00778     //Tackle small problems separately....
00779     //min Grain size = 1000 
00780     const DendroIntL THOUSAND = 1000;
00781     if (totSize < (THOUSAND*size)) {
00782       int splittingSize = (totSize/THOUSAND); 
00783 
00784       if(splittingSize == 0) {
00785         splittingSize = 1; 
00786       }
00787 
00788       unsigned int avgLoad = (totSize/splittingSize);
00789       int leftOvers = (totSize - (splittingSize*avgLoad));
00790 
00791       std::vector<double> tmpPts;
00792       if(rank >= splittingSize) {
00793         par::scatterValues<double>(pts, tmpPts, 0, comm);
00794       }else if(rank < leftOvers) {
00795         par::scatterValues<double>(pts, tmpPts, (dim*(avgLoad+1)), comm);
00796       }else {
00797         par::scatterValues<double>(pts, tmpPts, (dim*avgLoad), comm);
00798       }
00799       pts.clear();
00800 
00801       MPI_Comm newComm;
00802       par::splitCommUsingSplittingRank(splittingSize, &newComm, comm);
00803 
00804 #ifndef __SILENT_MODE__
00805       if(!rank) {
00806         std::cout<<"Input to p2o is small ("<<totSize
00807           <<"). npes = "<<size<<" Splitting Comm. "<<std::endl;
00808       }
00809 #endif
00810 
00811       if(rank < splittingSize) {
00812         points2Octree(tmpPts, gLens, nodes, dim, maxDepth, maxNumPts, newComm); 
00813       }
00814       tmpPts.clear();
00815 
00816       PROF_P2O_END
00817     }//reduce procs for small problems
00818 
00819     //Tackle large problems.... 
00820 
00821     nodes.resize(numNodes);
00822 
00823     for (DendroIntL i = 0; i < numNodes; i++) {
00824       //The constructor will ignore unnecessary arguments (for lower
00825       //dimensions).
00826       unsigned int px = (unsigned int)(pts[i*dim]*((double)(1u << maxDepth))/(gLens[0]));
00827       unsigned int py, pz = 0;
00828       if(dim > 1) {
00829         py = (unsigned int)(pts[(i*dim)+1]*((double)(1u << maxDepth))/gLens[1]);
00830         if(dim > 2) {
00831           pz = (unsigned int)(pts[(i*dim)+2]*((double)(1u << maxDepth))/gLens[2]);
00832         }
00833       }
00834       nodes[i] = TreeNode(px, py, pz, maxDepth, dim, maxDepth);
00835     }//end for
00836 
00837     pts.clear(); 
00838 
00839     std::vector<ot::TreeNode> tmpNodes ;
00840 
00841     //Sort nodes (pts.) and partition them.
00842     par::sampleSort<ot::TreeNode>(nodes, tmpNodes, comm); 
00843     nodes = tmpNodes;
00844     tmpNodes.clear();
00845 
00846     std::vector<ot::TreeNode> leaves;
00847     std::vector<ot::TreeNode> minsAllBlocks;
00848 
00849     blockPartStage1_p2o(nodes, leaves, dim, maxDepth, comm);
00850     blockPartStage2_p2o(nodes, leaves, minsAllBlocks, dim, maxDepth, comm);
00851     //leaves will be sorted.
00852 
00853     p2oLocal(nodes, leaves, maxNumPts, dim, maxDepth);
00854 
00855     PROF_P2O_END
00856 
00857   }//end function
00858 
00859   //Added on April 19, 2008
00860   int points2OctreeSeq(std::vector<double>& pts, double * gLens, std::vector<TreeNode> & nodes,
00861       unsigned int dim, unsigned int maxDepth, unsigned int maxNumPts) {
00862 
00863     PROF_P2O_SEQ_BEGIN
00864 
00865       if (maxDepth == 0) {    
00866         nodes.resize(1);
00867         nodes[0] = TreeNode(dim,maxDepth);
00868         PROF_P2O_SEQ_END
00869       }
00870 
00871     unsigned int ptsLen = pts.size();
00872     assert( (dim == 1) || (dim == 2) || (dim == 3 ));
00873     assert( (ptsLen%dim) == 0);
00874 
00875     TreeNode root (dim,maxDepth);
00876     unsigned int numNodes = ptsLen/dim;
00877     nodes.resize(numNodes);
00878 
00879     for (unsigned int i = 0; i < numNodes; i++) {
00880       //The constructor will ignore unnecessary arguments (for lower
00881       //dimensions).
00882       unsigned int px = (unsigned int)(pts[i*dim]*((double)(1u << maxDepth))/(gLens[0]));
00883       unsigned int py, pz = 0;
00884       if(dim > 1) {
00885         py = (unsigned int)(pts[(i*dim)+1]*((double)(1u << maxDepth))/gLens[1]);
00886         if(dim > 2) {
00887           pz = (unsigned int)(pts[(i*dim)+2]*((double)(1u << maxDepth))/gLens[2]);
00888         }
00889       }
00890       nodes[i] = TreeNode(px, py, pz, maxDepth, dim, maxDepth);
00891     }//end for
00892 
00893     pts.clear(); 
00894 
00895     unsigned int totSize = nodes.size();
00896 
00897     if (totSize <= maxNumPts) {
00898       nodes.resize(1);
00899       nodes[0] = root;
00900       PROF_P2O_SEQ_END
00901     }//end if
00902 
00903     //Sort nodes (pts.) and partition them.  
00904     sort(nodes.begin(), nodes.end());
00905 
00906     std::vector<TreeNode> leaves;
00907     leaves.push_back(root);
00908 
00909     p2oLocal(nodes, leaves, maxNumPts, dim, maxDepth);
00910 
00911     PROF_P2O_SEQ_END
00912 
00913   }//end function
00914 
00915   int p2oLocal(std::vector<TreeNode> & nodes, std::vector<TreeNode>& leaves,
00916       unsigned int maxNumPts, unsigned int dim, unsigned int maxDepth) {
00917     PROF_P2O_LOCAL_BEGIN
00918 
00919       std::vector<TreeNode> nodesDup = nodes;
00920     nodes.clear();
00921 
00922     std::vector<TreeNode> addList;
00923     std::vector<TreeNode> nodesTmp;
00924 
00925     std::vector<TreeNode> *nodeSrc = &nodes;
00926     std::vector<TreeNode> *nodeDest = &nodesTmp;
00927     std::vector<TreeNode> *leafSrc = &leaves;
00928     std::vector<TreeNode> *leafDest = &addList;
00929 
00930     //The source and destination alternate for each iteration.
00931     do {
00932       //Leaves remain sorted inside this loop!
00933       //Reset all wts to 0.
00934       for (unsigned int i = 0; i < leafSrc->size(); i++) {
00935         (*leafSrc)[i].setWeight(0);
00936       }
00937 
00938       //nodesDup and leaves are both sorted at this point.
00939       unsigned int nextNode = 0;
00940       unsigned int nextPt = 0;
00941       //Some elements of nodesDup are not inside any element in leaves.
00942       //Note, this is different from other similar loops.
00943       //Here, nodesDup is merely  a counter and remains unchanged for all
00944       //outer do-while iterations, if a box had less than maxNumpts, it will
00945       //be removed from leaves and placed in nodeDest. However, all its
00946       //decendants in nodesDup still remain.
00947       //Also, note the first pt. need not be in any block. Hence, there is an
00948       //extra if-else wrapper to skip pts, that lie outside all blocks and
00949       //are lesser than them in the Morton order.
00950       while (nextPt < nodesDup.size()) {
00951         if (nodesDup[nextPt] >= (*leafSrc)[nextNode]) {
00952 #ifdef __DEBUG_OCT__
00953           assert(areComparable((*leafSrc)[nextNode], nodesDup[nextPt]));
00954 #endif
00955           if (((*leafSrc)[nextNode].isAncestor(nodesDup[nextPt])) ||
00956               ((*leafSrc)[nextNode] == nodesDup[nextPt])) {
00957             (*leafSrc)[nextNode].addWeight(1);
00958             nextPt++;
00959           } else {
00960             nextNode++;
00961             if (nextNode == leafSrc->size()) {
00962               //Note: No assert(false) here. 
00963               break;
00964             }
00965           }
00966         } else {
00967           nextPt++;
00968         }
00969       }//end while
00970 
00971       leafDest->resize((1 << dim)*(leafSrc->size()));
00972       unsigned int addListSz=0;     
00973       unsigned int nodesPrevSz = nodeSrc->size();
00974       nodeSrc->resize(nodesPrevSz + (leafSrc->size()));
00975       nodeDest->resize(nodeSrc->size());
00976       unsigned int tmpCtr=0;
00977       unsigned int nodesCtr=0;
00978       // int cntX=0;
00979       for (unsigned int i = 0; i < leafSrc->size(); i++) {
00980         if ( ((*leafSrc)[i].getWeight() > maxNumPts) &&
00981             ((*leafSrc)[i].getLevel() < maxDepth) ) {
00982           TreeNode thisNode = (*leafSrc)[i];
00983           std::vector<TreeNode> children;
00984           thisNode.addChildren(children);
00985           for (int ci = 0; ci < (1 << dim); ci++) {
00986             (*leafDest)[addListSz] = children[ci];
00987             addListSz++;
00988           }
00989           children.clear();
00990         } else {
00991           while ((nodesCtr < nodesPrevSz) &&
00992               ((*nodeSrc)[nodesCtr] < (*leafSrc)[i]) ) {
00993             (*nodeDest)[tmpCtr++] = (*nodeSrc)[nodesCtr++];
00994           }
00995           (*nodeDest)[tmpCtr++] = (*leafSrc)[i];
00996         }//end if-else
00997       }//end for i
00998 
00999       for (unsigned int i = nodesCtr; i < nodesPrevSz; i++) {
01000         (*nodeDest)[tmpCtr++] = (*nodeSrc)[i];
01001       }
01002 
01003       leafDest->resize(addListSz);
01004       nodeDest->resize(tmpCtr);
01005 
01006       // swap pointers ...
01007       std::vector<TreeNode> *tmpPtr = nodeSrc;
01008       nodeSrc = nodeDest;
01009       nodeDest = tmpPtr;
01010       tmpPtr = leafSrc;
01011       leafSrc = leafDest;
01012       leafDest = tmpPtr;
01013       leafDest->clear();
01014       nodeDest->clear();  
01015     } while (!leafSrc->empty());
01016 
01017     if ( nodeSrc != &nodes ) {
01018       nodes = nodesTmp;
01019     }
01020     nodesTmp.clear();
01021 
01022     //Reset All weights to 1.
01023     for (unsigned int i = 0; i < nodes.size(); i++) {
01024       nodes[i].setWeight(1);
01025     }
01026 
01027     PROF_P2O_LOCAL_END
01028   }
01029 
01030   //New Implementation. Written on April 19th, 2008
01031   //Both ends are inclusive. The output is sorted.
01032   int appendCompleteRegion(TreeNode first, TreeNode second, 
01033       std::vector<ot::TreeNode>& newNodes, bool includeMin, bool includeMax) {
01034 
01035     PROF_COMPLETE_REGION_BEGIN
01036 
01037       unsigned int dim = first.getDim();
01038     unsigned int maxDepth = first.getMaxDepth();
01039 
01040     TreeNode min = ((first < second) ? first : second);
01041 
01042     if(includeMin) {
01043       newNodes.push_back(min);
01044     }
01045 
01046     if (first == second) {
01047       PROF_COMPLETE_REGION_END
01048     }
01049 
01050     TreeNode max = ((first > second) ? first : second);
01051 
01052     //Add nodes > min and < max
01053     TreeNode nca = getNCA(min,max);
01054 
01055     if(min == nca) {
01056       //special case. Top down approach
01057       ot::TreeNode tmpAncestor = min;
01058       bool repeatLoop;
01059       do {
01060         repeatLoop = false;
01061         std::vector<ot::TreeNode> tmpChildList;
01062         tmpAncestor.addChildren(tmpChildList);
01063         for(unsigned int j = 0; j < tmpChildList.size(); j++) {
01064 #ifdef __DEBUG_OCT__
01065           assert(areComparable(tmpChildList[j], max));
01066 #endif
01067           if( (tmpChildList[j] < max) &&
01068               (!(tmpChildList[j].isAncestor(max))) ) {
01069             newNodes.push_back(tmpChildList[j]);
01070           } else if(tmpChildList[j].isAncestor(max)) {
01071             tmpAncestor = tmpChildList[j];
01072             repeatLoop = true;
01073             break;
01074           } else {
01075             assert(tmpChildList[j] == max);
01076             break;
01077           }
01078         }//end for j
01079       } while (repeatLoop);
01080     } else {
01081       TreeNode currentNode = min;    
01082       while(currentNode > nca) {
01083         TreeNode parentOfCurrent = currentNode.getParent();
01084         std::vector<ot::TreeNode> myBros;
01085         parentOfCurrent.addChildren(myBros);
01086         for(unsigned int i = 0; i < myBros.size(); i++) {
01087 #ifdef __DEBUG_OCT__
01088           assert(areComparable(myBros[i], max));
01089 #endif
01090           if( (myBros[i] > min) &&
01091               (myBros[i] < max) && (!(myBros[i].isAncestor(max))) ) {
01092             //Bottom-up here
01093             newNodes.push_back(myBros[i]);
01094           } else if(myBros[i].isAncestor(max)) {
01095             //Top-down now
01096             //nca will be the parentOfCurrent 
01097             //If myBros[i] is an acestor of max then
01098             //it is automatically > min and < max
01099             //The octants are automatically inserted in
01100             //the sorted order, due to properties of space-filling curves.
01101             ot::TreeNode tmpAncestor = myBros[i];
01102             bool repeatLoop;
01103             do {
01104               repeatLoop = false;
01105               std::vector<ot::TreeNode> tmpChildList;
01106               tmpAncestor.addChildren(tmpChildList);
01107               for(unsigned int j = 0; j < tmpChildList.size(); j++) {
01108 #ifdef __DEBUG_OCT__
01109                 assert(areComparable(tmpChildList[j], max));
01110 #endif
01111                 if( (tmpChildList[j] < max) &&
01112                     (!(tmpChildList[j].isAncestor(max))) ) {
01113                   newNodes.push_back(tmpChildList[j]);
01114                 } else if(tmpChildList[j].isAncestor(max)) {
01115                   tmpAncestor = tmpChildList[j];
01116                   repeatLoop = true;
01117                   break;
01118                 } else {
01119                   assert(tmpChildList[j] == max);
01120                   break;
01121                 }
01122               }//end for j
01123             } while (repeatLoop);
01124             break;      
01125           }//end select between bottom-up/ top-down
01126         }//end for i            
01127         currentNode = parentOfCurrent;
01128       }//end while      
01129     }//end if special case
01130 
01131     if(includeMax) {
01132       newNodes.push_back(max);
01133     }
01134 
01135     PROF_COMPLETE_REGION_END
01136   }//end function
01137 
01138   /*
01139   //Original Code
01140   //Both ends are inclusive. The output is sorted.
01141   int completeRegion(TreeNode first, TreeNode second, 
01142   std::vector<ot::TreeNode>& newNodes, bool includeMin, bool includeMax) {
01143 
01144   PROF_COMPLETE_REGION_BEGIN
01145 
01146   unsigned int dim = first.getDim();
01147   unsigned int maxDepth = first.getMaxDepth();
01148 
01149   TreeNode min = ( (first < second) ? first : second );
01150   TreeNode max = ( (first > second) ? first : second );
01151 
01152   newNodes.clear();
01153 
01154   if(includeMin) {
01155   newNodes.push_back(min);
01156   }
01157 
01158   if (first == second) {
01159   return 1;
01160   }
01161 
01162   if(includeMax) {
01163   newNodes.push_back(max);
01164   }
01165 
01166 //Add nodes > min and < max
01167 TreeNode nca = getNCA(min,max);
01168 
01169 std::vector<TreeNode> workingNodes;
01170 std::vector<TreeNode> addList;
01171 std::vector<TreeNode> * wSrc = &(workingNodes);
01172 std::vector<TreeNode> * wDest = &(addList);
01173 workingNodes.push_back(nca);
01174 while (!(wSrc->empty())) {
01175 wDest->resize((wSrc->size())*(1 << dim));
01176 unsigned int addListCtr=0;
01177 for (unsigned int k=0;k < wSrc->size();k++) {
01178 if ( ((*wSrc)[k] > min) && ((*wSrc)[k] < max) && 
01179 (!(*wSrc)[k].isAncestor(max)) ) {
01180 newNodes.push_back((*wSrc)[k]);
01181 } else if ((*wSrc)[k].getLevel() < maxDepth) {
01182 if ((*wSrc)[k].isAncestor(min) || (*wSrc)[k].isAncestor(max) ) {
01183 std::vector<TreeNode>  tmpChildren;
01184 (*wSrc)[k].addChildren(tmpChildren);
01185 for (int l=0;l<(1 << dim);l++) {
01186 if (tmpChildren[l]<max) {
01187 (*wDest)[addListCtr++] = tmpChildren[l];
01188 }
01189 }//end for l
01190 tmpChildren.clear();
01191 }
01192 }//end if-else
01193 }//end for k
01194 wDest->resize(addListCtr);
01195 std::vector<TreeNode> * tmpPtr = wSrc;
01196 wSrc = wDest;
01197 wDest = tmpPtr;
01198 }//end while
01199 
01200 sort(newNodes.begin(),newNodes.end());
01201 
01202 PROF_COMPLETE_REGION_END
01203 }//end function
01204 */
01205 
01206 }//end namespace
01207 

Generated on Tue Mar 24 16:14:04 2009 for DENDRO by  doxygen 1.3.9.1