00001 
00009 #include "parUtils.h"
00010 #include "TreeNode.h"
00011 #include <cassert>
00012 #include "nodeAndValues.h"
00013 #include "binUtils.h"
00014 #include "dendro.h"
00015 
00016 #ifdef __DEBUG__
00017 #ifndef __DEBUG_OCT__
00018 #define __DEBUG_OCT__
00019 #endif
00020 #endif
00021 
00022 namespace ot {
00023 
00024   int regularGrid2Octree(const std::vector<double>& elementValues,
00025       unsigned int N, unsigned int nx, unsigned int ny, unsigned int nz,
00026       unsigned int xs, unsigned int ys, unsigned int zs, std::vector<TreeNode>& linOct,
00027       unsigned int dim, unsigned int maxDepth, double thresholdFac, MPI_Comm comm) {
00028     PROF_RG2O_BEGIN
00029 
00030       int rank;
00031     int npes;
00032     MPI_Comm_rank(comm, &rank);
00033     MPI_Comm_size(comm, &npes);
00034 
00035     const int MIN_GRAIN_SIZE = 10;
00036 
00037     bool isNvalid = binOp::isPowerOfTwo(N);
00038 
00039     assert(isNvalid);
00040 
00041     unsigned int rgLevel = binOp::fastLog2(N);
00042     unsigned int elemLen = (1u << (maxDepth - rgLevel));
00043 
00044     
00045     std::vector<ot::NodeAndValues<double, 1> > tmpList(nx*ny*nz);
00046 
00047     for(int k = 0; k < nz; k++) {
00048       for(int j = 0; j < ny; j++) {
00049         for(int i = 0; i < nx; i++) {
00050           unsigned int currX = ((i + xs)*elemLen); 
00051           unsigned int currY = ((j + ys)*elemLen); 
00052           unsigned int currZ = ((k + zs)*elemLen);
00053           unsigned int idx = ((nx*(j + (ny*k))) + i);
00054           tmpList[idx].node = ot::TreeNode(currX, currY, currZ, rgLevel, dim, maxDepth);
00055           tmpList[idx].values[0] = elementValues[idx];
00056         }
00057       }
00058     }
00059 
00060     
00061     std::vector<ot::NodeAndValues<double, 1> > tnAndValsList;
00062     par::sampleSort<ot::NodeAndValues<double, 1> >(tmpList, tnAndValsList, comm);
00063     tmpList.clear();
00064 
00065     DendroIntL inSz = tnAndValsList.size();
00066     DendroIntL globInSize;
00067     par::Mpi_Allreduce<DendroIntL>(&inSz, &globInSize, 1, MPI_SUM, comm);
00068 
00069     bool repeatLoop = true;
00070     int npesCurr = npes;
00071     MPI_Comm commCurr = comm;
00072     if(globInSize < (MIN_GRAIN_SIZE*npes)) {
00073       int splittingSize = (globInSize/MIN_GRAIN_SIZE); 
00074       if(splittingSize == 0) {
00075         splittingSize = 1; 
00076       }
00077 
00078       unsigned int avgLoad = (globInSize/splittingSize);
00079       int leftOvers = (globInSize - (splittingSize*avgLoad));
00080 
00081       if(rank >= splittingSize) {
00082         par::scatterValues<ot::NodeAndValues<double, 1> >(tnAndValsList, tmpList, 0, commCurr);
00083       }else if(rank < leftOvers) {
00084         par::scatterValues<ot::NodeAndValues<double, 1> >(tnAndValsList, tmpList, (avgLoad + 1), commCurr);
00085       }else {
00086         par::scatterValues<ot::NodeAndValues<double, 1> >(tnAndValsList, tmpList, avgLoad, commCurr);
00087       }
00088       tnAndValsList = tmpList;
00089       tmpList.clear();
00090 
00091       MPI_Comm newComm;
00092       par::splitCommUsingSplittingRank(splittingSize, &newComm, commCurr);
00093       commCurr = newComm;
00094       npesCurr = splittingSize;
00095 
00096       if(rank >= splittingSize) {
00097         repeatLoop = false;
00098       }
00099     } else {
00100       par::partitionW<ot::NodeAndValues<double, 1> >(tnAndValsList, NULL, comm);
00101     }
00102 
00103     if(globInSize < MIN_GRAIN_SIZE) {
00104       repeatLoop = false; 
00105     }
00106 
00107     while(repeatLoop) {
00108 
00109       inSz = tnAndValsList.size();
00110 
00111       assert(inSz >= MIN_GRAIN_SIZE);
00112 
00113       MPI_Request requests[4];
00114 
00115       
00116       
00117       ot::NodeAndValues<double, 1> prevOcts[7];
00118       if( rank ) {
00119         par::Mpi_Irecv<ot::NodeAndValues<double, 1> >(prevOcts, 7, (rank - 1), 
00120             1, commCurr, &(requests[0]));
00121       }
00122 
00123       ot::NodeAndValues<double, 1> nextOcts[7];
00124       if( rank < (npesCurr - 1) ) {
00125         par::Mpi_Irecv<ot::NodeAndValues<double, 1> >(nextOcts, 7, (rank + 1),
00126             1, commCurr, &(requests[1]));
00127       }
00128 
00129       if( rank ) {
00130         par::Mpi_Issend<ot::NodeAndValues<double, 1> >((&(*(tnAndValsList.begin()))), 7,
00131             (rank - 1), 1, commCurr, &(requests[2]));
00132       }
00133 
00134       if( rank < (npesCurr - 1) ) {
00135         par::Mpi_Issend<ot::NodeAndValues<double, 1> >((&(*(tnAndValsList.end() - 7))), 7,
00136             (rank + 1), 1, commCurr, &(requests[3]));
00137       }
00138 
00139       MPI_Status statuses[4];
00140 
00141       if( rank ) {
00142         MPI_Wait(&(requests[0]), &(statuses[0]));
00143         MPI_Wait(&(requests[2]), &(statuses[2]));
00144       }
00145 
00146       if( rank < (npesCurr - 1) ) {
00147         MPI_Wait(&(requests[1]), &(statuses[1]));
00148         MPI_Wait(&(requests[3]), &(statuses[3]));
00149       }
00150 
00151       
00152       int idxOfPrevFC = -1;
00153       if( rank ) {
00154         for(int i = 0; i < 7; i++) {
00155           
00156           if(prevOcts[i].node.getChildNumber() == 0) {
00157             idxOfPrevFC = i;
00158           }
00159         }
00160       }
00161 
00162       int idxOfNextFC = -1;
00163       if( rank < (npesCurr - 1) ) {
00164         for(int i = 0; i < 7; i++) {
00165           
00166           if(nextOcts[i].node.getChildNumber() == 0) {
00167             idxOfNextFC = i;
00168             break;
00169           }
00170         }
00171       }
00172 
00173       int myFirstFC = -1;
00174       for(int i = 0; i < inSz; i++) {
00175         if(tnAndValsList[i].node.getChildNumber() == 0) {
00176           myFirstFC = i;
00177           break;
00178         }
00179       }
00180 
00181       int myLastFC = -1;
00182       if( myFirstFC >= 0 ) {
00183         for(int i = (inSz - 1); i >= 0; i--) {
00184           if(tnAndValsList[i].node.getChildNumber() == 0) {
00185             myLastFC = i;
00186             break;
00187           }
00188         }
00189       }
00190 
00191       
00192       
00193       
00194       if( (myFirstFC >= 0) && (idxOfPrevFC >= 0) ) {
00195         int fcGap = (myFirstFC + 7 - idxOfPrevFC);
00196         if( fcGap < 8 ) {
00197           
00198           if(myFirstFC) {
00199             tmpList.insert(tmpList.end(), tnAndValsList.begin(),
00200                 tnAndValsList.begin() + myFirstFC);
00201           }
00202         } else {
00203           
00204           
00205           double minVal = prevOcts[idxOfPrevFC].values[0];
00206           double maxVal = prevOcts[idxOfPrevFC].values[0];
00207           for(int i = idxOfPrevFC; i < 7; i++){
00208             double currVal = prevOcts[i].values[0];
00209             if(currVal < minVal) {
00210               minVal = currVal;
00211             }
00212             if(currVal > maxVal) {
00213               maxVal = currVal;
00214             }
00215           }
00216           for(int i = 0; i < (1 + idxOfPrevFC); i++){
00217             double currVal = tnAndValsList[i].values[0];
00218             if(currVal < minVal) {
00219               minVal = currVal;
00220             }
00221             if(currVal > maxVal) {
00222               maxVal = currVal;
00223             }
00224           }
00225           if((maxVal - minVal) >= thresholdFac) {
00226             
00227             if(myFirstFC) {
00228               tmpList.insert(tmpList.end(), tnAndValsList.begin(),
00229                   tnAndValsList.begin() + myFirstFC);
00230             }
00231           } else {
00232             
00233             if((1 + idxOfPrevFC) < myFirstFC) {
00234               tmpList.insert(tmpList.end(), tnAndValsList.begin() + (1 + idxOfPrevFC), 
00235                   tnAndValsList.begin() + myFirstFC);
00236             }
00237           }
00238         }
00239       } else {
00240         if(myFirstFC >= 0) {
00241           
00242           
00243           
00244           if(myFirstFC) {
00245             tmpList.insert(tmpList.end(), tnAndValsList.begin(),
00246                 tnAndValsList.begin() + myFirstFC);
00247           }
00248         } else if (idxOfPrevFC >= 0) {
00249           
00250           double minVal = prevOcts[idxOfPrevFC].values[0];
00251           double maxVal = prevOcts[idxOfPrevFC].values[0];
00252           for(int i = idxOfPrevFC; i < 7; i++){
00253             double currVal = prevOcts[i].values[0];
00254             if(currVal < minVal) {
00255               minVal = currVal;
00256             }
00257             if(currVal > maxVal) {
00258               maxVal = currVal;
00259             }
00260           }
00261           for(int i = 0; i < (1 + idxOfPrevFC); i++){
00262             double currVal = tnAndValsList[i].values[0];
00263             if(currVal < minVal) {
00264               minVal = currVal;
00265             }
00266             if(currVal > maxVal) {
00267               maxVal = currVal;
00268             }
00269           }
00270           if((maxVal - minVal) >= thresholdFac) {
00271             
00272             tmpList = tnAndValsList;
00273           } else {
00274             
00275             tmpList.insert(tmpList.end(), tnAndValsList.begin() + (1 + idxOfPrevFC), 
00276                 tnAndValsList.end());
00277           }
00278         } else {
00279           
00280           tmpList = tnAndValsList;
00281         }
00282       }
00283 
00284       
00285       int prevFCidx = myFirstFC;
00286       for(int idx = myFirstFC + 1; idx <= myLastFC; idx++) {
00287         if(tnAndValsList[idx].node.getChildNumber() == 0) {
00288           int fcGap = (idx - prevFCidx);
00289           if(fcGap < 8) {
00290             
00291             tmpList.insert(tmpList.end(), tnAndValsList.begin() + prevFCidx,
00292                 tnAndValsList.begin() + idx);
00293           } else {
00294             
00295             
00296             double minVal = tnAndValsList[prevFCidx].values[0];
00297             double maxVal = tnAndValsList[prevFCidx].values[0];
00298             double sumVal = 0;
00299             for(int j = prevFCidx; j < (8 + prevFCidx); j++) {
00300               double currVal = tnAndValsList[j].values[0];
00301               if(currVal < minVal) {
00302                 minVal = currVal;
00303               }
00304               if(currVal > maxVal) {
00305                 maxVal = currVal;
00306               }
00307               sumVal += currVal;
00308             }
00309             if((maxVal - minVal) >= thresholdFac) {
00310               
00311               tmpList.insert(tmpList.end(), tnAndValsList.begin() + prevFCidx,
00312                   tnAndValsList.begin() + idx);
00313             } else {
00314               
00315               ot::NodeAndValues<double, 1> tmpObj;
00316               tmpObj.node = tnAndValsList[prevFCidx].node.getParent();
00317               tmpObj.values[0] = (sumVal/8.0);
00318               tmpList.push_back(tmpObj);
00319               if((prevFCidx + 8) < idx) {
00320                 tmpList.insert(tmpList.end(), tnAndValsList.begin() + prevFCidx + 8,
00321                     tnAndValsList.begin() + idx);
00322               }
00323             }
00324           }
00325           prevFCidx = idx;
00326         }
00327       }
00328 
00329       
00330       if( (myLastFC >= 0) && (idxOfNextFC >= 0) ) {
00331         int fcGap = inSz + idxOfNextFC - myLastFC;
00332         if(fcGap < 8) {
00333           
00334           tmpList.insert(tmpList.end(), tnAndValsList.begin() + myLastFC,
00335               tnAndValsList.end());
00336         } else {
00337           
00338           
00339           double minVal = tnAndValsList[myLastFC].values[0];
00340           double maxVal = tnAndValsList[myLastFC].values[0];
00341           double sumVal = 0;
00342           if((myLastFC + 8) > inSz) {
00343             for(int i = myLastFC; i < inSz; i++){
00344               double currVal = tnAndValsList[i].values[0];
00345               if(currVal < minVal) {
00346                 minVal = currVal;
00347               }
00348               if(currVal > maxVal) {
00349                 maxVal = currVal;
00350               }
00351               sumVal += currVal;
00352             }
00353             for(int i = 0; i < (myLastFC + 8 - inSz); i++){
00354               double currVal = nextOcts[i].values[0];
00355               if(currVal < minVal) {
00356                 minVal = currVal;
00357               }
00358               if(currVal > maxVal) {
00359                 maxVal = currVal;
00360               }
00361               sumVal += currVal;
00362             }
00363           } else {
00364             for(int i = myLastFC; i < (myLastFC + 8); i++){
00365               double currVal = tnAndValsList[i].values[0];
00366               if(currVal < minVal) {
00367                 minVal = currVal;
00368               }
00369               if(currVal > maxVal) {
00370                 maxVal = currVal;
00371               }
00372               sumVal += currVal;
00373             }
00374           }
00375           if((maxVal - minVal) >= thresholdFac) {
00376             
00377             tmpList.insert(tmpList.end(), tnAndValsList.begin() + myLastFC,
00378                 tnAndValsList.end());
00379           } else {
00380             
00381             ot::NodeAndValues<double, 1> tmpObj;
00382             tmpObj.node = tnAndValsList[myLastFC].node.getParent();
00383             tmpObj.values[0] = (sumVal/8.0);
00384             tmpList.push_back(tmpObj);
00385             if((myLastFC + 8) < inSz) {
00386               tmpList.insert(tmpList.end(), tnAndValsList.begin() + myLastFC + 8,
00387                   tnAndValsList.end());
00388             }
00389           }
00390         }
00391       } else {
00392         if(myLastFC >= 0) {
00393           
00394           double minVal = tnAndValsList[myLastFC].values[0];
00395           double maxVal = tnAndValsList[myLastFC].values[0];
00396           double sumVal = 0;
00397           if((myLastFC + 8) > inSz) {
00398             for(int i = myLastFC; i < inSz; i++){
00399               double currVal = tnAndValsList[i].values[0];
00400               if(currVal < minVal) {
00401                 minVal = currVal;
00402               }
00403               if(currVal > maxVal) {
00404                 maxVal = currVal;
00405               }
00406               sumVal += currVal;
00407             }
00408             if(rank < (npesCurr - 1)) {
00409               for(int i = 0; i < (myLastFC + 8 - inSz); i++){
00410                 double currVal = nextOcts[i].values[0];
00411                 if(currVal < minVal) {
00412                   minVal = currVal;
00413                 }
00414                 if(currVal > maxVal) {
00415                   maxVal = currVal;
00416                 }
00417                 sumVal += currVal;
00418               }
00419             }
00420           } else {
00421             for(int i = myLastFC; i < (myLastFC + 8); i++){
00422               double currVal = tnAndValsList[i].values[0];
00423               if(currVal < minVal) {
00424                 minVal = currVal;
00425               }
00426               if(currVal > maxVal) {
00427                 maxVal = currVal;
00428               }
00429               sumVal += currVal;
00430             }
00431           } 
00432           if((maxVal - minVal) >= thresholdFac) {
00433             
00434             tmpList.insert(tmpList.end(), tnAndValsList.begin() + myLastFC,
00435                 tnAndValsList.end());
00436           } else {
00437             
00438             ot::NodeAndValues<double, 1> tmpObj;
00439             tmpObj.node = tnAndValsList[myLastFC].node.getParent();
00440             tmpObj.values[0] = (sumVal/8.0);
00441             tmpList.push_back(tmpObj);
00442             if((myLastFC + 8) < inSz) {
00443               tmpList.insert(tmpList.end(), tnAndValsList.begin() + myLastFC + 8,
00444                   tnAndValsList.end());
00445             }
00446           }
00447         } else {
00448           
00449           
00450         }
00451       }
00452 
00453       DendroIntL outSz = tmpList.size();
00454       DendroIntL globOutSize;
00455       par::Mpi_Allreduce<DendroIntL>(&outSz, &globOutSize, 1, MPI_SUM, commCurr);
00456 
00457       
00458       if(!rank) {
00459         std::cout<<"In RG2O: globInSize: "<<globInSize<<" globOutSize: "
00460           <<globOutSize<<" npesCurr: "<<npesCurr<<std::endl;
00461       }
00462 
00463       if(globOutSize == globInSize) {
00464         repeatLoop = false;
00465       }
00466 
00467       if(globOutSize < MIN_GRAIN_SIZE) {
00468         repeatLoop = false; 
00469       }
00470 
00471       
00472       globInSize = globOutSize;
00473       tnAndValsList = tmpList;
00474       tmpList.clear();
00475 
00476       if(globInSize < (MIN_GRAIN_SIZE*npesCurr)) {
00477         int splittingSize = (globInSize/MIN_GRAIN_SIZE); 
00478         if(splittingSize == 0) {
00479           splittingSize = 1; 
00480         }
00481 
00482         unsigned int avgLoad = (globInSize/splittingSize);
00483         int leftOvers = (globInSize - (splittingSize*avgLoad));
00484 
00485         if(rank >= splittingSize) {
00486           par::scatterValues<ot::NodeAndValues<double, 1> >(tnAndValsList, tmpList, 0, commCurr);
00487         }else if(rank < leftOvers) {
00488           par::scatterValues<ot::NodeAndValues<double, 1> >(tnAndValsList, tmpList, (avgLoad + 1), commCurr);
00489         }else {
00490           par::scatterValues<ot::NodeAndValues<double, 1> >(tnAndValsList, tmpList, avgLoad, commCurr);
00491         }
00492         tnAndValsList = tmpList;
00493         tmpList.clear();
00494 
00495         MPI_Comm newComm;
00496         par::splitCommUsingSplittingRank(splittingSize, &newComm, commCurr);
00497         commCurr = newComm;
00498         npesCurr = splittingSize;
00499 
00500         if(rank >= splittingSize) {
00501           repeatLoop = false;
00502         }
00503       } else {
00504         par::partitionW<ot::NodeAndValues<double, 1> >(tnAndValsList, NULL, commCurr);
00505       } 
00506 
00507     }
00508 
00509     linOct.clear();
00510     for(int i = 0; i < tnAndValsList.size(); i++) {
00511       linOct.push_back(tnAndValsList[i].node);
00512     }
00513 
00514     PROF_RG2O_END
00515   }
00516 
00517   
00518   int completeOctree(const std::vector<TreeNode > & inp,
00519       std::vector<TreeNode > & out, 
00520       unsigned int dim, unsigned int maxDepth, bool isUnique,
00521       bool isSorted, bool assertNoEmptyProcs, MPI_Comm comm) {
00522 
00523     PROF_N2O_BEGIN
00524 
00525       TreeNode root (dim, maxDepth);
00526 
00527     int size;
00528     MPI_Comm_size(comm, &size);
00529 
00530     if(size == 1) {
00531       completeSubtree(root, inp, out, dim, maxDepth, isUnique, isSorted);
00532       PROF_N2O_END
00533     }
00534 
00535     out = inp;
00536 
00537     
00538     if (!isUnique) {
00539       par::removeDuplicates<ot::TreeNode>(out, isSorted, comm);
00540     } else if (!isSorted) {
00541       std::vector<TreeNode> tmpOut;
00542       par::sampleSort<ot::TreeNode>(out, tmpOut,comm); 
00543       out = tmpOut;
00544       tmpOut.clear();
00545     }
00546 
00547     
00548     MPI_Comm   new_comm;
00549     if(assertNoEmptyProcs) {
00550       new_comm = comm;
00551       assert(!out.empty());
00552     } else {
00553       par::splitComm2way(out.empty(), &new_comm, comm);
00554     }
00555 
00556     if(!(out.empty())) {        
00557       int new_rank, new_size; 
00558 
00559       MPI_Comm_rank (new_comm, &new_rank);
00560       MPI_Comm_size (new_comm, &new_size);
00561 
00562       MPI_Request requestSend;    
00563       MPI_Request requestRecv;
00564 
00565       ot::TreeNode begBuf;
00566       ot::TreeNode lastElem;
00567 
00568       if(new_rank) {
00569         
00570         par::Mpi_Irecv<ot::TreeNode>(&begBuf, 1, (new_rank-1) , 1, new_comm, &requestRecv);
00571       }
00572 
00573       if (new_rank < (new_size-1)) {
00574         lastElem = out[out.size() - 1];
00575         
00576         par::Mpi_Issend<ot::TreeNode>(&lastElem, 1, (new_rank+1), 1, new_comm, &requestSend);
00577       }
00578 
00579 
00580       
00581       
00582       if (new_rank == 0) {
00583         ot::TreeNode minCorner(0,0,0,maxDepth,dim,maxDepth);
00584 #ifdef __DEBUG_OCT__
00585         assert(areComparable(out[0], minCorner));
00586 #endif
00587         if ( (out[0] != minCorner) && (!out[0].isAncestor(minCorner)) ) {
00588           ot::TreeNode ncaTmp = getNCA(out[0],minCorner);
00589           std::vector<ot::TreeNode>  kids;
00590           ncaTmp.addChildren(kids);
00591           out.insert(out.begin(),kids[0]);
00592           kids.clear();
00593         }
00594       }
00595 
00596       
00597       if (new_rank == (new_size-1) ) {
00598         ot::TreeNode maxCorner(((1u << maxDepth)-1),
00599             ((1u << maxDepth)-1),((1u << maxDepth)-1),maxDepth,dim,maxDepth);
00600 #ifdef __DEBUG_OCT__
00601         assert(areComparable(out[out.size()-1], maxCorner));
00602 #endif
00603         if ( (out[out.size()-1] != maxCorner) && 
00604             (!out[out.size()-1].isAncestor(maxCorner)) ) {
00605           ot::TreeNode ncaTmp =  getNCA(out[out.size()-1],maxCorner);
00606           std::vector<ot::TreeNode> kids;
00607           ncaTmp.addChildren(kids);
00608           out.insert(out.end(),kids[(1 << dim)-1]);
00609           kids.clear();
00610         }
00611       }
00612 
00613       std::vector<ot::TreeNode> tmpList;
00614       for(unsigned int i = 0; i < (out.size()-1); i++) {
00615 #ifdef __DEBUG_OCT__
00616         assert(areComparable(out[i], out[i+1]));
00617 #endif
00618         if(out[i].isAncestor(out[i+1])) {
00619           appendCompleteRegion(out[i], out[i+1], tmpList, false, false);
00620         } else {
00621           appendCompleteRegion(out[i], out[i+1], tmpList, true, false);
00622         }
00623       }
00624 
00625       
00626       
00627       
00628       if(new_rank == (new_size - 1) ) {
00629         tmpList.push_back(out[out.size() - 1]);
00630       }
00631 
00632       if(new_rank) {
00633         MPI_Status statusWait;
00634         MPI_Wait(&requestRecv, &statusWait);
00635 
00636         std::vector<ot::TreeNode> begList;
00637 #ifdef __DEBUG_OCT__
00638         assert(areComparable(begBuf, out[0]));
00639 #endif
00640         if(begBuf.isAncestor(out[0])) {
00641           appendCompleteRegion(begBuf, out[0], begList, false, false);
00642         } else {
00643           appendCompleteRegion(begBuf, out[0], begList, true, false);
00644         }
00645         out = begList;
00646         begList.clear();
00647         out.insert(out.end(), tmpList.begin(), tmpList.end());
00648       } else {
00649         out = tmpList;
00650       }
00651 
00652       tmpList.clear();
00653 
00654       if(new_rank < (new_size-1)) {
00655         MPI_Status statusWait;
00656         MPI_Wait(&requestSend, &statusWait);
00657       }
00658 
00659     } 
00660 
00661 
00662     PROF_N2O_END
00663 
00664   }
00665 
00666   
00667   int completeSubtree(TreeNode block, const std::vector<TreeNode > & inp, std::vector<TreeNode > & out,
00668       unsigned int dim, unsigned int maxDepth, bool isUnique, bool isSorted) {
00669 
00670     PROF_N2O_SEQ_BEGIN
00671 
00672       out = inp;
00673     
00674     if (!isUnique) {
00675       seq::makeVectorUnique<TreeNode>(out, isSorted) ;
00676     } else if (!isSorted) {
00677       sort(out.begin(),out.end());
00678     }
00679 
00680     if (!out.empty()) {
00681       
00682       
00683       ot::TreeNode minCorner = block.getDFD();
00684 #ifdef __DEBUG_OCT__
00685       assert(areComparable(out[0], minCorner));
00686 #endif
00687       if ( (out[0] != minCorner) && (!out[0].isAncestor(minCorner)) ) {
00688         ot::TreeNode ncaTmp = getNCA(out[0],minCorner);
00689         std::vector<ot::TreeNode> kids;
00690         ncaTmp.addChildren(kids);
00691         out.insert(out.begin(),kids[0]);
00692         kids.clear();
00693       }
00694 
00695       
00696       ot::TreeNode maxCorner = block.getDLD();
00697 #ifdef __DEBUG_OCT__
00698       assert(areComparable(out[out.size()-1], maxCorner));
00699 #endif
00700       if ( (out[out.size()-1] != maxCorner) && (!out[out.size()-1].isAncestor(maxCorner)) ) {
00701         ot::TreeNode ncaTmp = getNCA(out[out.size()-1],maxCorner);
00702         std::vector<ot::TreeNode> kids;
00703         ncaTmp.addChildren(kids);
00704         out.insert(out.end(),kids[(1 << dim)-1]);
00705         kids.clear();
00706       }
00707 
00708       std::vector<ot::TreeNode> tmpList;
00709       for(unsigned int i = 0; i < (out.size()-1); i++) {
00710 #ifdef __DEBUG_OCT__
00711         assert(areComparable(out[i], out[i+1]));
00712 #endif
00713         if(out[i].isAncestor(out[i+1])) {
00714           appendCompleteRegion(out[i], out[i+1], tmpList, false, false);
00715         } else {
00716           appendCompleteRegion(out[i], out[i+1], tmpList, true, false);
00717         }
00718       }
00719 
00720       tmpList.push_back(out[out.size() - 1]);
00721 
00722       out = tmpList;
00723       tmpList.clear();
00724     }
00725 
00726     PROF_N2O_SEQ_END
00727 
00728   }
00729 
00730   int points2Octree(std::vector<double>& pts, double * gLens, std::vector<TreeNode> & nodes,
00731       unsigned int dim, unsigned int maxDepth, unsigned int maxNumPts, MPI_Comm comm ) {
00732 
00733     PROF_P2O_BEGIN
00734 
00735       int size;
00736     MPI_Comm_size(comm, &size);
00737 
00738     
00739     if (size == 1) {
00740       points2OctreeSeq(pts, gLens, nodes, dim, maxDepth, maxNumPts); 
00741       PROF_P2O_END
00742     }
00743 
00744     int rank;
00745     MPI_Comm_rank(comm, &rank);
00746 
00747     TreeNode root (dim, maxDepth);
00748 
00749     if (maxDepth == 0) {
00750       if (!rank) {
00751         nodes.resize(1);
00752         nodes[0] = root;
00753       } else {
00754         nodes.resize(0);
00755       }
00756       PROF_P2O_END
00757     }
00758 
00759     unsigned int ptsLen = pts.size();
00760     assert( (dim == 1) || (dim == 2) || (dim == 3 ));
00761     assert( (ptsLen%dim) == 0);
00762 
00763     DendroIntL numNodes = (ptsLen/dim);
00764 
00765     DendroIntL totSize;
00766     par::Mpi_Allreduce<DendroIntL>(&numNodes, &totSize, 1, MPI_SUM, comm);
00767 
00768     if (totSize <= static_cast<DendroIntL>(maxNumPts)) {
00769       if (!rank) {
00770         nodes.resize(1);
00771         nodes[0] = root;
00772       } else {
00773         nodes.resize(0);
00774       }
00775       PROF_P2O_END
00776     }
00777 
00778     
00779     
00780     const DendroIntL THOUSAND = 1000;
00781     if (totSize < (THOUSAND*size)) {
00782       int splittingSize = (totSize/THOUSAND); 
00783 
00784       if(splittingSize == 0) {
00785         splittingSize = 1; 
00786       }
00787 
00788       unsigned int avgLoad = (totSize/splittingSize);
00789       int leftOvers = (totSize - (splittingSize*avgLoad));
00790 
00791       std::vector<double> tmpPts;
00792       if(rank >= splittingSize) {
00793         par::scatterValues<double>(pts, tmpPts, 0, comm);
00794       }else if(rank < leftOvers) {
00795         par::scatterValues<double>(pts, tmpPts, (dim*(avgLoad+1)), comm);
00796       }else {
00797         par::scatterValues<double>(pts, tmpPts, (dim*avgLoad), comm);
00798       }
00799       pts.clear();
00800 
00801       MPI_Comm newComm;
00802       par::splitCommUsingSplittingRank(splittingSize, &newComm, comm);
00803 
00804 #ifndef __SILENT_MODE__
00805       if(!rank) {
00806         std::cout<<"Input to p2o is small ("<<totSize
00807           <<"). npes = "<<size<<" Splitting Comm. "<<std::endl;
00808       }
00809 #endif
00810 
00811       if(rank < splittingSize) {
00812         points2Octree(tmpPts, gLens, nodes, dim, maxDepth, maxNumPts, newComm); 
00813       }
00814       tmpPts.clear();
00815 
00816       PROF_P2O_END
00817     }
00818 
00819     
00820 
00821     nodes.resize(numNodes);
00822 
00823     for (DendroIntL i = 0; i < numNodes; i++) {
00824       
00825       
00826       unsigned int px = (unsigned int)(pts[i*dim]*((double)(1u << maxDepth))/(gLens[0]));
00827       unsigned int py, pz = 0;
00828       if(dim > 1) {
00829         py = (unsigned int)(pts[(i*dim)+1]*((double)(1u << maxDepth))/gLens[1]);
00830         if(dim > 2) {
00831           pz = (unsigned int)(pts[(i*dim)+2]*((double)(1u << maxDepth))/gLens[2]);
00832         }
00833       }
00834       nodes[i] = TreeNode(px, py, pz, maxDepth, dim, maxDepth);
00835     }
00836 
00837     pts.clear(); 
00838 
00839     std::vector<ot::TreeNode> tmpNodes ;
00840 
00841     
00842     par::sampleSort<ot::TreeNode>(nodes, tmpNodes, comm); 
00843     nodes = tmpNodes;
00844     tmpNodes.clear();
00845 
00846     std::vector<ot::TreeNode> leaves;
00847     std::vector<ot::TreeNode> minsAllBlocks;
00848 
00849     blockPartStage1_p2o(nodes, leaves, dim, maxDepth, comm);
00850     blockPartStage2_p2o(nodes, leaves, minsAllBlocks, dim, maxDepth, comm);
00851     
00852 
00853     p2oLocal(nodes, leaves, maxNumPts, dim, maxDepth);
00854 
00855     PROF_P2O_END
00856 
00857   }
00858 
00859   
00860   int points2OctreeSeq(std::vector<double>& pts, double * gLens, std::vector<TreeNode> & nodes,
00861       unsigned int dim, unsigned int maxDepth, unsigned int maxNumPts) {
00862 
00863     PROF_P2O_SEQ_BEGIN
00864 
00865       if (maxDepth == 0) {    
00866         nodes.resize(1);
00867         nodes[0] = TreeNode(dim,maxDepth);
00868         PROF_P2O_SEQ_END
00869       }
00870 
00871     unsigned int ptsLen = pts.size();
00872     assert( (dim == 1) || (dim == 2) || (dim == 3 ));
00873     assert( (ptsLen%dim) == 0);
00874 
00875     TreeNode root (dim,maxDepth);
00876     unsigned int numNodes = ptsLen/dim;
00877     nodes.resize(numNodes);
00878 
00879     for (unsigned int i = 0; i < numNodes; i++) {
00880       
00881       
00882       unsigned int px = (unsigned int)(pts[i*dim]*((double)(1u << maxDepth))/(gLens[0]));
00883       unsigned int py, pz = 0;
00884       if(dim > 1) {
00885         py = (unsigned int)(pts[(i*dim)+1]*((double)(1u << maxDepth))/gLens[1]);
00886         if(dim > 2) {
00887           pz = (unsigned int)(pts[(i*dim)+2]*((double)(1u << maxDepth))/gLens[2]);
00888         }
00889       }
00890       nodes[i] = TreeNode(px, py, pz, maxDepth, dim, maxDepth);
00891     }
00892 
00893     pts.clear(); 
00894 
00895     unsigned int totSize = nodes.size();
00896 
00897     if (totSize <= maxNumPts) {
00898       nodes.resize(1);
00899       nodes[0] = root;
00900       PROF_P2O_SEQ_END
00901     }
00902 
00903     
00904     sort(nodes.begin(), nodes.end());
00905 
00906     std::vector<TreeNode> leaves;
00907     leaves.push_back(root);
00908 
00909     p2oLocal(nodes, leaves, maxNumPts, dim, maxDepth);
00910 
00911     PROF_P2O_SEQ_END
00912 
00913   }
00914 
00915   int p2oLocal(std::vector<TreeNode> & nodes, std::vector<TreeNode>& leaves,
00916       unsigned int maxNumPts, unsigned int dim, unsigned int maxDepth) {
00917     PROF_P2O_LOCAL_BEGIN
00918 
00919       std::vector<TreeNode> nodesDup = nodes;
00920     nodes.clear();
00921 
00922     std::vector<TreeNode> addList;
00923     std::vector<TreeNode> nodesTmp;
00924 
00925     std::vector<TreeNode> *nodeSrc = &nodes;
00926     std::vector<TreeNode> *nodeDest = &nodesTmp;
00927     std::vector<TreeNode> *leafSrc = &leaves;
00928     std::vector<TreeNode> *leafDest = &addList;
00929 
00930     
00931     do {
00932       
00933       
00934       for (unsigned int i = 0; i < leafSrc->size(); i++) {
00935         (*leafSrc)[i].setWeight(0);
00936       }
00937 
00938       
00939       unsigned int nextNode = 0;
00940       unsigned int nextPt = 0;
00941       
00942       
00943       
00944       
00945       
00946       
00947       
00948       
00949       
00950       while (nextPt < nodesDup.size()) {
00951         if (nodesDup[nextPt] >= (*leafSrc)[nextNode]) {
00952 #ifdef __DEBUG_OCT__
00953           assert(areComparable((*leafSrc)[nextNode], nodesDup[nextPt]));
00954 #endif
00955           if (((*leafSrc)[nextNode].isAncestor(nodesDup[nextPt])) ||
00956               ((*leafSrc)[nextNode] == nodesDup[nextPt])) {
00957             (*leafSrc)[nextNode].addWeight(1);
00958             nextPt++;
00959           } else {
00960             nextNode++;
00961             if (nextNode == leafSrc->size()) {
00962               
00963               break;
00964             }
00965           }
00966         } else {
00967           nextPt++;
00968         }
00969       }
00970 
00971       leafDest->resize((1 << dim)*(leafSrc->size()));
00972       unsigned int addListSz=0;     
00973       unsigned int nodesPrevSz = nodeSrc->size();
00974       nodeSrc->resize(nodesPrevSz + (leafSrc->size()));
00975       nodeDest->resize(nodeSrc->size());
00976       unsigned int tmpCtr=0;
00977       unsigned int nodesCtr=0;
00978       
00979       for (unsigned int i = 0; i < leafSrc->size(); i++) {
00980         if ( ((*leafSrc)[i].getWeight() > maxNumPts) &&
00981             ((*leafSrc)[i].getLevel() < maxDepth) ) {
00982           TreeNode thisNode = (*leafSrc)[i];
00983           std::vector<TreeNode> children;
00984           thisNode.addChildren(children);
00985           for (int ci = 0; ci < (1 << dim); ci++) {
00986             (*leafDest)[addListSz] = children[ci];
00987             addListSz++;
00988           }
00989           children.clear();
00990         } else {
00991           while ((nodesCtr < nodesPrevSz) &&
00992               ((*nodeSrc)[nodesCtr] < (*leafSrc)[i]) ) {
00993             (*nodeDest)[tmpCtr++] = (*nodeSrc)[nodesCtr++];
00994           }
00995           (*nodeDest)[tmpCtr++] = (*leafSrc)[i];
00996         }
00997       }
00998 
00999       for (unsigned int i = nodesCtr; i < nodesPrevSz; i++) {
01000         (*nodeDest)[tmpCtr++] = (*nodeSrc)[i];
01001       }
01002 
01003       leafDest->resize(addListSz);
01004       nodeDest->resize(tmpCtr);
01005 
01006       
01007       std::vector<TreeNode> *tmpPtr = nodeSrc;
01008       nodeSrc = nodeDest;
01009       nodeDest = tmpPtr;
01010       tmpPtr = leafSrc;
01011       leafSrc = leafDest;
01012       leafDest = tmpPtr;
01013       leafDest->clear();
01014       nodeDest->clear();  
01015     } while (!leafSrc->empty());
01016 
01017     if ( nodeSrc != &nodes ) {
01018       nodes = nodesTmp;
01019     }
01020     nodesTmp.clear();
01021 
01022     
01023     for (unsigned int i = 0; i < nodes.size(); i++) {
01024       nodes[i].setWeight(1);
01025     }
01026 
01027     PROF_P2O_LOCAL_END
01028   }
01029 
01030   
01031   
01032   int appendCompleteRegion(TreeNode first, TreeNode second, 
01033       std::vector<ot::TreeNode>& newNodes, bool includeMin, bool includeMax) {
01034 
01035     PROF_COMPLETE_REGION_BEGIN
01036 
01037       unsigned int dim = first.getDim();
01038     unsigned int maxDepth = first.getMaxDepth();
01039 
01040     TreeNode min = ((first < second) ? first : second);
01041 
01042     if(includeMin) {
01043       newNodes.push_back(min);
01044     }
01045 
01046     if (first == second) {
01047       PROF_COMPLETE_REGION_END
01048     }
01049 
01050     TreeNode max = ((first > second) ? first : second);
01051 
01052     
01053     TreeNode nca = getNCA(min,max);
01054 
01055     if(min == nca) {
01056       
01057       ot::TreeNode tmpAncestor = min;
01058       bool repeatLoop;
01059       do {
01060         repeatLoop = false;
01061         std::vector<ot::TreeNode> tmpChildList;
01062         tmpAncestor.addChildren(tmpChildList);
01063         for(unsigned int j = 0; j < tmpChildList.size(); j++) {
01064 #ifdef __DEBUG_OCT__
01065           assert(areComparable(tmpChildList[j], max));
01066 #endif
01067           if( (tmpChildList[j] < max) &&
01068               (!(tmpChildList[j].isAncestor(max))) ) {
01069             newNodes.push_back(tmpChildList[j]);
01070           } else if(tmpChildList[j].isAncestor(max)) {
01071             tmpAncestor = tmpChildList[j];
01072             repeatLoop = true;
01073             break;
01074           } else {
01075             assert(tmpChildList[j] == max);
01076             break;
01077           }
01078         }
01079       } while (repeatLoop);
01080     } else {
01081       TreeNode currentNode = min;    
01082       while(currentNode > nca) {
01083         TreeNode parentOfCurrent = currentNode.getParent();
01084         std::vector<ot::TreeNode> myBros;
01085         parentOfCurrent.addChildren(myBros);
01086         for(unsigned int i = 0; i < myBros.size(); i++) {
01087 #ifdef __DEBUG_OCT__
01088           assert(areComparable(myBros[i], max));
01089 #endif
01090           if( (myBros[i] > min) &&
01091               (myBros[i] < max) && (!(myBros[i].isAncestor(max))) ) {
01092             
01093             newNodes.push_back(myBros[i]);
01094           } else if(myBros[i].isAncestor(max)) {
01095             
01096             
01097             
01098             
01099             
01100             
01101             ot::TreeNode tmpAncestor = myBros[i];
01102             bool repeatLoop;
01103             do {
01104               repeatLoop = false;
01105               std::vector<ot::TreeNode> tmpChildList;
01106               tmpAncestor.addChildren(tmpChildList);
01107               for(unsigned int j = 0; j < tmpChildList.size(); j++) {
01108 #ifdef __DEBUG_OCT__
01109                 assert(areComparable(tmpChildList[j], max));
01110 #endif
01111                 if( (tmpChildList[j] < max) &&
01112                     (!(tmpChildList[j].isAncestor(max))) ) {
01113                   newNodes.push_back(tmpChildList[j]);
01114                 } else if(tmpChildList[j].isAncestor(max)) {
01115                   tmpAncestor = tmpChildList[j];
01116                   repeatLoop = true;
01117                   break;
01118                 } else {
01119                   assert(tmpChildList[j] == max);
01120                   break;
01121                 }
01122               }
01123             } while (repeatLoop);
01124             break;      
01125           }
01126         }
01127         currentNode = parentOfCurrent;
01128       }
01129     }
01130 
01131     if(includeMax) {
01132       newNodes.push_back(max);
01133     }
01134 
01135     PROF_COMPLETE_REGION_END
01136   }
01137 
01138   
01139 
01140 
01141 
01142 
01143 
01144 
01145 
01146 
01147 
01148 
01149 
01150 
01151 
01152 
01153 
01154 
01155 
01156 
01157 
01158 
01159 
01160 
01161 
01162 
01163 
01164 
01165 
01166 
01167 
01168 
01169 
01170 
01171 
01172 
01173 
01174 
01175 
01176 
01177 
01178 
01179 
01180 
01181 
01182 
01183 
01184 
01185 
01186 
01187 
01188 
01189 
01190 
01191 
01192 
01193 
01194 
01195 
01196 
01197 
01198 
01199 
01200 
01201 
01202 
01203 
01204 
01205 
01206 }
01207