Main Page | Namespace List | Class Hierarchy | Alphabetical List | Class List | Directories | File List | Namespace Members | Class Members | File Members | Related Pages

OctFunctions.C

Go to the documentation of this file.
00001 
00011 #include "TreeNode.h"
00012 #include "parUtils.h"
00013 #include "seqUtils.h"
00014 #include <cstring>
00015 
00016 #ifdef __DEBUG__
00017 #ifndef __DEBUG_OCT__
00018 #define __DEBUG_OCT__
00019 #endif
00020 #endif
00021 
00022 #ifdef __DEBUG_OCT__
00023 #ifndef __MEASURE_FLAG_NODES__
00024 #define __MEASURE_FLAG_NODES__
00025 #endif
00026 #endif
00027 
00028 namespace ot {
00029 
00030   //inOct1 and inOct2 must be globally sorted and complete
00031   int mergeOctrees(std::vector<TreeNode>& inOct1, std::vector<TreeNode>& inOct2,
00032       std::vector<TreeNode>& outOct, MPI_Comm comm) {
00033     PROF_MERGE_OCTREES_BEGIN
00034 
00035       int rank;
00036     int npes;
00037     MPI_Comm_rank(comm, &rank);
00038     MPI_Comm_size(comm, &npes);
00039 
00040     assert(!(inOct1.empty()));
00041 
00042     //Get the mins of inOct1
00043     ot::TreeNode* mins = new ot::TreeNode[npes];
00044     par::Mpi_Allgather<ot::TreeNode>(&(*(inOct1.begin())), mins, 1, comm);
00045 
00046     //Distribute inOct2 to align with inOct1
00047     int* sendCnts = new int[npes];
00048     for(int i = 0; i < npes; i++) {
00049       sendCnts[i] = 0;
00050     }//end for i 
00051 
00052     if(!(inOct2.empty())) {
00053       //Since inOct1 is complete and sorted, mins[0] is a deepest first
00054       //decendant of root.
00055       //Since inOct2 is also complete and sorted, on the first non-empty processor
00056       //inOct2[0] will also be a deepest first decendant of root.
00057       //Now mins[0] can be an ancestor, decendant or equal to inOct2[0] (global
00058       //first element, so only happens on the first processor). So i=0
00059       //is handled separately. 
00060       int minsCnt = 0;
00061       if(inOct2[0] < mins[0]) {
00062         sendCnts[0]++;
00063       } else {
00064         while( (minsCnt < npes) && (mins[minsCnt] <= inOct2[0]) ) {
00065           minsCnt++;
00066         }
00067         sendCnts[minsCnt - 1]++;
00068       }
00069       for(int i = 1; i < inOct2.size(); i++) {
00070         while( (minsCnt < npes) && (mins[minsCnt] <= inOct2[i]) ) {
00071           minsCnt++;
00072         }
00073         sendCnts[minsCnt - 1]++;
00074       }//end for i
00075     }
00076 
00077     int * recvCnts = new int[npes];
00078     par::Mpi_Alltoall<int>(sendCnts, recvCnts, 1, comm);
00079 
00080     int * sendDisps = new int[npes];
00081     int * recvDisps = new int[npes];
00082     sendDisps[0] = 0;
00083     recvDisps[0] = 0;
00084     for(int i = 1; i < npes; i++) {
00085       sendDisps[i] = sendDisps[i - 1] + sendCnts[i - 1];
00086       recvDisps[i] = recvDisps[i - 1] + recvCnts[i - 1];
00087     }//end for i
00088 
00089     outOct.resize(recvDisps[npes - 1] + recvCnts[npes - 1]);
00090 
00091     par::Mpi_Alltoallv_sparse<ot::TreeNode>(&(*(inOct2.begin())), sendCnts, sendDisps,
00092         &(*(outOct.begin())), recvCnts, recvDisps, comm);
00093 
00094     delete [] mins;
00095     delete [] sendCnts;
00096     delete [] sendDisps;
00097     delete [] recvCnts;
00098     delete [] recvDisps;
00099 
00100     //Merge outOct and inOct1 locally
00101     std::vector<ot::TreeNode> tmpOct;
00102     int cnt1 = 0;
00103     int cnt2 = 0;
00104     while( (cnt1 < inOct1.size()) && (cnt2 < outOct.size()) ) {
00105       if(inOct1[cnt1] < outOct[cnt2]) {
00106         tmpOct.push_back(inOct1[cnt1]);
00107         cnt1++;
00108       } else {
00109         tmpOct.push_back(outOct[cnt2]);
00110         cnt2++;
00111       }
00112     }
00113 
00114     while(cnt1 < inOct1.size()) {
00115       tmpOct.push_back(inOct1[cnt1]);
00116       cnt1++;
00117     }
00118 
00119     while(cnt2 < outOct.size()) {
00120       tmpOct.push_back(outOct[cnt2]);
00121       cnt2++;
00122     }
00123 
00124     outOct = tmpOct;
00125     tmpOct.clear();
00126 
00127     //Linearize outOct. This will remove duplicates and ancestors
00128     lineariseList(outOct, comm);
00129 
00130     PROF_MERGE_OCTREES_END
00131   }//end function
00132 
00133   int refineOctree(const std::vector<ot::TreeNode> & inp,
00134       std::vector<ot::TreeNode> &out) {
00135     out.clear();
00136     for(unsigned int i = 0; i < inp.size(); i++) {
00137       if(inp[i].getLevel() < inp[i].getMaxDepth()) {
00138         inp[i].addChildren(out);
00139       } else {
00140         out.push_back(inp[i]);
00141       }
00142     }
00143     return 1;
00144   }//end function 
00145 
00146   int refineAndPartitionOctree(const std::vector<ot::TreeNode> & inp,
00147       std::vector<ot::TreeNode> &out, MPI_Comm comm) {
00148     refineOctree(inp,out);
00149     par::partitionW<ot::TreeNode>(out, NULL,comm);
00150     return 1;
00151   }//end function
00152 
00153   int createRegularOctree(std::vector<ot::TreeNode>& out, unsigned int lev, 
00154       unsigned int dim, unsigned int maxDepth, MPI_Comm comm) {
00155     TreeNode root(dim,maxDepth);
00156     out.clear();
00157     int rank;
00158     MPI_Comm_rank(comm,&rank);
00159     if(!rank) {
00160       out.push_back(root);
00161     }
00162     for(int i = 0; i < lev; i++) {
00163       std::vector<ot::TreeNode> tmp;
00164       refineAndPartitionOctree(out,tmp,comm);
00165       out = tmp;
00166       tmp.clear();
00167     }
00168     return 1;
00169   }
00170 
00171   //list must be sorted.
00172   int lineariseList(std::vector<ot::TreeNode> & list, bool skipLast) {
00173     std::vector<ot::TreeNode> tmp;
00174     if(!(list.empty())) {
00175       for(unsigned int i = 0; i < (list.size()-1); i++) {
00176 #ifdef __DEBUG_OCT__
00177         assert(areComparable(list[i], list[i+1]));
00178 #endif
00179         if( (!(list[i].isAncestor(list[i+1]))) && (list[i] != list[i+1]) ) {
00180           tmp.push_back(list[i]);
00181         }
00182       }
00183       if(!skipLast) {
00184         tmp.push_back(list[list.size()-1]);
00185       }
00186     }
00187 
00188     list = tmp;
00189     tmp.clear();
00190     return 1;
00191   }//end fn.
00192 
00193   //list must be sorted.
00194   int lineariseList(std::vector<ot::TreeNode> & list, MPI_Comm comm) {
00195     int rank,size;
00196     MPI_Comm_rank(comm,&rank);
00197     MPI_Comm_size(comm,&size);
00198 
00199     if(size == 1) {
00200       lineariseList(list, false);
00201       return 1;
00202     }
00203 
00204     //Remove empty processors...
00205     int new_rank, new_size; 
00206     MPI_Comm   new_comm;
00207     par::splitComm2way(list.empty(), &new_comm,comm);
00208 
00209     MPI_Comm_rank (new_comm, &new_rank);
00210     MPI_Comm_size (new_comm, &new_size);
00211     if(!list.empty()) {
00212       //Send the last octant to the next processor. 
00213       ot::TreeNode lastOctant = list[list.size()-1];
00214       ot::TreeNode lastOnPrev;
00215 
00216       MPI_Request recvRequest;
00217       MPI_Request sendRequest;
00218 
00219       if(new_rank) {
00220         par::Mpi_Irecv<ot::TreeNode>( &lastOnPrev, 1, new_rank-1, 1, new_comm, &recvRequest);
00221       }
00222       if(new_rank < (new_size-1)) {
00223         par::Mpi_Issend<ot::TreeNode>( &lastOctant, 1, new_rank+1, 1, new_comm,  &sendRequest);
00224       }
00225 
00226       if(new_rank) {
00227         std::vector<ot::TreeNode> tmp(list.size()+1);
00228         for(int i = 0; i < list.size(); i++) {
00229           tmp[i+1] = list[i];
00230         }
00231 
00232         MPI_Status statusWait;
00233         MPI_Wait(&recvRequest, &statusWait);
00234         tmp[0] = lastOnPrev;
00235 
00236         list = tmp;
00237         tmp.clear();
00238       }
00239 
00240       if(new_rank == (new_size-1)) {
00241         lineariseList(list, false);
00242       }else {
00243         lineariseList(list, true);
00244       }
00245 
00246       if(new_rank < (new_size-1)) {
00247         MPI_Status statusWait;
00248         MPI_Wait(&sendRequest, &statusWait);
00249       }      
00250     }//not empty procs only
00251 
00252     return 1;
00253   }//end fn.
00254 
00255   bool lessThanUsingWts ( TreeNode  const & a,  TreeNode  const & b)  {
00256     if(a == b){
00257       return (a.getWeight() < b.getWeight());
00258     }else {
00259       return (a < b);
00260     }
00261   }//end function
00262 
00263   unsigned int getNodeWeight(const TreeNode * t) {
00264     return t->getWeight();
00265   }
00266 
00267   bool bPartComparator(TreeNode a, TreeNode b) {
00268     if ( (a.getWeight()) != (b.getWeight()) ) {
00269       //pick denser
00270       return( (a.getWeight()) > (b.getWeight()) );
00271     } else if (a.getWeight() > 1) {
00272       //pick finer
00273       return( (a.getLevel()) > (b.getLevel()) );
00274     } else {
00275       //pick coarser
00276       return( (a.getLevel()) < (b.getLevel()) );
00277     }
00278   }//end function
00279 
00280   //If one of first and second is an ancestor of the other, it is returned.  
00281   TreeNode getNCA(TreeNode first, TreeNode second) {
00282 #ifdef __DEBUG_OCT__
00283     assert(areComparable(first,second));
00284     assert(first != second);
00285 #endif
00286     unsigned int fx = first.getX();
00287     unsigned int sx = second.getX();
00288     unsigned int fy = first.getY();
00289     unsigned int sy = second.getY();
00290     unsigned int fz = first.getZ();
00291     unsigned int sz = second.getZ();
00292     unsigned int maxDepth = first.getMaxDepth(); 
00293     unsigned int dim = first.getDim(); 
00294     unsigned int maxDiff = (unsigned int)(std::max((std::max((fx^sx),(fy^sy))),(fz^sz)));
00295     unsigned int maxDiffBinLen = binOp::binLength(maxDiff);
00296     //Eliminate the last maxDiffBinLen bits.
00297     unsigned int ncaX = ((fx>>maxDiffBinLen)<<maxDiffBinLen);
00298     unsigned int ncaY = ((fy>>maxDiffBinLen)<<maxDiffBinLen);
00299     unsigned int ncaZ = ((fz>>maxDiffBinLen)<<maxDiffBinLen);
00300     unsigned int ncaLev = (maxDepth - maxDiffBinLen);
00301     TreeNode nca(ncaX,ncaY,ncaZ,ncaLev,dim,maxDepth);
00302     return nca;
00303   }//end function
00304 
00305   int readPtsFromFile(char* filename, std::vector<double>& pts) {
00306     FILE* infile;
00307     infile = fopen(filename,"rb");
00308     unsigned int temp;
00309     fread(&temp,sizeof(unsigned int),1,infile);
00310 
00311     double* ptsTemp = NULL;
00312 
00313     if(temp) {
00314       ptsTemp = new double[3*temp];
00315       fread(ptsTemp, sizeof(double),3*temp,infile);
00316     }
00317 
00318     fclose(infile);
00319     pts.resize(3*temp);
00320 
00321     for (int i=0; i < (3*temp); i++) {
00322       pts[i] = ptsTemp[i];
00323     }//end for
00324 
00325     if(ptsTemp) {
00326       delete [] ptsTemp;
00327       ptsTemp = NULL;
00328     }
00329 
00330     return 1;
00331   }//end function
00332 
00333   int readDataPtsFromFile(char* filename, std::vector<double>& pts,
00334       std::vector<double>& ptVals) {
00335     // file format:
00336     // 4 bytes (unsigned int?)  number of points N
00337     // 3*N*8 bytes coordinates of point (double);  X1 Y1 Z1 X2 Y2 Z2 ....
00338     // N*8 weights attached to points (double)
00339     // 
00340     FILE* infile;
00341     unsigned int temp;
00342 
00343     assert(sizeof(double)==8);
00344     assert(sizeof(unsigned int)==4);
00345 
00346     infile = fopen(filename,"rb");
00347     fread(&temp,sizeof(unsigned int),1,infile);
00348 
00349     pts.resize(3*temp);
00350     ptVals.resize(temp);
00351 
00352     fread(&(pts[0]), sizeof(double),3*temp,infile);
00353     fread(&(ptVals[0]), sizeof(double),temp,infile);
00354 
00355     fclose(infile);
00356 
00357     return 1;
00358   }//end function
00359 
00360   int writePtsToFile(char* filename, std::vector<double>& pts) {
00361     FILE* outfile = fopen(filename,"wb");
00362     unsigned int ptsLen = pts.size();
00363     double * ptsTemp = NULL;
00364     if(!pts.empty()) {
00365       ptsTemp = (&(*(pts.begin())));
00366     }
00367 
00368     if (ptsLen >0) {
00369       unsigned int numPts = ptsLen/3;
00370       fwrite(&numPts,sizeof(unsigned int),1,outfile);
00371       fwrite(ptsTemp, sizeof(double),ptsLen,outfile);
00372     }
00373     fclose(outfile);
00374     return 1;
00375   }//end function
00376 
00377   int writeDataPtsToFile(char* filename, std::vector<double>& pts,
00378       std::vector<double>& data) {
00379     FILE* outfile = fopen(filename,"wb");
00380     unsigned int ptsLen = pts.size();
00381     unsigned int numPts=ptsLen/3;
00382 
00383     assert(sizeof(double)==8);
00384     assert(sizeof(unsigned int)==4);
00385 
00386     fwrite(&numPts,sizeof(unsigned int),1,outfile);
00387     fwrite(&(pts[0]), sizeof(double),ptsLen,outfile);
00388     fwrite(&(data[0]), sizeof(double),numPts,outfile);
00389     fclose(outfile);
00390     return 1;
00391   }//end function
00392 
00393   int readNodesFromFile (char* filename, std::vector<TreeNode > & nodes) {
00394     FILE* infile = fopen(filename,"r");
00395     unsigned int numNode;
00396     unsigned int dim, maxDepth;
00397     fscanf(infile,"%u",&dim);
00398     fscanf(infile,"%u",&maxDepth); 
00399     fscanf(infile,"%u",&numNode);
00400     nodes.resize(numNode) ;   
00401 
00402     for (unsigned int i =0; i< nodes.size(); i++) {
00403       unsigned int x,y,z,d;
00404       fscanf(infile,"%u",&x);
00405       fscanf(infile,"%u",&y);
00406       fscanf(infile,"%u",&z);
00407       fscanf(infile,"%u",&d); 
00408       nodes[i] = ot::TreeNode (x,y,z,d, dim, maxDepth);
00409     }
00410     fclose(infile);
00411     return 1;
00412   }//end function
00413 
00414   int writeNodesToFile (char* filename, const std::vector<TreeNode> & nodes) {
00415     FILE* outfile = fopen(filename,"w");
00416     if (!nodes.empty()) {
00417       unsigned int dim = nodes[0].getDim();
00418       unsigned int maxDepth = nodes[0].getMaxDepth();
00419       fprintf(outfile,"%u %u\n",dim,maxDepth); 
00420       fprintf(outfile,"%u\n",static_cast<unsigned int>(nodes.size()));
00421       for (unsigned int i =0; i< nodes.size(); i++) {
00422         assert(nodes[i].getDim() == dim);
00423         assert(nodes[i].getMaxDepth() == maxDepth);
00424         fprintf(outfile,"%u %u %u %u\n",nodes[i].getX(),nodes[i].getY(),nodes[i].getZ(),nodes[i].getLevel());
00425       }
00426     }
00427     fclose(outfile);
00428     return 1;
00429   }//end function
00430 
00431   bool areComparable (TreeNode first, TreeNode second) {
00432     return( ( (first.getDim()) == (second.getDim()) ) && ( (first.getMaxDepth()) == (second.getMaxDepth()) ) );
00433   }
00434 
00435   int int2str(int n,char*s) {
00436     int tmpd[20];
00437     int i=0;
00438     int j;   
00439     if (n==0) {
00440       strcpy(s,"0\0");
00441     } else {
00442       while (n>0) {
00443         tmpd[i]= (n%10);
00444         n= (int)(n/10);
00445         i++;
00446       }
00447       for (j=i-1;j>=0;j--) {
00448         s[i-j-1]=int2char(tmpd[j]);
00449       }
00450       s[i]='\0';
00451     }
00452     return 1;
00453   }//end function
00454 
00455   char int2char(int d) {
00456     switch (d) {
00457       case 0: return '0';
00458       case 1: return '1';
00459       case 2: return '2';
00460       case 3: return '3';
00461       case 4: return '4';
00462       case 5: return '5';
00463       case 6: return '6';
00464       case 7: return '7';
00465       case 8: return '8';
00466       case 9: return '9';
00467       default: return '\0';
00468     }
00469   }//end function
00470 
00471   // This will add boundary nodes and will also embed the octree one level higher
00472   // to enable the addition of the boundary nodes.
00473   void addBoundaryNodesType2(std::vector<ot::TreeNode> &in, 
00474       std::vector<ot::TreeNode>& bdy, 
00475       unsigned int dim, unsigned int maxDepth) {
00476 
00477     PROF_ADD_BDY_BEGIN
00478       assert(bdy.empty());
00479 
00480     for (unsigned int i = 0; i < in.size(); i++) {
00481       // get basic info ...
00482       unsigned int d   = in[i].getLevel();
00483       unsigned int x = in[i].getX();
00484       unsigned int y = in[i].getY();
00485       unsigned int z = in[i].getZ();
00486 
00487       unsigned char bdyFlags;
00488       // check if this is a boundary octant or not ...
00489       if ( in[i].isBoundaryOctant(ot::TreeNode::POSITIVE, &bdyFlags) ) {
00490         // bdy flags tells us which octants to add ...
00491 
00492         //NOTE: == is important since a&(b+c) will be true if
00493         //a=b, a=c and a=b+c
00494 
00495         // +x and more ... add additional octant in +x dir
00496         if ( bdyFlags & ot::TreeNode::X_POS_BDY ) {
00497           bdy.push_back(ot::TreeNode( (1u << maxDepth), y, z, (d+1), dim, maxDepth+1));
00498         }
00499 
00500         // +y and more ... add additional octant in +y dir
00501         if ( bdyFlags & ot::TreeNode::Y_POS_BDY ) {
00502           bdy.push_back(ot::TreeNode(x, (1u << maxDepth), z,
00503                 (d+1), dim, maxDepth+1));
00504         }
00505 
00506         // +z and more ... add additional octant in +z dir
00507         if ( bdyFlags & ot::TreeNode::Z_POS_BDY ) {
00508           bdy.push_back(ot::TreeNode(x, y, (1u << maxDepth),
00509                 (d+1), dim, maxDepth+1));
00510         }
00511 
00512         //+x+y and more
00513         if ( (bdyFlags & (ot::TreeNode::X_POS_BDY + ot::TreeNode::Y_POS_BDY)) == 
00514             (ot::TreeNode::X_POS_BDY + ot::TreeNode::Y_POS_BDY) ) { 
00515           bdy.push_back(ot::TreeNode((1u << maxDepth),(1u << maxDepth), z,
00516                 (d+1), dim, maxDepth+1));
00517         }
00518 
00519         //+x+z and more
00520         if ( (bdyFlags & (ot::TreeNode::X_POS_BDY + ot::TreeNode::Z_POS_BDY)) ==
00521             (ot::TreeNode::X_POS_BDY + ot::TreeNode::Z_POS_BDY) ) {
00522           bdy.push_back(ot::TreeNode((1u << maxDepth), y, (1u << maxDepth),
00523                 (d+1), dim, maxDepth+1));
00524         }
00525 
00526         //+y+z and more
00527         if ( (bdyFlags & (ot::TreeNode::Y_POS_BDY + ot::TreeNode::Z_POS_BDY)) == 
00528             (ot::TreeNode::Y_POS_BDY + ot::TreeNode::Z_POS_BDY) ) {
00529           bdy.push_back(ot::TreeNode(x, (1u << maxDepth), (1u << maxDepth),
00530                 (d+1), dim, maxDepth+1));
00531         }
00532 
00533         // if global corner ...
00534         //+x +y and +z only
00535         if ( (bdyFlags & (ot::TreeNode::X_POS_BDY + ot::TreeNode::Y_POS_BDY +
00536                 ot::TreeNode::Z_POS_BDY)) == (ot::TreeNode::X_POS_BDY +
00537                 ot::TreeNode::Y_POS_BDY +  ot::TreeNode::Z_POS_BDY) ) {
00538           bdy.push_back(ot::TreeNode((1u << maxDepth), (1u << maxDepth), (1u << maxDepth),
00539                 (d+1), dim, maxDepth+1));
00540         }
00541       }//end if boundary
00542 
00543       // Embed the actual octant in one level higher ...
00544       in[i] = ot::TreeNode(x, y, z, d+1, dim, maxDepth+1);
00545 
00546     }//end for i
00547 
00548     // A Parallel Sort for the bdy nodes follows.  
00549     //Then in and bdy will be merged.
00550 
00551     PROF_ADD_BDY_END
00552   }//end function
00553 
00554   void markBoundaryNodesAtAllLevels(std::vector<ot::TreeNode>& finestOctree, unsigned int nlevels,
00555       std::vector<ot::TreeNode>* coarserOctrees, unsigned int maxDepth) {
00556     //coarser octrees
00557     for(int lev = 0; lev < (nlevels - 1); lev++) {
00558       for(unsigned int i = 0; i < coarserOctrees[lev].size(); i++) {
00559         unsigned int myX = coarserOctrees[lev][i].getX();
00560         unsigned int myY = coarserOctrees[lev][i].getY();
00561         unsigned int myZ = coarserOctrees[lev][i].getZ();
00562         if( (myX == (1u << (maxDepth - 1))) ||
00563             (myY == (1u << (maxDepth - 1))) ||
00564             (myZ == (1u << (maxDepth - 1))) ) {
00565           coarserOctrees[lev][i].orFlag(ot::TreeNode::BOUNDARY);
00566         }
00567       }//end for i
00568     }//end for lev
00569 
00570     //finest octree
00571     for(unsigned int i = 0; i < finestOctree.size(); i++) {
00572       unsigned int myX = finestOctree[i].getX();
00573       unsigned int myY = finestOctree[i].getY();
00574       unsigned int myZ = finestOctree[i].getZ();
00575       if( (myX == (1u << (maxDepth - 1))) ||
00576           (myY == (1u << (maxDepth - 1))) ||
00577           (myZ == (1u << (maxDepth - 1))) ) {
00578         finestOctree[i].orFlag(ot::TreeNode::BOUNDARY);
00579       }
00580     }//end for i
00581 
00582   }//end function
00583 
00584   void discardExtraBoundaryOctants(std::vector<ot::TreeNode>& in,
00585       unsigned int dim, unsigned int maxDepth) {
00586 
00587     std::vector<ot::TreeNode> tmpOctree;
00588     for(int i = 0; i < in.size(); i++) {
00589       unsigned int myX = in[i].getX();
00590       unsigned int myY = in[i].getY();
00591       unsigned int myZ = in[i].getZ();
00592       if( (myX <= (1u << (maxDepth - 1))) &&
00593           (myY <= (1u << (maxDepth - 1))) &&
00594           (myZ <= (1u << (maxDepth - 1))) ) {
00595         tmpOctree.push_back(in[i]);
00596       }
00597     }//end for i
00598 
00599     if(tmpOctree.size() < in.size()) {
00600       in = tmpOctree;
00601     }
00602   }//end function
00603 
00604   //A simple implementation for now. This just calls flagNodesType3 for all
00605   //levels. A smarter implementation would use the fact that regular coarse
00606   //grid nodes remain regular on all finer octrees 
00607   void markHangingNodesAtAllLevels(std::vector<ot::TreeNode>& finestOctree, unsigned int nlevels, 
00608       std::vector<ot::TreeNode>* coarserOctrees, MPI_Comm* activeComms,
00609       unsigned int dim, unsigned int maxDepth) {
00610 
00611     for(int lev = 0; lev < (nlevels - 1); lev++) {
00612       if( !(coarserOctrees[lev].empty()) ) {
00613         ot::flagNodesType3(coarserOctrees[lev], activeComms[lev + 1]);
00614       }
00615     }//end for lev
00616 
00617     if( !(finestOctree.empty()) ) {
00618       ot::flagNodesType3(finestOctree, activeComms[0]);
00619     }
00620 
00621   }//end function
00622 
00623   // This will add boundary nodes and will also embed the octree one level higher
00624   // to enable the addition of the boundary nodes. The positive boundary nodes
00625   // are also marked as BOUNDARY.
00626   void addBoundaryNodesType1(std::vector<ot::TreeNode> &in, 
00627       std::vector<ot::TreeNode>& bdy, 
00628       unsigned int dim, unsigned int maxDepth) {
00629     PROF_ADD_BDY_BEGIN
00630 
00631       assert(bdy.empty());
00632 
00633     for (unsigned int i = 0; i < in.size(); i++) {
00634       // get basic info ...
00635       unsigned int d   = in[i].getLevel();
00636       unsigned int x = in[i].getX();
00637       unsigned int y = in[i].getY();
00638       unsigned int z = in[i].getZ();
00639 
00640       unsigned char bdyFlags;
00641       // check if this is a boundary octant or not ...
00642       if ( in[i].isBoundaryOctant(ot::TreeNode::POSITIVE, &bdyFlags) ) {
00643         // bdy flags tells us which octants to add ...
00644 
00645         //NOTE: == is important since a&(b+c) will be true if
00646         //a=b, a=c and a=b+c
00647 
00648         // +x and more ... add additional octant in +x dir
00649         if ( bdyFlags & ot::TreeNode::X_POS_BDY ) {
00650           bdy.push_back(ot::TreeNode( (1u << maxDepth), y, z, (d+1) |
00651                 ot::TreeNode::BOUNDARY, dim, maxDepth+1));
00652         }
00653 
00654         // +y and more ... add additional octant in +y dir
00655         if ( bdyFlags & ot::TreeNode::Y_POS_BDY ) {
00656           bdy.push_back(ot::TreeNode(x, (1u << maxDepth), z,
00657                 (d+1) | ot::TreeNode::BOUNDARY, dim, maxDepth+1));
00658         }
00659 
00660         // +z and more ... add additional octant in +z dir
00661         if ( bdyFlags & ot::TreeNode::Z_POS_BDY ) {
00662           bdy.push_back(ot::TreeNode(x, y, (1u << maxDepth),
00663                 (d+1) | ot::TreeNode::BOUNDARY, dim, maxDepth+1));
00664         }
00665 
00666         //+x+y and more
00667         if ( (bdyFlags & (ot::TreeNode::X_POS_BDY + ot::TreeNode::Y_POS_BDY)) == 
00668             (ot::TreeNode::X_POS_BDY + ot::TreeNode::Y_POS_BDY) ) { 
00669           bdy.push_back(ot::TreeNode((1u << maxDepth),(1u << maxDepth), z,
00670                 (d+1) | ot::TreeNode::BOUNDARY, dim, maxDepth+1));
00671         }
00672 
00673         //+x+z and more
00674         if ( (bdyFlags & (ot::TreeNode::X_POS_BDY + ot::TreeNode::Z_POS_BDY)) ==
00675             (ot::TreeNode::X_POS_BDY + ot::TreeNode::Z_POS_BDY) ) {
00676           bdy.push_back(ot::TreeNode((1u << maxDepth), y, (1u << maxDepth),
00677                 (d+1) | ot::TreeNode::BOUNDARY, dim, maxDepth+1));
00678         }
00679 
00680         //+y+z and more
00681         if ( (bdyFlags & (ot::TreeNode::Y_POS_BDY + ot::TreeNode::Z_POS_BDY)) == 
00682             (ot::TreeNode::Y_POS_BDY + ot::TreeNode::Z_POS_BDY) ) {
00683           bdy.push_back(ot::TreeNode(x, (1u << maxDepth),(1u << maxDepth),
00684                 (d+1) | ot::TreeNode::BOUNDARY, dim, maxDepth+1));
00685         }
00686 
00687         // if global corner ...
00688         //+x +y and +z only
00689         if ( (bdyFlags & (ot::TreeNode::X_POS_BDY + ot::TreeNode::Y_POS_BDY +
00690                 ot::TreeNode::Z_POS_BDY)) == (ot::TreeNode::X_POS_BDY +
00691                 ot::TreeNode::Y_POS_BDY +  ot::TreeNode::Z_POS_BDY) ) {
00692           bdy.push_back(ot::TreeNode((1u << maxDepth), (1u << maxDepth), (1u << maxDepth),
00693                 (d+1) | ot::TreeNode::BOUNDARY, dim, maxDepth+1));
00694         }
00695       }//end if boundary
00696 
00697       // Embed the actual octant in one level higher ...
00698       in[i] = ot::TreeNode(x, y, z, d+1, dim, maxDepth+1);
00699 
00700     }//end for i
00701 
00702     // A Parallel Sort for the bdy nodes follows in the constructor.  
00703     //Then in and bdy will be merged.
00704 
00705     PROF_ADD_BDY_END
00706   }//end function
00707 
00708   void flagNodesType1(std::vector<ot::TreeNode> & in, MPI_Comm comm) {
00709 
00710 #ifdef __PROF_WITH_BARRIER__
00711     MPI_Barrier(comm);
00712 #endif
00713 
00714     PROF_MARK_HANGING_BEGIN
00715 
00716       PROF_FLN_STAGE1_BEGIN
00717 
00718       int npes;
00719     int rank;
00720 
00721     MPI_Comm_rank(comm, &rank);
00722     MPI_Comm_size(comm, &npes);
00723 
00724     std::vector<unsigned int > keysCount(in.size());
00725     std::vector<ot::TreeNode > keys;
00726 
00727     assert(!in.empty());
00728     unsigned int maxD = in[0].getMaxDepth();
00729     unsigned int dim  = in[0].getDim();
00730     ot::TreeNode* inPtr = (&(*(in.begin())));
00731 
00732     //1. Generate Keys
00733     for (int i = 0; i < in.size(); i++) {
00734       keysCount[i] = 0;
00735       unsigned int myLev = inPtr[i].getLevel();
00736       if (myLev == 1) {
00737         continue;
00738       }
00739       unsigned int childNum = inPtr[i].getChildNumber();   
00740       unsigned int mySz = (1u << (maxD - myLev));
00741       unsigned int myX = inPtr[i].getX();
00742       unsigned int myY = inPtr[i].getY();
00743       unsigned int myZ = inPtr[i].getZ();    
00744 
00745       switch (childNum) {
00746         case 0:
00747           {
00748             break;
00749           }
00750         case 7:
00751           {
00752             break;
00753           }
00754         case 1:
00755           {
00756             //-y,-z,-yz
00757             if (myY) {
00758               TreeNode tmp(myX,myY-mySz,myZ,myLev,dim,maxD);
00759               keys.push_back(tmp.getParent());
00760               keysCount[i]++;
00761             }
00762             if (myZ) {
00763               TreeNode tmp(myX,myY,myZ-mySz,myLev,dim,maxD);
00764               keys.push_back(tmp.getParent());
00765               keysCount[i]++;
00766             }
00767             if (myY && myZ) {
00768               TreeNode tmp(myX,myY-mySz,myZ-mySz,myLev,dim,maxD);         
00769               keys.push_back(tmp.getParent());
00770               keysCount[i]++;
00771             }
00772             break;
00773           }
00774         case 2:
00775           {
00776             //-x,-z,-xz
00777             if (myX) {
00778               TreeNode tmp(myX-mySz,myY,myZ,myLev,dim,maxD);
00779               keys.push_back(tmp.getParent());
00780               keysCount[i]++;
00781             }
00782             if (myZ) {
00783               TreeNode tmp(myX,myY,myZ-mySz,myLev,dim,maxD);
00784               keys.push_back(tmp.getParent());
00785               keysCount[i]++;
00786             }
00787             if (myX && myZ) {
00788               TreeNode tmp(myX-mySz,myY,myZ-mySz,myLev,dim,maxD);
00789               keys.push_back(tmp.getParent());
00790               keysCount[i]++;
00791             }
00792             break;
00793           }
00794         case 3:
00795           {
00796             //-z
00797             if (myZ) {
00798               TreeNode tmp(myX,myY,myZ-mySz,myLev,dim,maxD);
00799               keys.push_back(tmp.getParent());
00800               keysCount[i]++;
00801             }
00802             break;
00803           }
00804         case 4:
00805           {
00806             //-x,-y,-xy
00807             if (myX) {
00808               TreeNode tmp(myX-mySz,myY,myZ,myLev,dim,maxD);
00809               keys.push_back(tmp.getParent());
00810               keysCount[i]++;
00811             }
00812             if (myY) {
00813               TreeNode tmp(myX,myY-mySz,myZ,myLev,dim,maxD);
00814               keys.push_back(tmp.getParent());
00815               keysCount[i]++;
00816             }
00817             if (myX && myY) {
00818               TreeNode tmp(myX-mySz,myY-mySz,myZ,myLev,dim,maxD);
00819               keys.push_back(tmp.getParent());
00820               keysCount[i]++;
00821             }
00822             break;
00823           }
00824         case 5:
00825           {
00826             //-y
00827             if (myY) {
00828               TreeNode tmp(myX,myY-mySz,myZ,myLev,dim,maxD);
00829               keys.push_back(tmp.getParent());
00830               keysCount[i]++;
00831             }
00832             break;
00833           }
00834         case 6:
00835           {
00836             //-x
00837             if (myX) {
00838               TreeNode tmp(myX-mySz,myY,myZ,myLev,dim,maxD);
00839               keys.push_back(tmp.getParent());    
00840               keysCount[i]++;
00841             }
00842             break;      
00843           }
00844         default: assert(false);
00845       }//end switch
00846     }//end for i        
00847 
00848     PROF_FLN_STAGE1_END
00849       PROF_FLN_STAGE2_BEGIN
00850 
00851       // allocate memory for the mins array
00852       std::vector<ot::TreeNode> mins(npes);
00853 
00854     par::Mpi_Allgather<ot::TreeNode>(inPtr, &(*(mins.begin())), 1, comm);
00855 
00856     unsigned int *part = NULL;
00857     if(!keys.empty()) {
00858       part = new unsigned int[keys.size()];    
00859     }
00860 
00861     for (unsigned int i = 0; i < keys.size(); i++) {
00862       unsigned int idx;
00863       //maxLB returns the last index in a sorted array
00864       //such that a[ind] <= key and  a[index +1] > key
00865       bool found = seq::maxLowerBound<TreeNode >(mins, keys[i], idx, NULL, NULL);
00866       if (!found ) {
00867         part[i] = rank;
00868       } else {
00869         part[i] = idx;
00870       }
00871     }//end for i
00872     mins.clear();
00873 
00874     PROF_FLN_STAGE2_END
00875       PROF_FLN_STAGE3_BEGIN
00876 
00877       int *numKeysSend = new int[npes];
00878     int *numKeysRecv = new int[npes];
00879     for (int i = 0; i < npes; i++) {
00880       numKeysSend[i] = 0;
00881     }
00882 
00883     // calculate the number of keys to send ...
00884     for (unsigned int i = 0; i < keys.size(); i++) {
00885       assert(part[i] < npes);
00886       numKeysSend[part[i]]++;
00887     }
00888 
00889     // Now do an All2All to get inumKeysRecv
00890     par::Mpi_Alltoall<int>(numKeysSend, numKeysRecv, 1, comm);
00891 
00892     unsigned int totalKeys=0; // total number of local keys ...
00893     for (int i = 0; i < npes; i++) {
00894       totalKeys += numKeysRecv[i];
00895     }
00896 
00897 #ifdef __MEASURE_FLAG_NODES__
00898     MPI_Barrier(comm);
00899     int numProcsSend = 0;
00900     int numProcsRecv = 0;
00901     for(int i = 0; i < npes; i++) {
00902       if(numKeysSend[i]) {
00903         numProcsSend++;
00904       }
00905       if(numKeysRecv[i]) {
00906         numProcsRecv++;
00907       }
00908     }//end for i
00909     int* allNumProcsSend = new int[npes];
00910     int* allNumProcsRecv = new int[npes];
00911     unsigned int* allKeysSz = new unsigned int[npes];
00912     unsigned int* allTotalRecv = new unsigned int[npes]; 
00913     unsigned int localKeysSize = keys.size(); 
00914     par::Mpi_Gather<int>(&numProcsSend, allNumProcsSend, 1, 0, comm);
00915     par::Mpi_Gather<int>(&numProcsRecv, allNumProcsRecv, 1, 0, comm);
00916     par::Mpi_Gather<unsigned int>(&localKeysSize, allKeysSz, 1, 0, comm);
00917     par::Mpi_Gather<unsigned int>(&totalKeys, allTotalRecv, 1, 0, comm);
00918     if(!rank) {
00919       for(int i = 0; i < npes; i++) {
00920         std::cout<<"In Flag Nodes:  allNumProcsSend["<<i<<"] = "<<allNumProcsSend[i]<<std::endl;
00921         std::cout<<"In Flag Nodes:  allNumProcsRecv["<<i<<"] = "<<allNumProcsRecv[i]<<std::endl;
00922         std::cout<<"In Flag Nodes:  allKeysSz["<<i<<"] = "<<allKeysSz[i]<<std::endl;
00923         std::cout<<"In Flag Nodes:  allTotalRecv["<<i<<"] = "<<allTotalRecv[i]<<std::endl;
00924       }//end for i
00925     }
00926 
00927     delete [] allNumProcsSend;
00928     delete [] allNumProcsRecv;
00929     delete [] allKeysSz;
00930     delete [] allTotalRecv;
00931     MPI_Barrier(comm);
00932 #endif
00933 
00934     // create the send and recv buffers ...
00935     std::vector<ot::TreeNode> sendK (keys.size());
00936     std::vector<ot::TreeNode> recvK (totalKeys);
00937     // the mapping ..
00938     unsigned int * comm_map = NULL;
00939     if(!keys.empty()) {
00940       comm_map = new unsigned int [keys.size()];
00941     }
00942 
00943     // Now create sendK
00944     int *sendOffsets = new int[npes]; sendOffsets[0] = 0;
00945     int *recvOffsets = new int[npes]; recvOffsets[0] = 0;
00946     int *numKeysTmp = new int[npes]; numKeysTmp[0] = 0; 
00947 
00948     // compute offsets ...
00949     for (int i = 1; i < npes; i++) {
00950       sendOffsets[i] = sendOffsets[i-1] + numKeysSend[i-1];
00951       recvOffsets[i] = recvOffsets[i-1] + numKeysRecv[i-1];
00952       numKeysTmp[i] = 0; 
00953     }
00954 
00955     for (unsigned int i = 0; i < keys.size(); i++) {
00956       unsigned int ni = numKeysTmp[part[i]];
00957       numKeysTmp[part[i]]++;
00958       // set entry ...
00959       assert((sendOffsets[part[i]] + ni) < keys.size());
00960       sendK[sendOffsets[part[i]] + ni] = keys[i];
00961       // save mapping .. will need it later ...
00962       comm_map[i] = sendOffsets[part[i]] + ni;
00963     }
00964     unsigned int keysSz = keys.size();
00965     keys.clear();
00966 
00967     if(part) {
00968       delete [] part;
00969       part = NULL;
00970     }
00971 
00972     delete [] numKeysTmp;
00973     numKeysTmp = NULL;
00974 
00975     ot::TreeNode* sendKptr = NULL;
00976     ot::TreeNode* recvKptr = NULL;
00977     if(!sendK.empty()) {
00978       sendKptr = &(*(sendK.begin()));
00979     }
00980     if(!recvK.empty()) {
00981       recvKptr = &(*(recvK.begin()));
00982     }
00983 
00984     par::Mpi_Alltoallv_sparse<ot::TreeNode>(sendKptr, numKeysSend, sendOffsets,
00985         recvKptr, numKeysRecv, recvOffsets, comm);
00986 
00987     sendK.clear();
00988 
00989     PROF_FLN_STAGE3_END
00990       PROF_FLN_STAGE4_BEGIN
00991 
00992       std::vector<char>  resSend (totalKeys);
00993     std::vector<char>  resRecv (keysSz);
00994 
00995     for (unsigned int i = 0; i < totalKeys; i++) {
00996       unsigned int idx;
00997       resSend[i] = static_cast<char>(
00998           seq::BinarySearch<ot::TreeNode>(inPtr,
00999             in.size(),  recvK[i], &idx)) ;   
01000     }//end for i
01001     recvK.clear();
01002 
01003     PROF_FLN_STAGE4_END
01004       PROF_FLN_STAGE5_BEGIN
01005 
01006       char* resSendPtr = NULL;
01007     char* resRecvPtr = NULL;
01008     if(!resSend.empty()) {
01009       resSendPtr = &(*(resSend.begin()));
01010     }
01011     if(!resRecv.empty()) {
01012       resRecvPtr = &(*(resRecv.begin()));
01013     }
01014 
01015     par::Mpi_Alltoallv_sparse<char>( resSendPtr, numKeysRecv, recvOffsets, 
01016         resRecvPtr, numKeysSend, sendOffsets, comm);
01017 
01018     resSend.clear();
01019 
01020     delete [] sendOffsets;
01021     sendOffsets = NULL;
01022 
01023     delete [] recvOffsets;
01024     recvOffsets = NULL;
01025 
01026     delete [] numKeysSend;
01027     numKeysSend = NULL;
01028 
01029     delete [] numKeysRecv;
01030     numKeysRecv = NULL;
01031 
01032     unsigned int st = 0;
01033     for (unsigned int i = 0; i < in.size(); i++) {
01034       bool isHanging = false;
01035       for (unsigned int k = 0; k < keysCount[i]; k++) {
01036         assert(comm_map[st + k] < resRecv.size());
01037         if (resRecv[comm_map[st + k]]) {
01038           isHanging = true;
01039           break;
01040         }
01041       }//end for k
01042       if (!isHanging) {
01043         in[i].orFlag(ot::TreeNode::NODE);
01044       }
01045       st += keysCount[i];
01046     }//end for i
01047     assert(st == keysSz);
01048     // Clean up ...
01049     if(comm_map) {
01050       delete [] comm_map; 
01051       comm_map = NULL;
01052     }
01053 
01054     resRecv.clear();
01055 
01056     PROF_FLN_STAGE5_END
01057 
01058       PROF_MARK_HANGING_END
01059   }//end function
01060 
01061 
01062   void flagNodesType2(std::vector<ot::TreeNode> & in, MPI_Comm comm) {
01063 
01064 #ifdef __PROF_WITH_BARRIER__
01065     MPI_Barrier(comm);
01066 #endif
01067 
01068     PROF_MARK_HANGING_BEGIN
01069 
01070       PROF_FLN_STAGE1_BEGIN
01071 
01072       int npes;
01073     int rank;
01074 
01075     MPI_Comm_rank(comm, &rank);
01076     MPI_Comm_size(comm, &npes);
01077 
01078     std::vector<ot::TreeNode > keys;
01079 
01080     assert(!in.empty());
01081     unsigned int maxD = in[0].getMaxDepth();
01082     unsigned int dim  = in[0].getDim();
01083 
01084     ot::TreeNode* inPtr = (&(*in.begin()));
01085 
01086     //1. Generate Keys
01087     //Only true octants need to generate keys. Pseudo-octants don't generate keys.
01088     //This is because the positive boundary nodes can only be edge-hanging, they
01089     //can't be face hanging. And the edge hanging node will be identified by the
01090     //key generated by the true element instead
01091     for (int i = 0; i < in.size(); i++) {
01092       unsigned int myLev = inPtr[i].getLevel();
01093       if (myLev == maxD) {
01094         continue;
01095       }
01096       unsigned int mySz = (1u << (maxD - myLev));
01097       unsigned int myX = inPtr[i].getX();
01098       unsigned int myY = inPtr[i].getY();
01099       unsigned int myZ = inPtr[i].getZ();    
01100 
01101       if( (myX + mySz) <= (1u << (maxD-1)) ) {
01102         //Add C6 of my +x neighbour (Face Hanging)
01103         TreeNode tmp6x((myX + mySz), (myY + (mySz>>1)), (myZ + (mySz>>1)), (myLev + 1), dim, maxD);
01104         keys.push_back(tmp6x);
01105 
01106         //Add C4 of my +x neighbour (Edge Hanging)
01107         TreeNode tmp4x((myX + mySz), myY, (myZ + (mySz>>1)), (myLev + 1), dim, maxD);
01108         keys.push_back(tmp4x);
01109 
01110         //Add C2 of my +x neighbour (Edge Hanging)
01111         TreeNode tmp2x((myX + mySz), (myY + (mySz>>1)), myZ, (myLev + 1), dim, maxD);
01112         keys.push_back(tmp2x);
01113 
01114         if( (myY + mySz) <= (1u << (maxD-1)) ) {
01115           //Add C4 of my +xy neighbour (Edge Hanging)
01116           TreeNode tmp4xy((myX + mySz), (myY + mySz), (myZ + (mySz>>1)), (myLev + 1), dim, maxD);
01117           keys.push_back(tmp4xy);
01118         }
01119 
01120         if( (myZ + mySz) <= (1u << (maxD-1)) ) {
01121           //Add C2 of my +xz neighbour (Edge Hanging)
01122           TreeNode tmp2xz((myX + mySz), (myY + (mySz>>1)), (myZ + mySz), (myLev + 1), dim, maxD);
01123           keys.push_back(tmp2xz);
01124         }
01125       } 
01126 
01127       if( (myY + mySz) <= (1u << (maxD-1)) ) {
01128         //Add C5 of my +y neighbour (Face Hanging)
01129         TreeNode tmp5y((myX + (mySz>>1)), (myY + mySz), (myZ + (mySz>>1)), (myLev + 1), dim, maxD);
01130         keys.push_back(tmp5y);
01131 
01132         //Add C1 of my +y neighbour (Edge Hanging)
01133         TreeNode tmp1y((myX + (mySz>>1)), (myY + mySz), myZ, (myLev + 1), dim, maxD);
01134         keys.push_back(tmp1y);
01135 
01136         //Add C4 of my +y neighbour (Edge Hanging)
01137         TreeNode tmp4y(myX, (myY + mySz), (myZ + (mySz>>1)), (myLev + 1), dim, maxD);
01138         keys.push_back(tmp4y);
01139 
01140         if( (myZ + mySz) <= (1u << (maxD-1)) ) {
01141           //Add C1 of my +yz neighbour (Edge Hanging)
01142           TreeNode tmp1yz((myX + (mySz>>1)), (myY + mySz), (myZ + mySz), (myLev + 1), dim, maxD);
01143           keys.push_back(tmp1yz);
01144         }
01145       } 
01146 
01147       if( (myZ + mySz) <= (1u << (maxD-1)) ) {
01148         //Add C3 of my +z neighbour (Face Hanging)
01149         TreeNode tmp3z((myX + (mySz>>1)), (myY + (mySz>>1)), (myZ + mySz), (myLev + 1), dim, maxD);
01150         keys.push_back(tmp3z);
01151 
01152         //Add C1 of my +z neighbour (Edge Hanging)
01153         TreeNode tmp1z((myX + (mySz>>1)), myY, (myZ + mySz), (myLev + 1), dim, maxD);
01154         keys.push_back(tmp1z);
01155 
01156         //Add C2 of my +z neighbour (Edge Hanging)
01157         TreeNode tmp2z(myX, (myY + (mySz>>1)), (myZ + mySz), (myLev + 1), dim, maxD);
01158         keys.push_back(tmp2z);
01159       }
01160     }//end for i        
01161 
01162 #ifdef __MEASURE_FLAG_NODES__
01163     unsigned int forwardKeysCount = 0;
01164     for (int i = 0; i < in.size(); i++) {
01165       unsigned int myLev = inPtr[i].getLevel();
01166       if (myLev == 1) {
01167         continue;
01168       }
01169       unsigned int childNum = inPtr[i].getChildNumber();   
01170       unsigned int mySz = (1u << (maxD - myLev));
01171       unsigned int myX = inPtr[i].getX();
01172       unsigned int myY = inPtr[i].getY();
01173       unsigned int myZ = inPtr[i].getZ();    
01174 
01175       switch (childNum) {
01176         case 0:
01177           {
01178             break;
01179           }
01180         case 7:
01181           {
01182             break;
01183           }
01184         case 1:
01185           {
01186             //-y,-z,-yz
01187             if (myY) {
01188               forwardKeysCount++;
01189             }
01190             if (myZ) {
01191               forwardKeysCount++;
01192             }
01193             if (myY && myZ) {
01194               forwardKeysCount++;
01195             }
01196             break;
01197           }
01198         case 2:
01199           {
01200             //-x,-z,-xz
01201             if (myX) {
01202               forwardKeysCount++;
01203             }
01204             if (myZ) {
01205               forwardKeysCount++;
01206             }
01207             if (myX && myZ) {
01208               forwardKeysCount++;
01209             }
01210             break;
01211           }
01212         case 3:
01213           {
01214             //-z
01215             if (myZ) {
01216               forwardKeysCount++;
01217             }
01218             break;
01219           }
01220         case 4:
01221           {
01222             //-x,-y,-xy
01223             if (myX) {
01224               forwardKeysCount++;
01225             }
01226             if (myY) {
01227               forwardKeysCount++;
01228             }
01229             if (myX && myY) {
01230               forwardKeysCount++;
01231             }
01232             break;
01233           }
01234         case 5:
01235           {
01236             //-y
01237             if (myY) {
01238               forwardKeysCount++;
01239             }
01240             break;
01241           }
01242         case 6:
01243           {
01244             //-x
01245             if (myX) {
01246               forwardKeysCount++;
01247             }
01248             break;      
01249           }
01250         default: assert(false);
01251       }//end switch
01252     }//end for i        
01253 #endif
01254 
01255     PROF_FLN_STAGE1_END
01256       PROF_FLN_STAGE2_BEGIN
01257 
01258 #ifdef __MEASURE_FLAG_NODES__
01259       unsigned int keysSzBefore = keys.size();
01260 #endif
01261 
01262     //Make keys sorted and unique locally. There could still be duplicates
01263     //globally and keys need not be globally sorted
01264     seq::makeVectorUnique<ot::TreeNode>(keys, false);
01265 
01266 #ifdef __MEASURE_FLAG_NODES__
01267     unsigned int keysSzAfter = keys.size();
01268     unsigned int* allKeysSzBefore = new unsigned int[npes];
01269     unsigned int* allKeysSzAfter = new unsigned int[npes];
01270     unsigned int* allForwardKeysCount = new unsigned int[npes];
01271     par::Mpi_Gather<unsigned int>(&keysSzBefore, allKeysSzBefore, 1, 0, comm);
01272     par::Mpi_Gather<unsigned int>(&keysSzAfter, allKeysSzAfter, 1, 0, comm);
01273     par::Mpi_Gather<unsigned int>(&forwardKeysCount, allForwardKeysCount, 1, 0, comm);
01274     if(!rank) {
01275       for(int i = 0; i < npes; i++) {
01276         std::cout<<"rank = "<<i<<"keys Before = "<<allKeysSzBefore[i]
01277           <<" After = "<<allKeysSzAfter[i]
01278           <<" Forward = "<<allForwardKeysCount[i]
01279           <<std::endl; 
01280       }//end for i
01281     }
01282     delete [] allKeysSzBefore;
01283     delete [] allKeysSzAfter;
01284     delete [] allForwardKeysCount;
01285     MPI_Barrier(comm);
01286 #endif
01287 
01288     PROF_FLN_STAGE2_END
01289       PROF_FLN_STAGE3_BEGIN
01290 
01291       // allocate memory for the mins array
01292       ot::TreeNode* mins = new ot::TreeNode[npes];
01293 
01294     par::Mpi_Allgather<ot::TreeNode>(inPtr, mins, 1, comm);
01295     //Mins will be sorted and unique
01296 
01297     MPI_Request* requests = new MPI_Request[npes<<1];
01298     ot::TreeNode* keysPtr = NULL;
01299     if(!(keys.empty())) {
01300       keysPtr = (&(*keys.begin()));
01301     }
01302 
01303     int* sendOffsets = new int[npes];
01304     int* sendCounts = new int[npes];
01305     for(int i = 0; i < npes; i++) {
01306       sendOffsets[i] = 0;
01307       sendCounts[i] = 0;
01308     }
01309 
01310     for(int i = 0; i < (npes - 1); i++) {
01311       for(int j = sendOffsets[i]; j < keys.size(); j++) {
01312         if(keysPtr[j] >= mins[i+1]) {
01313           break;
01314         } else {
01315           sendCounts[i]++;
01316         }
01317       }
01318       sendOffsets[i + 1] = (sendOffsets[i] + sendCounts[i]);
01319     }//end for i
01320 
01321     sendCounts[npes - 1] = (keys.size() - sendOffsets[npes - 1]);
01322 
01323     delete [] mins;
01324 
01325     int* recvCounts = new int[npes];
01326     par::Mpi_Alltoall(sendCounts, recvCounts, 1, comm); 
01327 
01328     std::vector<ot::TreeNode>* recvNodes = new std::vector<ot::TreeNode>[npes];
01329     for(int i = 0; i < npes; i++) {
01330       recvNodes[i].resize(recvCounts[i]);
01331     }//end for i
01332 
01333     //Post Recvs
01334     for(int i = 0; i < rank; i++) {
01335       if(recvCounts[i]) {
01336         par::Mpi_Irecv<ot::TreeNode>( (&(*((recvNodes[i]).begin()))) , recvCounts[i],
01337             i, 1, comm, (requests + (i<<1)) );
01338       }
01339     }//end for i
01340 
01341     for(int i = (rank + 1); i < npes; i++) {
01342       if(recvCounts[i]) {
01343         par::Mpi_Irecv<ot::TreeNode>( (&(*((recvNodes[i]).begin()))) , recvCounts[i],
01344             i, 1, comm, (requests + (i<<1)) );
01345       }
01346     }//end for i
01347 
01348     //Post Sends
01349     for(int i = 0; i < rank; i++) {
01350       if(sendCounts[i]) {
01351         par::Mpi_Issend<ot::TreeNode>( (keysPtr + sendOffsets[i]), sendCounts[i], 
01352             i, 1, comm, (requests + ((i<<1) + 1)) );
01353       }
01354     }//end for i
01355 
01356     for(int i = (rank + 1); i < npes; i++) {
01357       if(sendCounts[i]) {
01358         par::Mpi_Issend<ot::TreeNode>( (keysPtr + sendOffsets[i]), sendCounts[i], 
01359             i, 1, comm, (requests + ((i<<1) + 1)) );
01360       }
01361     }//end for i
01362 
01363     PROF_FLN_STAGE3_END
01364       PROF_FLN_STAGE4_BEGIN
01365 
01366       bool* isHanging = new bool[in.size()];
01367     for(int i = 0; i < in.size(); i++) {
01368       isHanging[i] = false; 
01369     }//end for i
01370 
01371     for(int i = sendOffsets[rank]; i < (sendOffsets[rank] + sendCounts[rank]); i++) {
01372       unsigned int retIdx;
01373       bool found =  seq::BinarySearch(inPtr, in.size(), keysPtr[i], &retIdx);
01374       if(found) {
01375         //hanging
01376         isHanging[retIdx] = true;
01377       }
01378     }//end for i
01379 
01380     delete [] sendOffsets;
01381 
01382     PROF_FLN_STAGE4_END
01383       PROF_FLN_STAGE5_BEGIN
01384 
01385       //Wait for the recvs to complete
01386       for(int i = 0; i < rank; i++) {
01387         if(recvCounts[i]) {
01388           MPI_Status status;
01389           MPI_Wait( (requests + (i<<1)), &status);
01390         }
01391       }//end for i
01392 
01393     for(int i = (rank + 1); i < npes; i++) {
01394       if(recvCounts[i]) {
01395         MPI_Status status;
01396         MPI_Wait( (requests + (i<<1)), &status);
01397       }
01398     }//end for i
01399 
01400     PROF_FLN_STAGE5_END
01401       PROF_FLN_STAGE6_BEGIN
01402 
01403       for(int i = 0; i < rank; i++) {
01404         for(int j = 0; j < recvCounts[i]; j++) {
01405           unsigned int retIdx;
01406           bool found =  seq::BinarySearch(inPtr, in.size(), recvNodes[i][j], &retIdx);
01407           if(found) {
01408             //hanging
01409             isHanging[retIdx] = true;
01410           }
01411         }//end for j
01412       }//end for i
01413 
01414     for(int i = (rank + 1); i < npes; i++) {
01415       for(int j = 0; j < recvCounts[i]; j++) {
01416         unsigned int retIdx;
01417         bool found =  seq::BinarySearch(inPtr, in.size(), recvNodes[i][j], &retIdx);
01418         if(found) {
01419           //hanging
01420           isHanging[retIdx] = true;
01421         }
01422       }//end for j
01423     }//end for i
01424 
01425     for(int i = 0; i < in.size(); i++) {
01426       if(!isHanging[i]) {
01427         inPtr[i].orFlag(ot::TreeNode::NODE);
01428       } 
01429     }//end for i
01430 
01431     delete [] isHanging;
01432     delete [] recvNodes;
01433     delete [] recvCounts;
01434 
01435     PROF_FLN_STAGE6_END
01436       PROF_FLN_STAGE7_BEGIN
01437 
01438       //Wait for the sends to complete
01439       for(int i = 0; i < rank; i++) {
01440         if(sendCounts[i]) {
01441           MPI_Status status;
01442           MPI_Wait( (requests + ((i<<1) + 1)), &status);
01443         }
01444       }//end for i
01445 
01446     for(int i = (rank + 1); i < npes; i++) {
01447       if(sendCounts[i]) {
01448         MPI_Status status;
01449         MPI_Wait( (requests + ((i<<1) + 1)), &status);
01450       }
01451     }//end for i
01452 
01453     keys.clear();
01454     delete [] sendCounts;
01455     delete [] requests;
01456 
01457     PROF_FLN_STAGE7_END
01458 
01459       PROF_MARK_HANGING_END
01460 
01461   }//end function
01462 
01463   void flagNodesType3(std::vector<ot::TreeNode> & in, MPI_Comm comm) {
01464 
01465 #ifdef __PROF_WITH_BARRIER__
01466     MPI_Barrier(comm);
01467 #endif
01468 
01469     PROF_MARK_HANGING_BEGIN
01470 
01471       PROF_FLN_STAGE1_BEGIN
01472 
01473       int npes;
01474     int rank;
01475 
01476     MPI_Comm_rank(comm, &rank);
01477     MPI_Comm_size(comm, &npes);
01478 
01479     std::vector<ot::TreeNode > keys;
01480 
01481     assert(!in.empty());
01482     unsigned int maxD = in[0].getMaxDepth();
01483     unsigned int dim  = in[0].getDim();
01484     ot::TreeNode* inPtr = (&(*(in.begin())));
01485 
01486     //1. Generate Keys
01487     for (int i = 0; i < in.size(); i++) {
01488       //keysCount[i] = 0;
01489       unsigned int myLev = inPtr[i].getLevel();
01490       if (myLev == 1) {
01491         continue;
01492       }
01493       unsigned int currKeySz = static_cast<unsigned int>(keys.size());
01494       unsigned int childNum = inPtr[i].getChildNumber();   
01495       unsigned int mySz = (1u << (maxD - myLev));
01496       unsigned int myX = inPtr[i].getX();
01497       unsigned int myY = inPtr[i].getY();
01498       unsigned int myZ = inPtr[i].getZ();    
01499 
01500       switch (childNum) {
01501         case 0:
01502           {
01503             break;
01504           }
01505         case 7:
01506           {
01507             break;
01508           }
01509         case 1:
01510           {
01511             //-y,-z,-yz
01512             if (myY) {
01513               TreeNode tmp(myX,myY-mySz,myZ,myLev,dim,maxD);
01514               keys.push_back(tmp.getParent());
01515             }
01516             if (myZ) {
01517               TreeNode tmp(myX,myY,myZ-mySz,myLev,dim,maxD);
01518               keys.push_back(tmp.getParent());
01519             }
01520             if (myY && myZ) {
01521               TreeNode tmp(myX,myY-mySz,myZ-mySz,myLev,dim,maxD);         
01522               keys.push_back(tmp.getParent());
01523             }
01524             break;
01525           }
01526         case 2:
01527           {
01528             //-x,-z,-xz
01529             if (myX) {
01530               TreeNode tmp(myX-mySz,myY,myZ,myLev,dim,maxD);
01531               keys.push_back(tmp.getParent());
01532             }
01533             if (myZ) {
01534               TreeNode tmp(myX,myY,myZ-mySz,myLev,dim,maxD);
01535               keys.push_back(tmp.getParent());
01536             }
01537             if (myX && myZ) {
01538               TreeNode tmp(myX-mySz,myY,myZ-mySz,myLev,dim,maxD);
01539               keys.push_back(tmp.getParent());
01540             }
01541             break;
01542           }
01543         case 3:
01544           {
01545             //-z
01546             if (myZ) {
01547               TreeNode tmp(myX,myY,myZ-mySz,myLev,dim,maxD);
01548               keys.push_back(tmp.getParent());
01549             }
01550             break;
01551           }
01552         case 4:
01553           {
01554             //-x,-y,-xy
01555             if (myX) {
01556               TreeNode tmp(myX-mySz,myY,myZ,myLev,dim,maxD);
01557               keys.push_back(tmp.getParent());
01558             }
01559             if (myY) {
01560               TreeNode tmp(myX,myY-mySz,myZ,myLev,dim,maxD);
01561               keys.push_back(tmp.getParent());
01562             }
01563             if (myX && myY) {
01564               TreeNode tmp(myX-mySz,myY-mySz,myZ,myLev,dim,maxD);
01565               keys.push_back(tmp.getParent());
01566             }
01567             break;
01568           }
01569         case 5:
01570           {
01571             //-y
01572             if (myY) {
01573               TreeNode tmp(myX,myY-mySz,myZ,myLev,dim,maxD);
01574               keys.push_back(tmp.getParent());
01575             }
01576             break;
01577           }
01578         case 6:
01579           {
01580             //-x
01581             if (myX) {
01582               TreeNode tmp(myX-mySz,myY,myZ,myLev,dim,maxD);
01583               keys.push_back(tmp.getParent());    
01584             }
01585             break;      
01586           }
01587         default: assert(false);
01588       }//end switch
01589       for(int  j = currKeySz; j < keys.size(); j++) {
01590         keys[j].setWeight(i);
01591       }//end for j 
01592     }//end for i        
01593 
01594     PROF_FLN_STAGE1_END
01595 
01596 #ifdef __DEBUG_DA_PUBLIC__
01597       MPI_Barrier(comm);
01598     if(!rank) {
01599       std::cout<<"FLN Stage 1 passed."<<std::endl;
01600     }
01601     MPI_Barrier(comm);
01602 #endif
01603 
01604     PROF_FLN_STAGE2_BEGIN
01605 
01606       // allocate memory for the mins array
01607       std::vector<ot::TreeNode> mins(npes);
01608 
01609     par::Mpi_Allgather<ot::TreeNode>(inPtr, (&(*(mins.begin()))), 1, comm);
01610 
01611     unsigned int *part = NULL;
01612     if(!keys.empty()) {
01613       part = new unsigned int[keys.size()];    
01614     }
01615 
01616     for (unsigned int i = 0; i < keys.size(); i++) {
01617       unsigned int idx;
01618       //maxLB returns the last index in a sorted array
01619       //such that a[ind] <= key and  a[index +1] > key
01620       bool found = seq::maxLowerBound<TreeNode >(mins, keys[i], idx, NULL, NULL);
01621       if (!found ) {
01622         part[i] = rank;
01623       } else {
01624         part[i] = idx;
01625       }
01626     }//end for i
01627     mins.clear();
01628 
01629     PROF_FLN_STAGE2_END 
01630 
01631 #ifdef __DEBUG_DA_PUBLIC__
01632       MPI_Barrier(comm);
01633     if(!rank) {
01634       std::cout<<"FLN Stage 2 passed."<<std::endl;
01635     }
01636     MPI_Barrier(comm);
01637 #endif
01638 
01639     PROF_FLN_STAGE3_BEGIN
01640 
01641       int *numKeysSend = new int[npes];
01642     int *numKeysRecv = new int[npes];
01643     for (int i = 0; i < npes; i++) {
01644       numKeysSend[i] = 0;
01645     }
01646 
01647     // calculate the number of keys to send ...
01648     for (unsigned int i = 0; i < keys.size(); i++) {
01649       assert(part[i] < npes);
01650       numKeysSend[part[i]]++;
01651     }
01652 
01653     // Now do an All2All to get inumKeysRecv
01654     par::Mpi_Alltoall<int>(numKeysSend, numKeysRecv, 1, comm);
01655 
01656     unsigned int totalKeys=0; // total number of local keys ...
01657     for (int i = 0; i < npes; i++) {
01658       totalKeys += numKeysRecv[i];
01659     }
01660 
01661 #ifdef __MEASURE_FLAG_NODES__
01662     MPI_Barrier(comm);
01663     int numProcsSend = 0;
01664     int numProcsRecv = 0;
01665     for(int i = 0; i < npes; i++) {
01666       if(numKeysSend[i]) {
01667         numProcsSend++;
01668       }
01669       if(numKeysRecv[i]) {
01670         numProcsRecv++;
01671       }
01672     }//end for i
01673     int* allNumProcsSend = new int[npes];
01674     int* allNumProcsRecv = new int[npes];
01675     unsigned int* allKeysSz = new unsigned int[npes];
01676     unsigned int* allTotalRecv = new unsigned int[npes]; 
01677     unsigned int localKeysSize = keys.size(); 
01678     par::Mpi_Gather<int>(&numProcsSend, allNumProcsSend, 1, 0, comm);
01679     par::Mpi_Gather<int>(&numProcsRecv, allNumProcsRecv, 1, 0, comm);
01680     par::Mpi_Gather<unsigned int>(&localKeysSize, allKeysSz, 1, 0, comm);
01681     par::Mpi_Gather<unsigned int>(&totalKeys, allTotalRecv, 1, 0, comm);
01682     if(!rank) {
01683       for(int i = 0; i < npes; i++) {
01684         std::cout<<"In Flag Nodes:  allNumProcsSend["<<i<<"] = "<<allNumProcsSend[i]<<std::endl;
01685         std::cout<<"In Flag Nodes:  allNumProcsRecv["<<i<<"] = "<<allNumProcsRecv[i]<<std::endl;
01686         std::cout<<"In Flag Nodes:  allKeysSz["<<i<<"] = "<<allKeysSz[i]<<std::endl;
01687         std::cout<<"In Flag Nodes:  allTotalRecv["<<i<<"] = "<<allTotalRecv[i]<<std::endl;
01688       }//end for i
01689     }
01690 
01691     delete [] allNumProcsSend;
01692     delete [] allNumProcsRecv;
01693     delete [] allKeysSz;
01694     delete [] allTotalRecv;
01695     MPI_Barrier(comm);
01696 #endif
01697 
01698     // create the send and recv buffers ...
01699     ot::TreeNode* sendK = NULL;
01700     if(!(keys.empty())) {
01701       sendK = new ot::TreeNode[keys.size()];
01702     }
01703 
01704     ot::TreeNode* recvK = NULL;
01705     if(totalKeys) {
01706       recvK = new ot::TreeNode[totalKeys];
01707     }
01708 
01709     // the mapping ..
01710     unsigned int* comm_map = NULL;
01711     if(!keys.empty()) {
01712       comm_map = new unsigned int[keys.size()];
01713     }
01714 
01715     // Now create sendK
01716     int *sendOffsets = new int[npes]; sendOffsets[0] = 0;
01717     int *recvOffsets = new int[npes]; recvOffsets[0] = 0;
01718     int *numKeysTmp = new int[npes]; numKeysTmp[0] = 0; 
01719 
01720     // compute offsets ...
01721     for (int i = 1; i < npes; i++) {
01722       sendOffsets[i] = sendOffsets[i-1] + numKeysSend[i-1];
01723       recvOffsets[i] = recvOffsets[i-1] + numKeysRecv[i-1];
01724       numKeysTmp[i] = 0; 
01725     }
01726 
01727     for (unsigned int i = 0; i < keys.size(); i++) {
01728       unsigned int ni = numKeysTmp[part[i]];
01729       numKeysTmp[part[i]]++;
01730       // set entry ...
01731       assert((sendOffsets[part[i]] + ni) < keys.size());
01732       sendK[sendOffsets[part[i]] + ni] = keys[i];
01733       // save mapping .. will need it later ...
01734       comm_map[sendOffsets[part[i]] + ni] = keys[i].getWeight();
01735     }
01736     unsigned int keysSz = keys.size();
01737     keys.clear();
01738 
01739     if(part) {
01740       delete [] part;
01741       part = NULL;
01742     }
01743 
01744     delete [] numKeysTmp;
01745     numKeysTmp = NULL;
01746 
01747     MPI_Request* requests1 = new MPI_Request[npes<<1];
01748 
01749     //Post Recvs
01750     for(int i = 0; i < rank; i++) {
01751       if(numKeysRecv[i]) {
01752         par::Mpi_Irecv<ot::TreeNode>( (recvK + recvOffsets[i]) , numKeysRecv[i],
01753             i, 1, comm, (requests1 + (i<<1)) );
01754       }
01755     }//end for i
01756 
01757     for(int i = (rank + 1); i < npes; i++) {
01758       if(numKeysRecv[i]) {
01759         par::Mpi_Irecv<ot::TreeNode>( (recvK + recvOffsets[i]) , numKeysRecv[i],
01760             i, 1, comm, (requests1 + (i<<1)) );
01761       }
01762     }//end for i
01763 
01764     //Post Sends
01765     for(int i = 0; i < rank; i++) {
01766       if(numKeysSend[i]) {
01767         par::Mpi_Issend<ot::TreeNode>( (sendK + sendOffsets[i]), numKeysSend[i], 
01768             i, 1, comm, (requests1 + ((i<<1) + 1)) );
01769       }
01770     }//end for i
01771 
01772     for(int i = (rank + 1); i < npes; i++) {
01773       if(numKeysSend[i]) {
01774         par::Mpi_Issend<ot::TreeNode>( (sendK + sendOffsets[i]), numKeysSend[i], 
01775             i, 1, comm, (requests1 + ((i<<1) + 1)) );
01776       }
01777     }//end for i
01778 
01779     PROF_FLN_STAGE3_END
01780 
01781 #ifdef __DEBUG_DA_PUBLIC__
01782       MPI_Barrier(comm);
01783     if(!rank) {
01784       std::cout<<"FLN Stage 3 passed."<<std::endl;
01785     }
01786     MPI_Barrier(comm);
01787 #endif
01788 
01789     PROF_FLN_STAGE4_BEGIN
01790 
01791       bool* resSend = NULL;
01792     if(totalKeys) {
01793       resSend = new bool[totalKeys];
01794     }
01795 
01796     bool* resRecv = NULL;
01797     if(keysSz) {
01798       resRecv = new bool[keysSz];
01799     }
01800 
01801     for (unsigned int i = sendOffsets[rank];
01802         i < (sendOffsets[rank] + numKeysSend[rank]); i++) {
01803       unsigned int idx;
01804       resSend[recvOffsets[rank] + (i - sendOffsets[rank])] =
01805         seq::BinarySearch<ot::TreeNode>(inPtr, in.size(), sendK[i], &idx);   
01806     }//end for i
01807 
01808     PROF_FLN_STAGE4_END 
01809 
01810 #ifdef __DEBUG_DA_PUBLIC__
01811       MPI_Barrier(comm);
01812     if(!rank) {
01813       std::cout<<"FLN Stage 4 passed."<<std::endl;
01814     }
01815     MPI_Barrier(comm);
01816 #endif
01817 
01818     PROF_FLN_STAGE5_BEGIN
01819 
01820       //Wait for the recvs to complete
01821       for(int i = 0; i < rank; i++) {
01822         if(numKeysRecv[i]) {
01823           MPI_Status status;
01824           MPI_Wait( (requests1 + (i<<1)), &status);
01825         }
01826       }//end for i
01827 
01828     for(int i = (rank + 1); i < npes; i++) {
01829       if(numKeysRecv[i]) {
01830         MPI_Status status;
01831         MPI_Wait( (requests1 + (i<<1)), &status);
01832       }
01833     }//end for i
01834 
01835     PROF_FLN_STAGE5_END 
01836 
01837 #ifdef __DEBUG_DA_PUBLIC__
01838       MPI_Barrier(comm);
01839     if(!rank) {
01840       std::cout<<"FLN Stage 5 passed."<<std::endl;
01841     }
01842     MPI_Barrier(comm);
01843 #endif
01844 
01845     PROF_FLN_STAGE6_BEGIN
01846 
01847       for (unsigned int i = 0; i < recvOffsets[rank]; i++) {
01848         unsigned int idx;
01849         resSend[i] = seq::BinarySearch<ot::TreeNode>(inPtr, in.size(), recvK[i], &idx);   
01850       }//end for i
01851 
01852     for (unsigned int i = (recvOffsets[rank] + numKeysRecv[rank]); i < totalKeys; i++) {
01853       unsigned int idx;
01854       resSend[i] = seq::BinarySearch<ot::TreeNode>(inPtr, in.size(), recvK[i], &idx);   
01855     }//end for i
01856 
01857     PROF_FLN_STAGE6_END
01858 
01859 #ifdef __DEBUG_DA_PUBLIC__
01860       MPI_Barrier(comm);
01861     if(!rank) {
01862       std::cout<<"FLN Stage 6 passed."<<std::endl;
01863     }
01864     MPI_Barrier(comm);
01865 #endif
01866 
01867     PROF_FLN_STAGE7_BEGIN
01868 
01869       MPI_Request* requests2 = new MPI_Request[npes<<1];
01870 
01871     //Post Recvs
01872     for(int i = 0; i < rank; i++) {
01873       if(numKeysSend[i]) {
01874         par::Mpi_Irecv<bool>( (resRecv + sendOffsets[i]) , numKeysSend[i],
01875             i, 1, comm, (requests2 + (i<<1)) );
01876       }
01877     }//end for i
01878 
01879     for(int i = (rank + 1); i < npes; i++) {
01880       if(numKeysSend[i]) {
01881         par::Mpi_Irecv<bool>( (resRecv + sendOffsets[i]) , numKeysSend[i],
01882             i, 1, comm, (requests2 + (i<<1)) );
01883       }
01884     }//end for i
01885 
01886     //Post Sends
01887     for(int i = 0; i < rank; i++) {
01888       if(numKeysRecv[i]) {
01889         par::Mpi_Issend<bool>( (resSend + recvOffsets[i]), numKeysRecv[i], 
01890             i, 1, comm, (requests2 + ((i<<1) + 1)) );
01891       }
01892     }//end for i
01893 
01894     for(int i = (rank + 1); i < npes; i++) {
01895       if(numKeysRecv[i]) {
01896         par::Mpi_Issend<bool>( (resSend + recvOffsets[i]), numKeysRecv[i], 
01897             i, 1, comm, (requests2 + ((i<<1) + 1)) );
01898       }
01899     }//end for i
01900 
01901     PROF_FLN_STAGE7_END
01902 
01903 #ifdef __DEBUG_DA_PUBLIC__
01904       MPI_Barrier(comm);
01905     if(!rank) {
01906       std::cout<<"FLN Stage 7 passed."<<std::endl;
01907     }
01908     MPI_Barrier(comm);
01909 #endif
01910 
01911     PROF_FLN_STAGE8_BEGIN
01912       bool* isHanging = new bool[in.size()];
01913     for(unsigned int i = 0; i < in.size(); i++) {
01914       isHanging[i] = false;
01915     }//end for i
01916 
01917     for(int i = recvOffsets[rank]; i < (recvOffsets[rank] + numKeysRecv[rank]); i++) {
01918       if(resSend[i]) {
01919         isHanging[comm_map[sendOffsets[rank] + i - recvOffsets[rank]]] = true;
01920       }
01921     }//end for i
01922 
01923     PROF_FLN_STAGE8_END
01924 
01925 #ifdef __DEBUG_DA_PUBLIC__
01926       MPI_Barrier(comm);
01927     if(!rank) {
01928       std::cout<<"FLN Stage 8 passed."<<std::endl;
01929     }
01930     MPI_Barrier(comm);
01931 #endif
01932 
01933     PROF_FLN_STAGE9_BEGIN
01934 
01935       //wait for recvs
01936       for(int i = 0; i < rank; i++) {
01937         if(numKeysSend[i]) {
01938           MPI_Status status;
01939           MPI_Wait( (requests2 + (i<<1)), &status);
01940         }
01941       }//end for i
01942 
01943     for(int i = (rank + 1); i < npes; i++) {
01944       if(numKeysSend[i]) {
01945         MPI_Status status;
01946         MPI_Wait( (requests2 + (i<<1)), &status);
01947       }
01948     }//end for i
01949 
01950     PROF_FLN_STAGE9_END
01951 
01952 #ifdef __DEBUG_DA_PUBLIC__
01953       MPI_Barrier(comm);
01954     if(!rank) {
01955       std::cout<<"FLN Stage 9 passed."<<std::endl;
01956     }
01957     MPI_Barrier(comm);
01958 #endif
01959 
01960     PROF_FLN_STAGE10_BEGIN
01961 
01962       for(int i = 0; i < sendOffsets[rank]; i++) {
01963         if(resRecv[i]) {
01964           isHanging[comm_map[i]] = true;
01965         }
01966       }//end for i
01967 
01968     for(int i = (sendOffsets[rank] + numKeysSend[rank]); i < keysSz; i++) {
01969       if(resRecv[i]) {
01970         isHanging[comm_map[i]] = true;
01971       }
01972     }//end for i
01973 
01974     for(unsigned int i = 0; i < in.size(); i++) {
01975       if (!isHanging[i]) {
01976         in[i].orFlag(ot::TreeNode::NODE);
01977       }
01978     }//end for i
01979 
01980     PROF_FLN_STAGE10_END
01981 
01982 #ifdef __DEBUG_DA_PUBLIC__
01983       MPI_Barrier(comm);
01984     if(!rank) {
01985       std::cout<<"FLN Stage 10 passed."<<std::endl;
01986     }
01987     MPI_Barrier(comm);
01988 #endif
01989 
01990     PROF_FLN_STAGE11_BEGIN
01991 
01992       //Wait for the sends to complete
01993       for(int i = 0; i < rank; i++) {
01994         if(numKeysSend[i]) {
01995           MPI_Status status;
01996           MPI_Wait( (requests1 + ((i<<1) + 1)), &status);
01997         }
01998       }//end for i
01999 
02000     for(int i = (rank + 1); i < npes; i++) {
02001       if(numKeysSend[i]) {
02002         MPI_Status status;
02003         MPI_Wait( (requests1 + ((i<<1) + 1)), &status);
02004       }
02005     }//end for i
02006 
02007     for(int i = 0; i < rank; i++) {
02008       if(numKeysRecv[i]) {
02009         MPI_Status status;
02010         MPI_Wait( (requests2 + ((i<<1) + 1)), &status);
02011       }
02012     }//end for i
02013 
02014     for(int i = (rank + 1); i < npes; i++) {
02015       if(numKeysRecv[i]) {
02016         MPI_Status status;
02017         MPI_Wait( (requests2 + ((i<<1) + 1)), &status);
02018       }
02019     }//end for i
02020 
02021     // Clean up ...
02022     if(comm_map) {
02023       delete [] comm_map; 
02024     }
02025 
02026     if(resRecv) {
02027       delete [] resRecv;
02028     }
02029 
02030     if(resSend) {
02031       delete []resSend;
02032     }
02033 
02034     if(sendK) {
02035       delete [] sendK;
02036     }
02037 
02038     if(recvK) {
02039       delete [] recvK;
02040     }
02041 
02042     delete [] sendOffsets;
02043     delete [] recvOffsets;
02044     delete [] numKeysSend;
02045     delete [] numKeysRecv;
02046     delete [] requests1;
02047     delete [] requests2;
02048 
02049     PROF_FLN_STAGE11_END
02050 
02051 #ifdef __DEBUG_DA_PUBLIC__
02052       MPI_Barrier(comm);
02053     if(!rank) {
02054       std::cout<<"FLN Stage 11 passed."<<std::endl;
02055     }
02056     MPI_Barrier(comm);
02057 #endif
02058 
02059     PROF_MARK_HANGING_END
02060   }//end function
02061 
02062 }//end namespace
02063 
02064 
02065 

Generated on Tue Mar 24 16:14:05 2009 for DENDRO by  doxygen 1.3.9.1