00001
00011 #include "TreeNode.h"
00012 #include "parUtils.h"
00013 #include "seqUtils.h"
00014 #include <cstring>
00015
00016 #ifdef __DEBUG__
00017 #ifndef __DEBUG_OCT__
00018 #define __DEBUG_OCT__
00019 #endif
00020 #endif
00021
00022 #ifdef __DEBUG_OCT__
00023 #ifndef __MEASURE_FLAG_NODES__
00024 #define __MEASURE_FLAG_NODES__
00025 #endif
00026 #endif
00027
00028 namespace ot {
00029
00030
00031 int mergeOctrees(std::vector<TreeNode>& inOct1, std::vector<TreeNode>& inOct2,
00032 std::vector<TreeNode>& outOct, MPI_Comm comm) {
00033 PROF_MERGE_OCTREES_BEGIN
00034
00035 int rank;
00036 int npes;
00037 MPI_Comm_rank(comm, &rank);
00038 MPI_Comm_size(comm, &npes);
00039
00040 assert(!(inOct1.empty()));
00041
00042
00043 ot::TreeNode* mins = new ot::TreeNode[npes];
00044 par::Mpi_Allgather<ot::TreeNode>(&(*(inOct1.begin())), mins, 1, comm);
00045
00046
00047 int* sendCnts = new int[npes];
00048 for(int i = 0; i < npes; i++) {
00049 sendCnts[i] = 0;
00050 }
00051
00052 if(!(inOct2.empty())) {
00053
00054
00055
00056
00057
00058
00059
00060 int minsCnt = 0;
00061 if(inOct2[0] < mins[0]) {
00062 sendCnts[0]++;
00063 } else {
00064 while( (minsCnt < npes) && (mins[minsCnt] <= inOct2[0]) ) {
00065 minsCnt++;
00066 }
00067 sendCnts[minsCnt - 1]++;
00068 }
00069 for(int i = 1; i < inOct2.size(); i++) {
00070 while( (minsCnt < npes) && (mins[minsCnt] <= inOct2[i]) ) {
00071 minsCnt++;
00072 }
00073 sendCnts[minsCnt - 1]++;
00074 }
00075 }
00076
00077 int * recvCnts = new int[npes];
00078 par::Mpi_Alltoall<int>(sendCnts, recvCnts, 1, comm);
00079
00080 int * sendDisps = new int[npes];
00081 int * recvDisps = new int[npes];
00082 sendDisps[0] = 0;
00083 recvDisps[0] = 0;
00084 for(int i = 1; i < npes; i++) {
00085 sendDisps[i] = sendDisps[i - 1] + sendCnts[i - 1];
00086 recvDisps[i] = recvDisps[i - 1] + recvCnts[i - 1];
00087 }
00088
00089 outOct.resize(recvDisps[npes - 1] + recvCnts[npes - 1]);
00090
00091 par::Mpi_Alltoallv_sparse<ot::TreeNode>(&(*(inOct2.begin())), sendCnts, sendDisps,
00092 &(*(outOct.begin())), recvCnts, recvDisps, comm);
00093
00094 delete [] mins;
00095 delete [] sendCnts;
00096 delete [] sendDisps;
00097 delete [] recvCnts;
00098 delete [] recvDisps;
00099
00100
00101 std::vector<ot::TreeNode> tmpOct;
00102 int cnt1 = 0;
00103 int cnt2 = 0;
00104 while( (cnt1 < inOct1.size()) && (cnt2 < outOct.size()) ) {
00105 if(inOct1[cnt1] < outOct[cnt2]) {
00106 tmpOct.push_back(inOct1[cnt1]);
00107 cnt1++;
00108 } else {
00109 tmpOct.push_back(outOct[cnt2]);
00110 cnt2++;
00111 }
00112 }
00113
00114 while(cnt1 < inOct1.size()) {
00115 tmpOct.push_back(inOct1[cnt1]);
00116 cnt1++;
00117 }
00118
00119 while(cnt2 < outOct.size()) {
00120 tmpOct.push_back(outOct[cnt2]);
00121 cnt2++;
00122 }
00123
00124 outOct = tmpOct;
00125 tmpOct.clear();
00126
00127
00128 lineariseList(outOct, comm);
00129
00130 PROF_MERGE_OCTREES_END
00131 }
00132
00133 int refineOctree(const std::vector<ot::TreeNode> & inp,
00134 std::vector<ot::TreeNode> &out) {
00135 out.clear();
00136 for(unsigned int i = 0; i < inp.size(); i++) {
00137 if(inp[i].getLevel() < inp[i].getMaxDepth()) {
00138 inp[i].addChildren(out);
00139 } else {
00140 out.push_back(inp[i]);
00141 }
00142 }
00143 return 1;
00144 }
00145
00146 int refineAndPartitionOctree(const std::vector<ot::TreeNode> & inp,
00147 std::vector<ot::TreeNode> &out, MPI_Comm comm) {
00148 refineOctree(inp,out);
00149 par::partitionW<ot::TreeNode>(out, NULL,comm);
00150 return 1;
00151 }
00152
00153 int createRegularOctree(std::vector<ot::TreeNode>& out, unsigned int lev,
00154 unsigned int dim, unsigned int maxDepth, MPI_Comm comm) {
00155 TreeNode root(dim,maxDepth);
00156 out.clear();
00157 int rank;
00158 MPI_Comm_rank(comm,&rank);
00159 if(!rank) {
00160 out.push_back(root);
00161 }
00162 for(int i = 0; i < lev; i++) {
00163 std::vector<ot::TreeNode> tmp;
00164 refineAndPartitionOctree(out,tmp,comm);
00165 out = tmp;
00166 tmp.clear();
00167 }
00168 return 1;
00169 }
00170
00171
00172 int lineariseList(std::vector<ot::TreeNode> & list, bool skipLast) {
00173 std::vector<ot::TreeNode> tmp;
00174 if(!(list.empty())) {
00175 for(unsigned int i = 0; i < (list.size()-1); i++) {
00176 #ifdef __DEBUG_OCT__
00177 assert(areComparable(list[i], list[i+1]));
00178 #endif
00179 if( (!(list[i].isAncestor(list[i+1]))) && (list[i] != list[i+1]) ) {
00180 tmp.push_back(list[i]);
00181 }
00182 }
00183 if(!skipLast) {
00184 tmp.push_back(list[list.size()-1]);
00185 }
00186 }
00187
00188 list = tmp;
00189 tmp.clear();
00190 return 1;
00191 }
00192
00193
00194 int lineariseList(std::vector<ot::TreeNode> & list, MPI_Comm comm) {
00195 int rank,size;
00196 MPI_Comm_rank(comm,&rank);
00197 MPI_Comm_size(comm,&size);
00198
00199 if(size == 1) {
00200 lineariseList(list, false);
00201 return 1;
00202 }
00203
00204
00205 int new_rank, new_size;
00206 MPI_Comm new_comm;
00207 par::splitComm2way(list.empty(), &new_comm,comm);
00208
00209 MPI_Comm_rank (new_comm, &new_rank);
00210 MPI_Comm_size (new_comm, &new_size);
00211 if(!list.empty()) {
00212
00213 ot::TreeNode lastOctant = list[list.size()-1];
00214 ot::TreeNode lastOnPrev;
00215
00216 MPI_Request recvRequest;
00217 MPI_Request sendRequest;
00218
00219 if(new_rank) {
00220 par::Mpi_Irecv<ot::TreeNode>( &lastOnPrev, 1, new_rank-1, 1, new_comm, &recvRequest);
00221 }
00222 if(new_rank < (new_size-1)) {
00223 par::Mpi_Issend<ot::TreeNode>( &lastOctant, 1, new_rank+1, 1, new_comm, &sendRequest);
00224 }
00225
00226 if(new_rank) {
00227 std::vector<ot::TreeNode> tmp(list.size()+1);
00228 for(int i = 0; i < list.size(); i++) {
00229 tmp[i+1] = list[i];
00230 }
00231
00232 MPI_Status statusWait;
00233 MPI_Wait(&recvRequest, &statusWait);
00234 tmp[0] = lastOnPrev;
00235
00236 list = tmp;
00237 tmp.clear();
00238 }
00239
00240 if(new_rank == (new_size-1)) {
00241 lineariseList(list, false);
00242 }else {
00243 lineariseList(list, true);
00244 }
00245
00246 if(new_rank < (new_size-1)) {
00247 MPI_Status statusWait;
00248 MPI_Wait(&sendRequest, &statusWait);
00249 }
00250 }
00251
00252 return 1;
00253 }
00254
00255 bool lessThanUsingWts ( TreeNode const & a, TreeNode const & b) {
00256 if(a == b){
00257 return (a.getWeight() < b.getWeight());
00258 }else {
00259 return (a < b);
00260 }
00261 }
00262
00263 unsigned int getNodeWeight(const TreeNode * t) {
00264 return t->getWeight();
00265 }
00266
00267 bool bPartComparator(TreeNode a, TreeNode b) {
00268 if ( (a.getWeight()) != (b.getWeight()) ) {
00269
00270 return( (a.getWeight()) > (b.getWeight()) );
00271 } else if (a.getWeight() > 1) {
00272
00273 return( (a.getLevel()) > (b.getLevel()) );
00274 } else {
00275
00276 return( (a.getLevel()) < (b.getLevel()) );
00277 }
00278 }
00279
00280
00281 TreeNode getNCA(TreeNode first, TreeNode second) {
00282 #ifdef __DEBUG_OCT__
00283 assert(areComparable(first,second));
00284 assert(first != second);
00285 #endif
00286 unsigned int fx = first.getX();
00287 unsigned int sx = second.getX();
00288 unsigned int fy = first.getY();
00289 unsigned int sy = second.getY();
00290 unsigned int fz = first.getZ();
00291 unsigned int sz = second.getZ();
00292 unsigned int maxDepth = first.getMaxDepth();
00293 unsigned int dim = first.getDim();
00294 unsigned int maxDiff = (unsigned int)(std::max((std::max((fx^sx),(fy^sy))),(fz^sz)));
00295 unsigned int maxDiffBinLen = binOp::binLength(maxDiff);
00296
00297 unsigned int ncaX = ((fx>>maxDiffBinLen)<<maxDiffBinLen);
00298 unsigned int ncaY = ((fy>>maxDiffBinLen)<<maxDiffBinLen);
00299 unsigned int ncaZ = ((fz>>maxDiffBinLen)<<maxDiffBinLen);
00300 unsigned int ncaLev = (maxDepth - maxDiffBinLen);
00301 TreeNode nca(ncaX,ncaY,ncaZ,ncaLev,dim,maxDepth);
00302 return nca;
00303 }
00304
00305 int readPtsFromFile(char* filename, std::vector<double>& pts) {
00306 FILE* infile;
00307 infile = fopen(filename,"rb");
00308 unsigned int temp;
00309 fread(&temp,sizeof(unsigned int),1,infile);
00310
00311 double* ptsTemp = NULL;
00312
00313 if(temp) {
00314 ptsTemp = new double[3*temp];
00315 fread(ptsTemp, sizeof(double),3*temp,infile);
00316 }
00317
00318 fclose(infile);
00319 pts.resize(3*temp);
00320
00321 for (int i=0; i < (3*temp); i++) {
00322 pts[i] = ptsTemp[i];
00323 }
00324
00325 if(ptsTemp) {
00326 delete [] ptsTemp;
00327 ptsTemp = NULL;
00328 }
00329
00330 return 1;
00331 }
00332
00333 int readDataPtsFromFile(char* filename, std::vector<double>& pts,
00334 std::vector<double>& ptVals) {
00335
00336
00337
00338
00339
00340 FILE* infile;
00341 unsigned int temp;
00342
00343 assert(sizeof(double)==8);
00344 assert(sizeof(unsigned int)==4);
00345
00346 infile = fopen(filename,"rb");
00347 fread(&temp,sizeof(unsigned int),1,infile);
00348
00349 pts.resize(3*temp);
00350 ptVals.resize(temp);
00351
00352 fread(&(pts[0]), sizeof(double),3*temp,infile);
00353 fread(&(ptVals[0]), sizeof(double),temp,infile);
00354
00355 fclose(infile);
00356
00357 return 1;
00358 }
00359
00360 int writePtsToFile(char* filename, std::vector<double>& pts) {
00361 FILE* outfile = fopen(filename,"wb");
00362 unsigned int ptsLen = pts.size();
00363 double * ptsTemp = NULL;
00364 if(!pts.empty()) {
00365 ptsTemp = (&(*(pts.begin())));
00366 }
00367
00368 if (ptsLen >0) {
00369 unsigned int numPts = ptsLen/3;
00370 fwrite(&numPts,sizeof(unsigned int),1,outfile);
00371 fwrite(ptsTemp, sizeof(double),ptsLen,outfile);
00372 }
00373 fclose(outfile);
00374 return 1;
00375 }
00376
00377 int writeDataPtsToFile(char* filename, std::vector<double>& pts,
00378 std::vector<double>& data) {
00379 FILE* outfile = fopen(filename,"wb");
00380 unsigned int ptsLen = pts.size();
00381 unsigned int numPts=ptsLen/3;
00382
00383 assert(sizeof(double)==8);
00384 assert(sizeof(unsigned int)==4);
00385
00386 fwrite(&numPts,sizeof(unsigned int),1,outfile);
00387 fwrite(&(pts[0]), sizeof(double),ptsLen,outfile);
00388 fwrite(&(data[0]), sizeof(double),numPts,outfile);
00389 fclose(outfile);
00390 return 1;
00391 }
00392
00393 int readNodesFromFile (char* filename, std::vector<TreeNode > & nodes) {
00394 FILE* infile = fopen(filename,"r");
00395 unsigned int numNode;
00396 unsigned int dim, maxDepth;
00397 fscanf(infile,"%u",&dim);
00398 fscanf(infile,"%u",&maxDepth);
00399 fscanf(infile,"%u",&numNode);
00400 nodes.resize(numNode) ;
00401
00402 for (unsigned int i =0; i< nodes.size(); i++) {
00403 unsigned int x,y,z,d;
00404 fscanf(infile,"%u",&x);
00405 fscanf(infile,"%u",&y);
00406 fscanf(infile,"%u",&z);
00407 fscanf(infile,"%u",&d);
00408 nodes[i] = ot::TreeNode (x,y,z,d, dim, maxDepth);
00409 }
00410 fclose(infile);
00411 return 1;
00412 }
00413
00414 int writeNodesToFile (char* filename, const std::vector<TreeNode> & nodes) {
00415 FILE* outfile = fopen(filename,"w");
00416 if (!nodes.empty()) {
00417 unsigned int dim = nodes[0].getDim();
00418 unsigned int maxDepth = nodes[0].getMaxDepth();
00419 fprintf(outfile,"%u %u\n",dim,maxDepth);
00420 fprintf(outfile,"%u\n",static_cast<unsigned int>(nodes.size()));
00421 for (unsigned int i =0; i< nodes.size(); i++) {
00422 assert(nodes[i].getDim() == dim);
00423 assert(nodes[i].getMaxDepth() == maxDepth);
00424 fprintf(outfile,"%u %u %u %u\n",nodes[i].getX(),nodes[i].getY(),nodes[i].getZ(),nodes[i].getLevel());
00425 }
00426 }
00427 fclose(outfile);
00428 return 1;
00429 }
00430
00431 bool areComparable (TreeNode first, TreeNode second) {
00432 return( ( (first.getDim()) == (second.getDim()) ) && ( (first.getMaxDepth()) == (second.getMaxDepth()) ) );
00433 }
00434
00435 int int2str(int n,char*s) {
00436 int tmpd[20];
00437 int i=0;
00438 int j;
00439 if (n==0) {
00440 strcpy(s,"0\0");
00441 } else {
00442 while (n>0) {
00443 tmpd[i]= (n%10);
00444 n= (int)(n/10);
00445 i++;
00446 }
00447 for (j=i-1;j>=0;j--) {
00448 s[i-j-1]=int2char(tmpd[j]);
00449 }
00450 s[i]='\0';
00451 }
00452 return 1;
00453 }
00454
00455 char int2char(int d) {
00456 switch (d) {
00457 case 0: return '0';
00458 case 1: return '1';
00459 case 2: return '2';
00460 case 3: return '3';
00461 case 4: return '4';
00462 case 5: return '5';
00463 case 6: return '6';
00464 case 7: return '7';
00465 case 8: return '8';
00466 case 9: return '9';
00467 default: return '\0';
00468 }
00469 }
00470
00471
00472
00473 void addBoundaryNodesType2(std::vector<ot::TreeNode> &in,
00474 std::vector<ot::TreeNode>& bdy,
00475 unsigned int dim, unsigned int maxDepth) {
00476
00477 PROF_ADD_BDY_BEGIN
00478 assert(bdy.empty());
00479
00480 for (unsigned int i = 0; i < in.size(); i++) {
00481
00482 unsigned int d = in[i].getLevel();
00483 unsigned int x = in[i].getX();
00484 unsigned int y = in[i].getY();
00485 unsigned int z = in[i].getZ();
00486
00487 unsigned char bdyFlags;
00488
00489 if ( in[i].isBoundaryOctant(ot::TreeNode::POSITIVE, &bdyFlags) ) {
00490
00491
00492
00493
00494
00495
00496 if ( bdyFlags & ot::TreeNode::X_POS_BDY ) {
00497 bdy.push_back(ot::TreeNode( (1u << maxDepth), y, z, (d+1), dim, maxDepth+1));
00498 }
00499
00500
00501 if ( bdyFlags & ot::TreeNode::Y_POS_BDY ) {
00502 bdy.push_back(ot::TreeNode(x, (1u << maxDepth), z,
00503 (d+1), dim, maxDepth+1));
00504 }
00505
00506
00507 if ( bdyFlags & ot::TreeNode::Z_POS_BDY ) {
00508 bdy.push_back(ot::TreeNode(x, y, (1u << maxDepth),
00509 (d+1), dim, maxDepth+1));
00510 }
00511
00512
00513 if ( (bdyFlags & (ot::TreeNode::X_POS_BDY + ot::TreeNode::Y_POS_BDY)) ==
00514 (ot::TreeNode::X_POS_BDY + ot::TreeNode::Y_POS_BDY) ) {
00515 bdy.push_back(ot::TreeNode((1u << maxDepth),(1u << maxDepth), z,
00516 (d+1), dim, maxDepth+1));
00517 }
00518
00519
00520 if ( (bdyFlags & (ot::TreeNode::X_POS_BDY + ot::TreeNode::Z_POS_BDY)) ==
00521 (ot::TreeNode::X_POS_BDY + ot::TreeNode::Z_POS_BDY) ) {
00522 bdy.push_back(ot::TreeNode((1u << maxDepth), y, (1u << maxDepth),
00523 (d+1), dim, maxDepth+1));
00524 }
00525
00526
00527 if ( (bdyFlags & (ot::TreeNode::Y_POS_BDY + ot::TreeNode::Z_POS_BDY)) ==
00528 (ot::TreeNode::Y_POS_BDY + ot::TreeNode::Z_POS_BDY) ) {
00529 bdy.push_back(ot::TreeNode(x, (1u << maxDepth), (1u << maxDepth),
00530 (d+1), dim, maxDepth+1));
00531 }
00532
00533
00534
00535 if ( (bdyFlags & (ot::TreeNode::X_POS_BDY + ot::TreeNode::Y_POS_BDY +
00536 ot::TreeNode::Z_POS_BDY)) == (ot::TreeNode::X_POS_BDY +
00537 ot::TreeNode::Y_POS_BDY + ot::TreeNode::Z_POS_BDY) ) {
00538 bdy.push_back(ot::TreeNode((1u << maxDepth), (1u << maxDepth), (1u << maxDepth),
00539 (d+1), dim, maxDepth+1));
00540 }
00541 }
00542
00543
00544 in[i] = ot::TreeNode(x, y, z, d+1, dim, maxDepth+1);
00545
00546 }
00547
00548
00549
00550
00551 PROF_ADD_BDY_END
00552 }
00553
00554 void markBoundaryNodesAtAllLevels(std::vector<ot::TreeNode>& finestOctree, unsigned int nlevels,
00555 std::vector<ot::TreeNode>* coarserOctrees, unsigned int maxDepth) {
00556
00557 for(int lev = 0; lev < (nlevels - 1); lev++) {
00558 for(unsigned int i = 0; i < coarserOctrees[lev].size(); i++) {
00559 unsigned int myX = coarserOctrees[lev][i].getX();
00560 unsigned int myY = coarserOctrees[lev][i].getY();
00561 unsigned int myZ = coarserOctrees[lev][i].getZ();
00562 if( (myX == (1u << (maxDepth - 1))) ||
00563 (myY == (1u << (maxDepth - 1))) ||
00564 (myZ == (1u << (maxDepth - 1))) ) {
00565 coarserOctrees[lev][i].orFlag(ot::TreeNode::BOUNDARY);
00566 }
00567 }
00568 }
00569
00570
00571 for(unsigned int i = 0; i < finestOctree.size(); i++) {
00572 unsigned int myX = finestOctree[i].getX();
00573 unsigned int myY = finestOctree[i].getY();
00574 unsigned int myZ = finestOctree[i].getZ();
00575 if( (myX == (1u << (maxDepth - 1))) ||
00576 (myY == (1u << (maxDepth - 1))) ||
00577 (myZ == (1u << (maxDepth - 1))) ) {
00578 finestOctree[i].orFlag(ot::TreeNode::BOUNDARY);
00579 }
00580 }
00581
00582 }
00583
00584 void discardExtraBoundaryOctants(std::vector<ot::TreeNode>& in,
00585 unsigned int dim, unsigned int maxDepth) {
00586
00587 std::vector<ot::TreeNode> tmpOctree;
00588 for(int i = 0; i < in.size(); i++) {
00589 unsigned int myX = in[i].getX();
00590 unsigned int myY = in[i].getY();
00591 unsigned int myZ = in[i].getZ();
00592 if( (myX <= (1u << (maxDepth - 1))) &&
00593 (myY <= (1u << (maxDepth - 1))) &&
00594 (myZ <= (1u << (maxDepth - 1))) ) {
00595 tmpOctree.push_back(in[i]);
00596 }
00597 }
00598
00599 if(tmpOctree.size() < in.size()) {
00600 in = tmpOctree;
00601 }
00602 }
00603
00604
00605
00606
00607 void markHangingNodesAtAllLevels(std::vector<ot::TreeNode>& finestOctree, unsigned int nlevels,
00608 std::vector<ot::TreeNode>* coarserOctrees, MPI_Comm* activeComms,
00609 unsigned int dim, unsigned int maxDepth) {
00610
00611 for(int lev = 0; lev < (nlevels - 1); lev++) {
00612 if( !(coarserOctrees[lev].empty()) ) {
00613 ot::flagNodesType3(coarserOctrees[lev], activeComms[lev + 1]);
00614 }
00615 }
00616
00617 if( !(finestOctree.empty()) ) {
00618 ot::flagNodesType3(finestOctree, activeComms[0]);
00619 }
00620
00621 }
00622
00623
00624
00625
00626 void addBoundaryNodesType1(std::vector<ot::TreeNode> &in,
00627 std::vector<ot::TreeNode>& bdy,
00628 unsigned int dim, unsigned int maxDepth) {
00629 PROF_ADD_BDY_BEGIN
00630
00631 assert(bdy.empty());
00632
00633 for (unsigned int i = 0; i < in.size(); i++) {
00634
00635 unsigned int d = in[i].getLevel();
00636 unsigned int x = in[i].getX();
00637 unsigned int y = in[i].getY();
00638 unsigned int z = in[i].getZ();
00639
00640 unsigned char bdyFlags;
00641
00642 if ( in[i].isBoundaryOctant(ot::TreeNode::POSITIVE, &bdyFlags) ) {
00643
00644
00645
00646
00647
00648
00649 if ( bdyFlags & ot::TreeNode::X_POS_BDY ) {
00650 bdy.push_back(ot::TreeNode( (1u << maxDepth), y, z, (d+1) |
00651 ot::TreeNode::BOUNDARY, dim, maxDepth+1));
00652 }
00653
00654
00655 if ( bdyFlags & ot::TreeNode::Y_POS_BDY ) {
00656 bdy.push_back(ot::TreeNode(x, (1u << maxDepth), z,
00657 (d+1) | ot::TreeNode::BOUNDARY, dim, maxDepth+1));
00658 }
00659
00660
00661 if ( bdyFlags & ot::TreeNode::Z_POS_BDY ) {
00662 bdy.push_back(ot::TreeNode(x, y, (1u << maxDepth),
00663 (d+1) | ot::TreeNode::BOUNDARY, dim, maxDepth+1));
00664 }
00665
00666
00667 if ( (bdyFlags & (ot::TreeNode::X_POS_BDY + ot::TreeNode::Y_POS_BDY)) ==
00668 (ot::TreeNode::X_POS_BDY + ot::TreeNode::Y_POS_BDY) ) {
00669 bdy.push_back(ot::TreeNode((1u << maxDepth),(1u << maxDepth), z,
00670 (d+1) | ot::TreeNode::BOUNDARY, dim, maxDepth+1));
00671 }
00672
00673
00674 if ( (bdyFlags & (ot::TreeNode::X_POS_BDY + ot::TreeNode::Z_POS_BDY)) ==
00675 (ot::TreeNode::X_POS_BDY + ot::TreeNode::Z_POS_BDY) ) {
00676 bdy.push_back(ot::TreeNode((1u << maxDepth), y, (1u << maxDepth),
00677 (d+1) | ot::TreeNode::BOUNDARY, dim, maxDepth+1));
00678 }
00679
00680
00681 if ( (bdyFlags & (ot::TreeNode::Y_POS_BDY + ot::TreeNode::Z_POS_BDY)) ==
00682 (ot::TreeNode::Y_POS_BDY + ot::TreeNode::Z_POS_BDY) ) {
00683 bdy.push_back(ot::TreeNode(x, (1u << maxDepth),(1u << maxDepth),
00684 (d+1) | ot::TreeNode::BOUNDARY, dim, maxDepth+1));
00685 }
00686
00687
00688
00689 if ( (bdyFlags & (ot::TreeNode::X_POS_BDY + ot::TreeNode::Y_POS_BDY +
00690 ot::TreeNode::Z_POS_BDY)) == (ot::TreeNode::X_POS_BDY +
00691 ot::TreeNode::Y_POS_BDY + ot::TreeNode::Z_POS_BDY) ) {
00692 bdy.push_back(ot::TreeNode((1u << maxDepth), (1u << maxDepth), (1u << maxDepth),
00693 (d+1) | ot::TreeNode::BOUNDARY, dim, maxDepth+1));
00694 }
00695 }
00696
00697
00698 in[i] = ot::TreeNode(x, y, z, d+1, dim, maxDepth+1);
00699
00700 }
00701
00702
00703
00704
00705 PROF_ADD_BDY_END
00706 }
00707
00708 void flagNodesType1(std::vector<ot::TreeNode> & in, MPI_Comm comm) {
00709
00710 #ifdef __PROF_WITH_BARRIER__
00711 MPI_Barrier(comm);
00712 #endif
00713
00714 PROF_MARK_HANGING_BEGIN
00715
00716 PROF_FLN_STAGE1_BEGIN
00717
00718 int npes;
00719 int rank;
00720
00721 MPI_Comm_rank(comm, &rank);
00722 MPI_Comm_size(comm, &npes);
00723
00724 std::vector<unsigned int > keysCount(in.size());
00725 std::vector<ot::TreeNode > keys;
00726
00727 assert(!in.empty());
00728 unsigned int maxD = in[0].getMaxDepth();
00729 unsigned int dim = in[0].getDim();
00730 ot::TreeNode* inPtr = (&(*(in.begin())));
00731
00732
00733 for (int i = 0; i < in.size(); i++) {
00734 keysCount[i] = 0;
00735 unsigned int myLev = inPtr[i].getLevel();
00736 if (myLev == 1) {
00737 continue;
00738 }
00739 unsigned int childNum = inPtr[i].getChildNumber();
00740 unsigned int mySz = (1u << (maxD - myLev));
00741 unsigned int myX = inPtr[i].getX();
00742 unsigned int myY = inPtr[i].getY();
00743 unsigned int myZ = inPtr[i].getZ();
00744
00745 switch (childNum) {
00746 case 0:
00747 {
00748 break;
00749 }
00750 case 7:
00751 {
00752 break;
00753 }
00754 case 1:
00755 {
00756
00757 if (myY) {
00758 TreeNode tmp(myX,myY-mySz,myZ,myLev,dim,maxD);
00759 keys.push_back(tmp.getParent());
00760 keysCount[i]++;
00761 }
00762 if (myZ) {
00763 TreeNode tmp(myX,myY,myZ-mySz,myLev,dim,maxD);
00764 keys.push_back(tmp.getParent());
00765 keysCount[i]++;
00766 }
00767 if (myY && myZ) {
00768 TreeNode tmp(myX,myY-mySz,myZ-mySz,myLev,dim,maxD);
00769 keys.push_back(tmp.getParent());
00770 keysCount[i]++;
00771 }
00772 break;
00773 }
00774 case 2:
00775 {
00776
00777 if (myX) {
00778 TreeNode tmp(myX-mySz,myY,myZ,myLev,dim,maxD);
00779 keys.push_back(tmp.getParent());
00780 keysCount[i]++;
00781 }
00782 if (myZ) {
00783 TreeNode tmp(myX,myY,myZ-mySz,myLev,dim,maxD);
00784 keys.push_back(tmp.getParent());
00785 keysCount[i]++;
00786 }
00787 if (myX && myZ) {
00788 TreeNode tmp(myX-mySz,myY,myZ-mySz,myLev,dim,maxD);
00789 keys.push_back(tmp.getParent());
00790 keysCount[i]++;
00791 }
00792 break;
00793 }
00794 case 3:
00795 {
00796
00797 if (myZ) {
00798 TreeNode tmp(myX,myY,myZ-mySz,myLev,dim,maxD);
00799 keys.push_back(tmp.getParent());
00800 keysCount[i]++;
00801 }
00802 break;
00803 }
00804 case 4:
00805 {
00806
00807 if (myX) {
00808 TreeNode tmp(myX-mySz,myY,myZ,myLev,dim,maxD);
00809 keys.push_back(tmp.getParent());
00810 keysCount[i]++;
00811 }
00812 if (myY) {
00813 TreeNode tmp(myX,myY-mySz,myZ,myLev,dim,maxD);
00814 keys.push_back(tmp.getParent());
00815 keysCount[i]++;
00816 }
00817 if (myX && myY) {
00818 TreeNode tmp(myX-mySz,myY-mySz,myZ,myLev,dim,maxD);
00819 keys.push_back(tmp.getParent());
00820 keysCount[i]++;
00821 }
00822 break;
00823 }
00824 case 5:
00825 {
00826
00827 if (myY) {
00828 TreeNode tmp(myX,myY-mySz,myZ,myLev,dim,maxD);
00829 keys.push_back(tmp.getParent());
00830 keysCount[i]++;
00831 }
00832 break;
00833 }
00834 case 6:
00835 {
00836
00837 if (myX) {
00838 TreeNode tmp(myX-mySz,myY,myZ,myLev,dim,maxD);
00839 keys.push_back(tmp.getParent());
00840 keysCount[i]++;
00841 }
00842 break;
00843 }
00844 default: assert(false);
00845 }
00846 }
00847
00848 PROF_FLN_STAGE1_END
00849 PROF_FLN_STAGE2_BEGIN
00850
00851
00852 std::vector<ot::TreeNode> mins(npes);
00853
00854 par::Mpi_Allgather<ot::TreeNode>(inPtr, &(*(mins.begin())), 1, comm);
00855
00856 unsigned int *part = NULL;
00857 if(!keys.empty()) {
00858 part = new unsigned int[keys.size()];
00859 }
00860
00861 for (unsigned int i = 0; i < keys.size(); i++) {
00862 unsigned int idx;
00863
00864
00865 bool found = seq::maxLowerBound<TreeNode >(mins, keys[i], idx, NULL, NULL);
00866 if (!found ) {
00867 part[i] = rank;
00868 } else {
00869 part[i] = idx;
00870 }
00871 }
00872 mins.clear();
00873
00874 PROF_FLN_STAGE2_END
00875 PROF_FLN_STAGE3_BEGIN
00876
00877 int *numKeysSend = new int[npes];
00878 int *numKeysRecv = new int[npes];
00879 for (int i = 0; i < npes; i++) {
00880 numKeysSend[i] = 0;
00881 }
00882
00883
00884 for (unsigned int i = 0; i < keys.size(); i++) {
00885 assert(part[i] < npes);
00886 numKeysSend[part[i]]++;
00887 }
00888
00889
00890 par::Mpi_Alltoall<int>(numKeysSend, numKeysRecv, 1, comm);
00891
00892 unsigned int totalKeys=0;
00893 for (int i = 0; i < npes; i++) {
00894 totalKeys += numKeysRecv[i];
00895 }
00896
00897 #ifdef __MEASURE_FLAG_NODES__
00898 MPI_Barrier(comm);
00899 int numProcsSend = 0;
00900 int numProcsRecv = 0;
00901 for(int i = 0; i < npes; i++) {
00902 if(numKeysSend[i]) {
00903 numProcsSend++;
00904 }
00905 if(numKeysRecv[i]) {
00906 numProcsRecv++;
00907 }
00908 }
00909 int* allNumProcsSend = new int[npes];
00910 int* allNumProcsRecv = new int[npes];
00911 unsigned int* allKeysSz = new unsigned int[npes];
00912 unsigned int* allTotalRecv = new unsigned int[npes];
00913 unsigned int localKeysSize = keys.size();
00914 par::Mpi_Gather<int>(&numProcsSend, allNumProcsSend, 1, 0, comm);
00915 par::Mpi_Gather<int>(&numProcsRecv, allNumProcsRecv, 1, 0, comm);
00916 par::Mpi_Gather<unsigned int>(&localKeysSize, allKeysSz, 1, 0, comm);
00917 par::Mpi_Gather<unsigned int>(&totalKeys, allTotalRecv, 1, 0, comm);
00918 if(!rank) {
00919 for(int i = 0; i < npes; i++) {
00920 std::cout<<"In Flag Nodes: allNumProcsSend["<<i<<"] = "<<allNumProcsSend[i]<<std::endl;
00921 std::cout<<"In Flag Nodes: allNumProcsRecv["<<i<<"] = "<<allNumProcsRecv[i]<<std::endl;
00922 std::cout<<"In Flag Nodes: allKeysSz["<<i<<"] = "<<allKeysSz[i]<<std::endl;
00923 std::cout<<"In Flag Nodes: allTotalRecv["<<i<<"] = "<<allTotalRecv[i]<<std::endl;
00924 }
00925 }
00926
00927 delete [] allNumProcsSend;
00928 delete [] allNumProcsRecv;
00929 delete [] allKeysSz;
00930 delete [] allTotalRecv;
00931 MPI_Barrier(comm);
00932 #endif
00933
00934
00935 std::vector<ot::TreeNode> sendK (keys.size());
00936 std::vector<ot::TreeNode> recvK (totalKeys);
00937
00938 unsigned int * comm_map = NULL;
00939 if(!keys.empty()) {
00940 comm_map = new unsigned int [keys.size()];
00941 }
00942
00943
00944 int *sendOffsets = new int[npes]; sendOffsets[0] = 0;
00945 int *recvOffsets = new int[npes]; recvOffsets[0] = 0;
00946 int *numKeysTmp = new int[npes]; numKeysTmp[0] = 0;
00947
00948
00949 for (int i = 1; i < npes; i++) {
00950 sendOffsets[i] = sendOffsets[i-1] + numKeysSend[i-1];
00951 recvOffsets[i] = recvOffsets[i-1] + numKeysRecv[i-1];
00952 numKeysTmp[i] = 0;
00953 }
00954
00955 for (unsigned int i = 0; i < keys.size(); i++) {
00956 unsigned int ni = numKeysTmp[part[i]];
00957 numKeysTmp[part[i]]++;
00958
00959 assert((sendOffsets[part[i]] + ni) < keys.size());
00960 sendK[sendOffsets[part[i]] + ni] = keys[i];
00961
00962 comm_map[i] = sendOffsets[part[i]] + ni;
00963 }
00964 unsigned int keysSz = keys.size();
00965 keys.clear();
00966
00967 if(part) {
00968 delete [] part;
00969 part = NULL;
00970 }
00971
00972 delete [] numKeysTmp;
00973 numKeysTmp = NULL;
00974
00975 ot::TreeNode* sendKptr = NULL;
00976 ot::TreeNode* recvKptr = NULL;
00977 if(!sendK.empty()) {
00978 sendKptr = &(*(sendK.begin()));
00979 }
00980 if(!recvK.empty()) {
00981 recvKptr = &(*(recvK.begin()));
00982 }
00983
00984 par::Mpi_Alltoallv_sparse<ot::TreeNode>(sendKptr, numKeysSend, sendOffsets,
00985 recvKptr, numKeysRecv, recvOffsets, comm);
00986
00987 sendK.clear();
00988
00989 PROF_FLN_STAGE3_END
00990 PROF_FLN_STAGE4_BEGIN
00991
00992 std::vector<char> resSend (totalKeys);
00993 std::vector<char> resRecv (keysSz);
00994
00995 for (unsigned int i = 0; i < totalKeys; i++) {
00996 unsigned int idx;
00997 resSend[i] = static_cast<char>(
00998 seq::BinarySearch<ot::TreeNode>(inPtr,
00999 in.size(), recvK[i], &idx)) ;
01000 }
01001 recvK.clear();
01002
01003 PROF_FLN_STAGE4_END
01004 PROF_FLN_STAGE5_BEGIN
01005
01006 char* resSendPtr = NULL;
01007 char* resRecvPtr = NULL;
01008 if(!resSend.empty()) {
01009 resSendPtr = &(*(resSend.begin()));
01010 }
01011 if(!resRecv.empty()) {
01012 resRecvPtr = &(*(resRecv.begin()));
01013 }
01014
01015 par::Mpi_Alltoallv_sparse<char>( resSendPtr, numKeysRecv, recvOffsets,
01016 resRecvPtr, numKeysSend, sendOffsets, comm);
01017
01018 resSend.clear();
01019
01020 delete [] sendOffsets;
01021 sendOffsets = NULL;
01022
01023 delete [] recvOffsets;
01024 recvOffsets = NULL;
01025
01026 delete [] numKeysSend;
01027 numKeysSend = NULL;
01028
01029 delete [] numKeysRecv;
01030 numKeysRecv = NULL;
01031
01032 unsigned int st = 0;
01033 for (unsigned int i = 0; i < in.size(); i++) {
01034 bool isHanging = false;
01035 for (unsigned int k = 0; k < keysCount[i]; k++) {
01036 assert(comm_map[st + k] < resRecv.size());
01037 if (resRecv[comm_map[st + k]]) {
01038 isHanging = true;
01039 break;
01040 }
01041 }
01042 if (!isHanging) {
01043 in[i].orFlag(ot::TreeNode::NODE);
01044 }
01045 st += keysCount[i];
01046 }
01047 assert(st == keysSz);
01048
01049 if(comm_map) {
01050 delete [] comm_map;
01051 comm_map = NULL;
01052 }
01053
01054 resRecv.clear();
01055
01056 PROF_FLN_STAGE5_END
01057
01058 PROF_MARK_HANGING_END
01059 }
01060
01061
01062 void flagNodesType2(std::vector<ot::TreeNode> & in, MPI_Comm comm) {
01063
01064 #ifdef __PROF_WITH_BARRIER__
01065 MPI_Barrier(comm);
01066 #endif
01067
01068 PROF_MARK_HANGING_BEGIN
01069
01070 PROF_FLN_STAGE1_BEGIN
01071
01072 int npes;
01073 int rank;
01074
01075 MPI_Comm_rank(comm, &rank);
01076 MPI_Comm_size(comm, &npes);
01077
01078 std::vector<ot::TreeNode > keys;
01079
01080 assert(!in.empty());
01081 unsigned int maxD = in[0].getMaxDepth();
01082 unsigned int dim = in[0].getDim();
01083
01084 ot::TreeNode* inPtr = (&(*in.begin()));
01085
01086
01087
01088
01089
01090
01091 for (int i = 0; i < in.size(); i++) {
01092 unsigned int myLev = inPtr[i].getLevel();
01093 if (myLev == maxD) {
01094 continue;
01095 }
01096 unsigned int mySz = (1u << (maxD - myLev));
01097 unsigned int myX = inPtr[i].getX();
01098 unsigned int myY = inPtr[i].getY();
01099 unsigned int myZ = inPtr[i].getZ();
01100
01101 if( (myX + mySz) <= (1u << (maxD-1)) ) {
01102
01103 TreeNode tmp6x((myX + mySz), (myY + (mySz>>1)), (myZ + (mySz>>1)), (myLev + 1), dim, maxD);
01104 keys.push_back(tmp6x);
01105
01106
01107 TreeNode tmp4x((myX + mySz), myY, (myZ + (mySz>>1)), (myLev + 1), dim, maxD);
01108 keys.push_back(tmp4x);
01109
01110
01111 TreeNode tmp2x((myX + mySz), (myY + (mySz>>1)), myZ, (myLev + 1), dim, maxD);
01112 keys.push_back(tmp2x);
01113
01114 if( (myY + mySz) <= (1u << (maxD-1)) ) {
01115
01116 TreeNode tmp4xy((myX + mySz), (myY + mySz), (myZ + (mySz>>1)), (myLev + 1), dim, maxD);
01117 keys.push_back(tmp4xy);
01118 }
01119
01120 if( (myZ + mySz) <= (1u << (maxD-1)) ) {
01121
01122 TreeNode tmp2xz((myX + mySz), (myY + (mySz>>1)), (myZ + mySz), (myLev + 1), dim, maxD);
01123 keys.push_back(tmp2xz);
01124 }
01125 }
01126
01127 if( (myY + mySz) <= (1u << (maxD-1)) ) {
01128
01129 TreeNode tmp5y((myX + (mySz>>1)), (myY + mySz), (myZ + (mySz>>1)), (myLev + 1), dim, maxD);
01130 keys.push_back(tmp5y);
01131
01132
01133 TreeNode tmp1y((myX + (mySz>>1)), (myY + mySz), myZ, (myLev + 1), dim, maxD);
01134 keys.push_back(tmp1y);
01135
01136
01137 TreeNode tmp4y(myX, (myY + mySz), (myZ + (mySz>>1)), (myLev + 1), dim, maxD);
01138 keys.push_back(tmp4y);
01139
01140 if( (myZ + mySz) <= (1u << (maxD-1)) ) {
01141
01142 TreeNode tmp1yz((myX + (mySz>>1)), (myY + mySz), (myZ + mySz), (myLev + 1), dim, maxD);
01143 keys.push_back(tmp1yz);
01144 }
01145 }
01146
01147 if( (myZ + mySz) <= (1u << (maxD-1)) ) {
01148
01149 TreeNode tmp3z((myX + (mySz>>1)), (myY + (mySz>>1)), (myZ + mySz), (myLev + 1), dim, maxD);
01150 keys.push_back(tmp3z);
01151
01152
01153 TreeNode tmp1z((myX + (mySz>>1)), myY, (myZ + mySz), (myLev + 1), dim, maxD);
01154 keys.push_back(tmp1z);
01155
01156
01157 TreeNode tmp2z(myX, (myY + (mySz>>1)), (myZ + mySz), (myLev + 1), dim, maxD);
01158 keys.push_back(tmp2z);
01159 }
01160 }
01161
01162 #ifdef __MEASURE_FLAG_NODES__
01163 unsigned int forwardKeysCount = 0;
01164 for (int i = 0; i < in.size(); i++) {
01165 unsigned int myLev = inPtr[i].getLevel();
01166 if (myLev == 1) {
01167 continue;
01168 }
01169 unsigned int childNum = inPtr[i].getChildNumber();
01170 unsigned int mySz = (1u << (maxD - myLev));
01171 unsigned int myX = inPtr[i].getX();
01172 unsigned int myY = inPtr[i].getY();
01173 unsigned int myZ = inPtr[i].getZ();
01174
01175 switch (childNum) {
01176 case 0:
01177 {
01178 break;
01179 }
01180 case 7:
01181 {
01182 break;
01183 }
01184 case 1:
01185 {
01186
01187 if (myY) {
01188 forwardKeysCount++;
01189 }
01190 if (myZ) {
01191 forwardKeysCount++;
01192 }
01193 if (myY && myZ) {
01194 forwardKeysCount++;
01195 }
01196 break;
01197 }
01198 case 2:
01199 {
01200
01201 if (myX) {
01202 forwardKeysCount++;
01203 }
01204 if (myZ) {
01205 forwardKeysCount++;
01206 }
01207 if (myX && myZ) {
01208 forwardKeysCount++;
01209 }
01210 break;
01211 }
01212 case 3:
01213 {
01214
01215 if (myZ) {
01216 forwardKeysCount++;
01217 }
01218 break;
01219 }
01220 case 4:
01221 {
01222
01223 if (myX) {
01224 forwardKeysCount++;
01225 }
01226 if (myY) {
01227 forwardKeysCount++;
01228 }
01229 if (myX && myY) {
01230 forwardKeysCount++;
01231 }
01232 break;
01233 }
01234 case 5:
01235 {
01236
01237 if (myY) {
01238 forwardKeysCount++;
01239 }
01240 break;
01241 }
01242 case 6:
01243 {
01244
01245 if (myX) {
01246 forwardKeysCount++;
01247 }
01248 break;
01249 }
01250 default: assert(false);
01251 }
01252 }
01253 #endif
01254
01255 PROF_FLN_STAGE1_END
01256 PROF_FLN_STAGE2_BEGIN
01257
01258 #ifdef __MEASURE_FLAG_NODES__
01259 unsigned int keysSzBefore = keys.size();
01260 #endif
01261
01262
01263
01264 seq::makeVectorUnique<ot::TreeNode>(keys, false);
01265
01266 #ifdef __MEASURE_FLAG_NODES__
01267 unsigned int keysSzAfter = keys.size();
01268 unsigned int* allKeysSzBefore = new unsigned int[npes];
01269 unsigned int* allKeysSzAfter = new unsigned int[npes];
01270 unsigned int* allForwardKeysCount = new unsigned int[npes];
01271 par::Mpi_Gather<unsigned int>(&keysSzBefore, allKeysSzBefore, 1, 0, comm);
01272 par::Mpi_Gather<unsigned int>(&keysSzAfter, allKeysSzAfter, 1, 0, comm);
01273 par::Mpi_Gather<unsigned int>(&forwardKeysCount, allForwardKeysCount, 1, 0, comm);
01274 if(!rank) {
01275 for(int i = 0; i < npes; i++) {
01276 std::cout<<"rank = "<<i<<"keys Before = "<<allKeysSzBefore[i]
01277 <<" After = "<<allKeysSzAfter[i]
01278 <<" Forward = "<<allForwardKeysCount[i]
01279 <<std::endl;
01280 }
01281 }
01282 delete [] allKeysSzBefore;
01283 delete [] allKeysSzAfter;
01284 delete [] allForwardKeysCount;
01285 MPI_Barrier(comm);
01286 #endif
01287
01288 PROF_FLN_STAGE2_END
01289 PROF_FLN_STAGE3_BEGIN
01290
01291
01292 ot::TreeNode* mins = new ot::TreeNode[npes];
01293
01294 par::Mpi_Allgather<ot::TreeNode>(inPtr, mins, 1, comm);
01295
01296
01297 MPI_Request* requests = new MPI_Request[npes<<1];
01298 ot::TreeNode* keysPtr = NULL;
01299 if(!(keys.empty())) {
01300 keysPtr = (&(*keys.begin()));
01301 }
01302
01303 int* sendOffsets = new int[npes];
01304 int* sendCounts = new int[npes];
01305 for(int i = 0; i < npes; i++) {
01306 sendOffsets[i] = 0;
01307 sendCounts[i] = 0;
01308 }
01309
01310 for(int i = 0; i < (npes - 1); i++) {
01311 for(int j = sendOffsets[i]; j < keys.size(); j++) {
01312 if(keysPtr[j] >= mins[i+1]) {
01313 break;
01314 } else {
01315 sendCounts[i]++;
01316 }
01317 }
01318 sendOffsets[i + 1] = (sendOffsets[i] + sendCounts[i]);
01319 }
01320
01321 sendCounts[npes - 1] = (keys.size() - sendOffsets[npes - 1]);
01322
01323 delete [] mins;
01324
01325 int* recvCounts = new int[npes];
01326 par::Mpi_Alltoall(sendCounts, recvCounts, 1, comm);
01327
01328 std::vector<ot::TreeNode>* recvNodes = new std::vector<ot::TreeNode>[npes];
01329 for(int i = 0; i < npes; i++) {
01330 recvNodes[i].resize(recvCounts[i]);
01331 }
01332
01333
01334 for(int i = 0; i < rank; i++) {
01335 if(recvCounts[i]) {
01336 par::Mpi_Irecv<ot::TreeNode>( (&(*((recvNodes[i]).begin()))) , recvCounts[i],
01337 i, 1, comm, (requests + (i<<1)) );
01338 }
01339 }
01340
01341 for(int i = (rank + 1); i < npes; i++) {
01342 if(recvCounts[i]) {
01343 par::Mpi_Irecv<ot::TreeNode>( (&(*((recvNodes[i]).begin()))) , recvCounts[i],
01344 i, 1, comm, (requests + (i<<1)) );
01345 }
01346 }
01347
01348
01349 for(int i = 0; i < rank; i++) {
01350 if(sendCounts[i]) {
01351 par::Mpi_Issend<ot::TreeNode>( (keysPtr + sendOffsets[i]), sendCounts[i],
01352 i, 1, comm, (requests + ((i<<1) + 1)) );
01353 }
01354 }
01355
01356 for(int i = (rank + 1); i < npes; i++) {
01357 if(sendCounts[i]) {
01358 par::Mpi_Issend<ot::TreeNode>( (keysPtr + sendOffsets[i]), sendCounts[i],
01359 i, 1, comm, (requests + ((i<<1) + 1)) );
01360 }
01361 }
01362
01363 PROF_FLN_STAGE3_END
01364 PROF_FLN_STAGE4_BEGIN
01365
01366 bool* isHanging = new bool[in.size()];
01367 for(int i = 0; i < in.size(); i++) {
01368 isHanging[i] = false;
01369 }
01370
01371 for(int i = sendOffsets[rank]; i < (sendOffsets[rank] + sendCounts[rank]); i++) {
01372 unsigned int retIdx;
01373 bool found = seq::BinarySearch(inPtr, in.size(), keysPtr[i], &retIdx);
01374 if(found) {
01375
01376 isHanging[retIdx] = true;
01377 }
01378 }
01379
01380 delete [] sendOffsets;
01381
01382 PROF_FLN_STAGE4_END
01383 PROF_FLN_STAGE5_BEGIN
01384
01385
01386 for(int i = 0; i < rank; i++) {
01387 if(recvCounts[i]) {
01388 MPI_Status status;
01389 MPI_Wait( (requests + (i<<1)), &status);
01390 }
01391 }
01392
01393 for(int i = (rank + 1); i < npes; i++) {
01394 if(recvCounts[i]) {
01395 MPI_Status status;
01396 MPI_Wait( (requests + (i<<1)), &status);
01397 }
01398 }
01399
01400 PROF_FLN_STAGE5_END
01401 PROF_FLN_STAGE6_BEGIN
01402
01403 for(int i = 0; i < rank; i++) {
01404 for(int j = 0; j < recvCounts[i]; j++) {
01405 unsigned int retIdx;
01406 bool found = seq::BinarySearch(inPtr, in.size(), recvNodes[i][j], &retIdx);
01407 if(found) {
01408
01409 isHanging[retIdx] = true;
01410 }
01411 }
01412 }
01413
01414 for(int i = (rank + 1); i < npes; i++) {
01415 for(int j = 0; j < recvCounts[i]; j++) {
01416 unsigned int retIdx;
01417 bool found = seq::BinarySearch(inPtr, in.size(), recvNodes[i][j], &retIdx);
01418 if(found) {
01419
01420 isHanging[retIdx] = true;
01421 }
01422 }
01423 }
01424
01425 for(int i = 0; i < in.size(); i++) {
01426 if(!isHanging[i]) {
01427 inPtr[i].orFlag(ot::TreeNode::NODE);
01428 }
01429 }
01430
01431 delete [] isHanging;
01432 delete [] recvNodes;
01433 delete [] recvCounts;
01434
01435 PROF_FLN_STAGE6_END
01436 PROF_FLN_STAGE7_BEGIN
01437
01438
01439 for(int i = 0; i < rank; i++) {
01440 if(sendCounts[i]) {
01441 MPI_Status status;
01442 MPI_Wait( (requests + ((i<<1) + 1)), &status);
01443 }
01444 }
01445
01446 for(int i = (rank + 1); i < npes; i++) {
01447 if(sendCounts[i]) {
01448 MPI_Status status;
01449 MPI_Wait( (requests + ((i<<1) + 1)), &status);
01450 }
01451 }
01452
01453 keys.clear();
01454 delete [] sendCounts;
01455 delete [] requests;
01456
01457 PROF_FLN_STAGE7_END
01458
01459 PROF_MARK_HANGING_END
01460
01461 }
01462
01463 void flagNodesType3(std::vector<ot::TreeNode> & in, MPI_Comm comm) {
01464
01465 #ifdef __PROF_WITH_BARRIER__
01466 MPI_Barrier(comm);
01467 #endif
01468
01469 PROF_MARK_HANGING_BEGIN
01470
01471 PROF_FLN_STAGE1_BEGIN
01472
01473 int npes;
01474 int rank;
01475
01476 MPI_Comm_rank(comm, &rank);
01477 MPI_Comm_size(comm, &npes);
01478
01479 std::vector<ot::TreeNode > keys;
01480
01481 assert(!in.empty());
01482 unsigned int maxD = in[0].getMaxDepth();
01483 unsigned int dim = in[0].getDim();
01484 ot::TreeNode* inPtr = (&(*(in.begin())));
01485
01486
01487 for (int i = 0; i < in.size(); i++) {
01488
01489 unsigned int myLev = inPtr[i].getLevel();
01490 if (myLev == 1) {
01491 continue;
01492 }
01493 unsigned int currKeySz = static_cast<unsigned int>(keys.size());
01494 unsigned int childNum = inPtr[i].getChildNumber();
01495 unsigned int mySz = (1u << (maxD - myLev));
01496 unsigned int myX = inPtr[i].getX();
01497 unsigned int myY = inPtr[i].getY();
01498 unsigned int myZ = inPtr[i].getZ();
01499
01500 switch (childNum) {
01501 case 0:
01502 {
01503 break;
01504 }
01505 case 7:
01506 {
01507 break;
01508 }
01509 case 1:
01510 {
01511
01512 if (myY) {
01513 TreeNode tmp(myX,myY-mySz,myZ,myLev,dim,maxD);
01514 keys.push_back(tmp.getParent());
01515 }
01516 if (myZ) {
01517 TreeNode tmp(myX,myY,myZ-mySz,myLev,dim,maxD);
01518 keys.push_back(tmp.getParent());
01519 }
01520 if (myY && myZ) {
01521 TreeNode tmp(myX,myY-mySz,myZ-mySz,myLev,dim,maxD);
01522 keys.push_back(tmp.getParent());
01523 }
01524 break;
01525 }
01526 case 2:
01527 {
01528
01529 if (myX) {
01530 TreeNode tmp(myX-mySz,myY,myZ,myLev,dim,maxD);
01531 keys.push_back(tmp.getParent());
01532 }
01533 if (myZ) {
01534 TreeNode tmp(myX,myY,myZ-mySz,myLev,dim,maxD);
01535 keys.push_back(tmp.getParent());
01536 }
01537 if (myX && myZ) {
01538 TreeNode tmp(myX-mySz,myY,myZ-mySz,myLev,dim,maxD);
01539 keys.push_back(tmp.getParent());
01540 }
01541 break;
01542 }
01543 case 3:
01544 {
01545
01546 if (myZ) {
01547 TreeNode tmp(myX,myY,myZ-mySz,myLev,dim,maxD);
01548 keys.push_back(tmp.getParent());
01549 }
01550 break;
01551 }
01552 case 4:
01553 {
01554
01555 if (myX) {
01556 TreeNode tmp(myX-mySz,myY,myZ,myLev,dim,maxD);
01557 keys.push_back(tmp.getParent());
01558 }
01559 if (myY) {
01560 TreeNode tmp(myX,myY-mySz,myZ,myLev,dim,maxD);
01561 keys.push_back(tmp.getParent());
01562 }
01563 if (myX && myY) {
01564 TreeNode tmp(myX-mySz,myY-mySz,myZ,myLev,dim,maxD);
01565 keys.push_back(tmp.getParent());
01566 }
01567 break;
01568 }
01569 case 5:
01570 {
01571
01572 if (myY) {
01573 TreeNode tmp(myX,myY-mySz,myZ,myLev,dim,maxD);
01574 keys.push_back(tmp.getParent());
01575 }
01576 break;
01577 }
01578 case 6:
01579 {
01580
01581 if (myX) {
01582 TreeNode tmp(myX-mySz,myY,myZ,myLev,dim,maxD);
01583 keys.push_back(tmp.getParent());
01584 }
01585 break;
01586 }
01587 default: assert(false);
01588 }
01589 for(int j = currKeySz; j < keys.size(); j++) {
01590 keys[j].setWeight(i);
01591 }
01592 }
01593
01594 PROF_FLN_STAGE1_END
01595
01596 #ifdef __DEBUG_DA_PUBLIC__
01597 MPI_Barrier(comm);
01598 if(!rank) {
01599 std::cout<<"FLN Stage 1 passed."<<std::endl;
01600 }
01601 MPI_Barrier(comm);
01602 #endif
01603
01604 PROF_FLN_STAGE2_BEGIN
01605
01606
01607 std::vector<ot::TreeNode> mins(npes);
01608
01609 par::Mpi_Allgather<ot::TreeNode>(inPtr, (&(*(mins.begin()))), 1, comm);
01610
01611 unsigned int *part = NULL;
01612 if(!keys.empty()) {
01613 part = new unsigned int[keys.size()];
01614 }
01615
01616 for (unsigned int i = 0; i < keys.size(); i++) {
01617 unsigned int idx;
01618
01619
01620 bool found = seq::maxLowerBound<TreeNode >(mins, keys[i], idx, NULL, NULL);
01621 if (!found ) {
01622 part[i] = rank;
01623 } else {
01624 part[i] = idx;
01625 }
01626 }
01627 mins.clear();
01628
01629 PROF_FLN_STAGE2_END
01630
01631 #ifdef __DEBUG_DA_PUBLIC__
01632 MPI_Barrier(comm);
01633 if(!rank) {
01634 std::cout<<"FLN Stage 2 passed."<<std::endl;
01635 }
01636 MPI_Barrier(comm);
01637 #endif
01638
01639 PROF_FLN_STAGE3_BEGIN
01640
01641 int *numKeysSend = new int[npes];
01642 int *numKeysRecv = new int[npes];
01643 for (int i = 0; i < npes; i++) {
01644 numKeysSend[i] = 0;
01645 }
01646
01647
01648 for (unsigned int i = 0; i < keys.size(); i++) {
01649 assert(part[i] < npes);
01650 numKeysSend[part[i]]++;
01651 }
01652
01653
01654 par::Mpi_Alltoall<int>(numKeysSend, numKeysRecv, 1, comm);
01655
01656 unsigned int totalKeys=0;
01657 for (int i = 0; i < npes; i++) {
01658 totalKeys += numKeysRecv[i];
01659 }
01660
01661 #ifdef __MEASURE_FLAG_NODES__
01662 MPI_Barrier(comm);
01663 int numProcsSend = 0;
01664 int numProcsRecv = 0;
01665 for(int i = 0; i < npes; i++) {
01666 if(numKeysSend[i]) {
01667 numProcsSend++;
01668 }
01669 if(numKeysRecv[i]) {
01670 numProcsRecv++;
01671 }
01672 }
01673 int* allNumProcsSend = new int[npes];
01674 int* allNumProcsRecv = new int[npes];
01675 unsigned int* allKeysSz = new unsigned int[npes];
01676 unsigned int* allTotalRecv = new unsigned int[npes];
01677 unsigned int localKeysSize = keys.size();
01678 par::Mpi_Gather<int>(&numProcsSend, allNumProcsSend, 1, 0, comm);
01679 par::Mpi_Gather<int>(&numProcsRecv, allNumProcsRecv, 1, 0, comm);
01680 par::Mpi_Gather<unsigned int>(&localKeysSize, allKeysSz, 1, 0, comm);
01681 par::Mpi_Gather<unsigned int>(&totalKeys, allTotalRecv, 1, 0, comm);
01682 if(!rank) {
01683 for(int i = 0; i < npes; i++) {
01684 std::cout<<"In Flag Nodes: allNumProcsSend["<<i<<"] = "<<allNumProcsSend[i]<<std::endl;
01685 std::cout<<"In Flag Nodes: allNumProcsRecv["<<i<<"] = "<<allNumProcsRecv[i]<<std::endl;
01686 std::cout<<"In Flag Nodes: allKeysSz["<<i<<"] = "<<allKeysSz[i]<<std::endl;
01687 std::cout<<"In Flag Nodes: allTotalRecv["<<i<<"] = "<<allTotalRecv[i]<<std::endl;
01688 }
01689 }
01690
01691 delete [] allNumProcsSend;
01692 delete [] allNumProcsRecv;
01693 delete [] allKeysSz;
01694 delete [] allTotalRecv;
01695 MPI_Barrier(comm);
01696 #endif
01697
01698
01699 ot::TreeNode* sendK = NULL;
01700 if(!(keys.empty())) {
01701 sendK = new ot::TreeNode[keys.size()];
01702 }
01703
01704 ot::TreeNode* recvK = NULL;
01705 if(totalKeys) {
01706 recvK = new ot::TreeNode[totalKeys];
01707 }
01708
01709
01710 unsigned int* comm_map = NULL;
01711 if(!keys.empty()) {
01712 comm_map = new unsigned int[keys.size()];
01713 }
01714
01715
01716 int *sendOffsets = new int[npes]; sendOffsets[0] = 0;
01717 int *recvOffsets = new int[npes]; recvOffsets[0] = 0;
01718 int *numKeysTmp = new int[npes]; numKeysTmp[0] = 0;
01719
01720
01721 for (int i = 1; i < npes; i++) {
01722 sendOffsets[i] = sendOffsets[i-1] + numKeysSend[i-1];
01723 recvOffsets[i] = recvOffsets[i-1] + numKeysRecv[i-1];
01724 numKeysTmp[i] = 0;
01725 }
01726
01727 for (unsigned int i = 0; i < keys.size(); i++) {
01728 unsigned int ni = numKeysTmp[part[i]];
01729 numKeysTmp[part[i]]++;
01730
01731 assert((sendOffsets[part[i]] + ni) < keys.size());
01732 sendK[sendOffsets[part[i]] + ni] = keys[i];
01733
01734 comm_map[sendOffsets[part[i]] + ni] = keys[i].getWeight();
01735 }
01736 unsigned int keysSz = keys.size();
01737 keys.clear();
01738
01739 if(part) {
01740 delete [] part;
01741 part = NULL;
01742 }
01743
01744 delete [] numKeysTmp;
01745 numKeysTmp = NULL;
01746
01747 MPI_Request* requests1 = new MPI_Request[npes<<1];
01748
01749
01750 for(int i = 0; i < rank; i++) {
01751 if(numKeysRecv[i]) {
01752 par::Mpi_Irecv<ot::TreeNode>( (recvK + recvOffsets[i]) , numKeysRecv[i],
01753 i, 1, comm, (requests1 + (i<<1)) );
01754 }
01755 }
01756
01757 for(int i = (rank + 1); i < npes; i++) {
01758 if(numKeysRecv[i]) {
01759 par::Mpi_Irecv<ot::TreeNode>( (recvK + recvOffsets[i]) , numKeysRecv[i],
01760 i, 1, comm, (requests1 + (i<<1)) );
01761 }
01762 }
01763
01764
01765 for(int i = 0; i < rank; i++) {
01766 if(numKeysSend[i]) {
01767 par::Mpi_Issend<ot::TreeNode>( (sendK + sendOffsets[i]), numKeysSend[i],
01768 i, 1, comm, (requests1 + ((i<<1) + 1)) );
01769 }
01770 }
01771
01772 for(int i = (rank + 1); i < npes; i++) {
01773 if(numKeysSend[i]) {
01774 par::Mpi_Issend<ot::TreeNode>( (sendK + sendOffsets[i]), numKeysSend[i],
01775 i, 1, comm, (requests1 + ((i<<1) + 1)) );
01776 }
01777 }
01778
01779 PROF_FLN_STAGE3_END
01780
01781 #ifdef __DEBUG_DA_PUBLIC__
01782 MPI_Barrier(comm);
01783 if(!rank) {
01784 std::cout<<"FLN Stage 3 passed."<<std::endl;
01785 }
01786 MPI_Barrier(comm);
01787 #endif
01788
01789 PROF_FLN_STAGE4_BEGIN
01790
01791 bool* resSend = NULL;
01792 if(totalKeys) {
01793 resSend = new bool[totalKeys];
01794 }
01795
01796 bool* resRecv = NULL;
01797 if(keysSz) {
01798 resRecv = new bool[keysSz];
01799 }
01800
01801 for (unsigned int i = sendOffsets[rank];
01802 i < (sendOffsets[rank] + numKeysSend[rank]); i++) {
01803 unsigned int idx;
01804 resSend[recvOffsets[rank] + (i - sendOffsets[rank])] =
01805 seq::BinarySearch<ot::TreeNode>(inPtr, in.size(), sendK[i], &idx);
01806 }
01807
01808 PROF_FLN_STAGE4_END
01809
01810 #ifdef __DEBUG_DA_PUBLIC__
01811 MPI_Barrier(comm);
01812 if(!rank) {
01813 std::cout<<"FLN Stage 4 passed."<<std::endl;
01814 }
01815 MPI_Barrier(comm);
01816 #endif
01817
01818 PROF_FLN_STAGE5_BEGIN
01819
01820
01821 for(int i = 0; i < rank; i++) {
01822 if(numKeysRecv[i]) {
01823 MPI_Status status;
01824 MPI_Wait( (requests1 + (i<<1)), &status);
01825 }
01826 }
01827
01828 for(int i = (rank + 1); i < npes; i++) {
01829 if(numKeysRecv[i]) {
01830 MPI_Status status;
01831 MPI_Wait( (requests1 + (i<<1)), &status);
01832 }
01833 }
01834
01835 PROF_FLN_STAGE5_END
01836
01837 #ifdef __DEBUG_DA_PUBLIC__
01838 MPI_Barrier(comm);
01839 if(!rank) {
01840 std::cout<<"FLN Stage 5 passed."<<std::endl;
01841 }
01842 MPI_Barrier(comm);
01843 #endif
01844
01845 PROF_FLN_STAGE6_BEGIN
01846
01847 for (unsigned int i = 0; i < recvOffsets[rank]; i++) {
01848 unsigned int idx;
01849 resSend[i] = seq::BinarySearch<ot::TreeNode>(inPtr, in.size(), recvK[i], &idx);
01850 }
01851
01852 for (unsigned int i = (recvOffsets[rank] + numKeysRecv[rank]); i < totalKeys; i++) {
01853 unsigned int idx;
01854 resSend[i] = seq::BinarySearch<ot::TreeNode>(inPtr, in.size(), recvK[i], &idx);
01855 }
01856
01857 PROF_FLN_STAGE6_END
01858
01859 #ifdef __DEBUG_DA_PUBLIC__
01860 MPI_Barrier(comm);
01861 if(!rank) {
01862 std::cout<<"FLN Stage 6 passed."<<std::endl;
01863 }
01864 MPI_Barrier(comm);
01865 #endif
01866
01867 PROF_FLN_STAGE7_BEGIN
01868
01869 MPI_Request* requests2 = new MPI_Request[npes<<1];
01870
01871
01872 for(int i = 0; i < rank; i++) {
01873 if(numKeysSend[i]) {
01874 par::Mpi_Irecv<bool>( (resRecv + sendOffsets[i]) , numKeysSend[i],
01875 i, 1, comm, (requests2 + (i<<1)) );
01876 }
01877 }
01878
01879 for(int i = (rank + 1); i < npes; i++) {
01880 if(numKeysSend[i]) {
01881 par::Mpi_Irecv<bool>( (resRecv + sendOffsets[i]) , numKeysSend[i],
01882 i, 1, comm, (requests2 + (i<<1)) );
01883 }
01884 }
01885
01886
01887 for(int i = 0; i < rank; i++) {
01888 if(numKeysRecv[i]) {
01889 par::Mpi_Issend<bool>( (resSend + recvOffsets[i]), numKeysRecv[i],
01890 i, 1, comm, (requests2 + ((i<<1) + 1)) );
01891 }
01892 }
01893
01894 for(int i = (rank + 1); i < npes; i++) {
01895 if(numKeysRecv[i]) {
01896 par::Mpi_Issend<bool>( (resSend + recvOffsets[i]), numKeysRecv[i],
01897 i, 1, comm, (requests2 + ((i<<1) + 1)) );
01898 }
01899 }
01900
01901 PROF_FLN_STAGE7_END
01902
01903 #ifdef __DEBUG_DA_PUBLIC__
01904 MPI_Barrier(comm);
01905 if(!rank) {
01906 std::cout<<"FLN Stage 7 passed."<<std::endl;
01907 }
01908 MPI_Barrier(comm);
01909 #endif
01910
01911 PROF_FLN_STAGE8_BEGIN
01912 bool* isHanging = new bool[in.size()];
01913 for(unsigned int i = 0; i < in.size(); i++) {
01914 isHanging[i] = false;
01915 }
01916
01917 for(int i = recvOffsets[rank]; i < (recvOffsets[rank] + numKeysRecv[rank]); i++) {
01918 if(resSend[i]) {
01919 isHanging[comm_map[sendOffsets[rank] + i - recvOffsets[rank]]] = true;
01920 }
01921 }
01922
01923 PROF_FLN_STAGE8_END
01924
01925 #ifdef __DEBUG_DA_PUBLIC__
01926 MPI_Barrier(comm);
01927 if(!rank) {
01928 std::cout<<"FLN Stage 8 passed."<<std::endl;
01929 }
01930 MPI_Barrier(comm);
01931 #endif
01932
01933 PROF_FLN_STAGE9_BEGIN
01934
01935
01936 for(int i = 0; i < rank; i++) {
01937 if(numKeysSend[i]) {
01938 MPI_Status status;
01939 MPI_Wait( (requests2 + (i<<1)), &status);
01940 }
01941 }
01942
01943 for(int i = (rank + 1); i < npes; i++) {
01944 if(numKeysSend[i]) {
01945 MPI_Status status;
01946 MPI_Wait( (requests2 + (i<<1)), &status);
01947 }
01948 }
01949
01950 PROF_FLN_STAGE9_END
01951
01952 #ifdef __DEBUG_DA_PUBLIC__
01953 MPI_Barrier(comm);
01954 if(!rank) {
01955 std::cout<<"FLN Stage 9 passed."<<std::endl;
01956 }
01957 MPI_Barrier(comm);
01958 #endif
01959
01960 PROF_FLN_STAGE10_BEGIN
01961
01962 for(int i = 0; i < sendOffsets[rank]; i++) {
01963 if(resRecv[i]) {
01964 isHanging[comm_map[i]] = true;
01965 }
01966 }
01967
01968 for(int i = (sendOffsets[rank] + numKeysSend[rank]); i < keysSz; i++) {
01969 if(resRecv[i]) {
01970 isHanging[comm_map[i]] = true;
01971 }
01972 }
01973
01974 for(unsigned int i = 0; i < in.size(); i++) {
01975 if (!isHanging[i]) {
01976 in[i].orFlag(ot::TreeNode::NODE);
01977 }
01978 }
01979
01980 PROF_FLN_STAGE10_END
01981
01982 #ifdef __DEBUG_DA_PUBLIC__
01983 MPI_Barrier(comm);
01984 if(!rank) {
01985 std::cout<<"FLN Stage 10 passed."<<std::endl;
01986 }
01987 MPI_Barrier(comm);
01988 #endif
01989
01990 PROF_FLN_STAGE11_BEGIN
01991
01992
01993 for(int i = 0; i < rank; i++) {
01994 if(numKeysSend[i]) {
01995 MPI_Status status;
01996 MPI_Wait( (requests1 + ((i<<1) + 1)), &status);
01997 }
01998 }
01999
02000 for(int i = (rank + 1); i < npes; i++) {
02001 if(numKeysSend[i]) {
02002 MPI_Status status;
02003 MPI_Wait( (requests1 + ((i<<1) + 1)), &status);
02004 }
02005 }
02006
02007 for(int i = 0; i < rank; i++) {
02008 if(numKeysRecv[i]) {
02009 MPI_Status status;
02010 MPI_Wait( (requests2 + ((i<<1) + 1)), &status);
02011 }
02012 }
02013
02014 for(int i = (rank + 1); i < npes; i++) {
02015 if(numKeysRecv[i]) {
02016 MPI_Status status;
02017 MPI_Wait( (requests2 + ((i<<1) + 1)), &status);
02018 }
02019 }
02020
02021
02022 if(comm_map) {
02023 delete [] comm_map;
02024 }
02025
02026 if(resRecv) {
02027 delete [] resRecv;
02028 }
02029
02030 if(resSend) {
02031 delete []resSend;
02032 }
02033
02034 if(sendK) {
02035 delete [] sendK;
02036 }
02037
02038 if(recvK) {
02039 delete [] recvK;
02040 }
02041
02042 delete [] sendOffsets;
02043 delete [] recvOffsets;
02044 delete [] numKeysSend;
02045 delete [] numKeysRecv;
02046 delete [] requests1;
02047 delete [] requests2;
02048
02049 PROF_FLN_STAGE11_END
02050
02051 #ifdef __DEBUG_DA_PUBLIC__
02052 MPI_Barrier(comm);
02053 if(!rank) {
02054 std::cout<<"FLN Stage 11 passed."<<std::endl;
02055 }
02056 MPI_Barrier(comm);
02057 #endif
02058
02059 PROF_MARK_HANGING_END
02060 }
02061
02062 }
02063
02064
02065