00001
00009 #include "parUtils.h"
00010 #include "seqUtils.h"
00011 #include "TreeNode.h"
00012 #include "TreeNodePointer.h"
00013 #include "testUtils.h"
00014 #include "dendro.h"
00015
00016 #ifdef __DEBUG__
00017 #ifndef __DEBUG_OCT__
00018 #define __DEBUG_OCT__
00019 #endif
00020 #endif
00021
00022 #ifdef __DEBUG_OCT__
00023 #ifndef __MEASURE_BAL_COMM__
00024 #define __MEASURE_BAL_COMM__
00025 #endif
00026 #endif
00027
00028 namespace ot {
00029
00030
00031 int balanceOctree (std::vector<TreeNode > &in, std::vector<TreeNode > &out,
00032 unsigned int dim, unsigned int maxDepth, bool incCorner,
00033 MPI_Comm comm, MPI_Comm* newCommPtr, bool* iAmActive) {
00034 #ifdef __PROF_WITH_BARRIER__
00035 MPI_Barrier(comm);
00036 #endif
00037 PROF_BAL_BEGIN
00038
00039 int rank, size;
00040 MPI_Comm_size(comm,&size);
00041 out.clear();
00042
00043 if(newCommPtr) {
00044 *newCommPtr = comm;
00045 }
00046
00047 if(iAmActive) {
00048 *iAmActive = true;
00049 }
00050
00052
00053 if (size == 1) {
00054 #ifndef __SILENT_MODE__
00055 std::cout<<"Balance Octree. inpSize: "<<in.size()<<" activeNpes: 1"<<std::endl;
00056 #endif
00057
00058 out = in;
00059 in.clear();
00060 comboRipple(out, incCorner);
00061 PROF_BAL_END
00062 }
00063
00064 MPI_Comm_rank(comm,&rank);
00065
00066
00067 DendroIntL locSize = in.size();
00068 DendroIntL globSize;
00069 PROF_BAL_COMM_BEGIN
00070
00071 par::Mpi_Allreduce<DendroIntL>(&locSize, &globSize, 1, MPI_SUM, comm);
00072
00073 PROF_BAL_COMM_END
00074
00075
00076 const DendroIntL THOUSAND = 1000;
00077 if (globSize < (THOUSAND*size)) {
00078 int splittingSize = (globSize/THOUSAND);
00079 if(splittingSize == 0) {
00080 splittingSize = 1;
00081 }
00082
00083 unsigned int avgLoad = (globSize/splittingSize);
00084 int leftOvers = (globSize - (splittingSize*avgLoad));
00085
00086 PROF_BAL_SCATTER_BEGIN
00087
00088 std::vector<TreeNode> tmpIn;
00089 if(rank >= splittingSize) {
00090 par::scatterValues<ot::TreeNode>(in, tmpIn, 0, comm);
00091 }else if(rank < leftOvers) {
00092 par::scatterValues<ot::TreeNode>(in, tmpIn, (avgLoad+1), comm);
00093 }else {
00094 par::scatterValues<ot::TreeNode>(in, tmpIn, avgLoad, comm);
00095 }
00096
00097 PROF_BAL_SCATTER_END
00098
00099 in.clear();
00100
00101 PROF_BAL_SPLIT_COMM_BEGIN
00102
00103 MPI_Comm newComm;
00104 par::splitCommUsingSplittingRank(splittingSize, &newComm, comm);
00105
00106 PROF_BAL_SPLIT_COMM_END
00107
00108 #ifndef __SILENT_MODE__
00109 if(!rank) {
00110 std::cout<<"Input to Balance is small ("<<globSize
00111 <<"). npes = "<<size<<" Splitting Comm. "<<std::endl;
00112 }
00113 #endif
00114
00115 if(rank < splittingSize) {
00116 balanceOctree (tmpIn, out, dim, maxDepth, incCorner, newComm, NULL, NULL);
00117 } else {
00118 if(iAmActive) {
00119 *iAmActive = false;
00120 }
00121 }
00122 tmpIn.clear();
00123
00124 if(newCommPtr) {
00125 *newCommPtr = newComm;
00126 }
00127
00128 PROF_BAL_END
00129 }
00130
00131 #ifndef __SILENT_MODE__
00132 if(!rank) {
00133 std::cout<<"Balance Octree. inpSize: "<<globSize <<" activeNpes: "<<size<<std::endl;
00134 }
00135 #endif
00136
00138
00139
00140
00141 std::vector<ot::TreeNode> blocks;
00142 std::vector<ot::TreeNode> minsAllBlocks;
00143
00144 #ifdef __PROF_WITH_BARRIER__
00145 MPI_Barrier(comm);
00146 #endif
00147 PROF_BAL_BPART1_BEGIN
00148
00149 blockPartStage1(in, blocks, dim, maxDepth, comm);
00150
00151 PROF_BAL_BPART1_END
00152
00153 #ifdef __PROF_WITH_BARRIER__
00154 MPI_Barrier(comm);
00155 #endif
00156
00157 PROF_BAL_BPART2_BEGIN
00158
00159 blockPartStage2(in, blocks, minsAllBlocks, dim, maxDepth, comm);
00160
00161 PROF_BAL_BPART2_END
00162
00163
00164
00165 assert(!blocks.empty());
00166 TreeNode myFirstBlock = blocks[0];
00167 TreeNode myLastBlock = blocks[blocks.size()-1];
00168
00169 #ifdef __DEBUG_OCT__
00170 assert(areComparable(myFirstBlock, in[0]));
00171 assert(myFirstBlock.isAncestor(in[0]) || myFirstBlock == in[0]);
00172 #endif
00173
00174 #ifdef __USE_AGV_FOR_BAL__
00175
00176
00177
00178
00179
00180 for(unsigned int i = 0; i < blocks.size(); i++) {
00181 blocks[i].setWeight(rank);
00182 }
00183
00184
00185 int totalSize=0;
00186 int *blockSizes = new int [size];
00187 int *blockDisps = new int [size];
00188
00189 int localBlockSize = blocks.size();
00190 PROF_BAL_COMM_BEGIN
00191
00192 par::Mpi_Allgather<int>(&localBlockSize, blockSizes, 1, comm);
00193
00194 PROF_BAL_COMM_END
00195
00196 totalSize += blockSizes[0];
00197 blockDisps[0] = 0;
00198 for (unsigned int i = 1; i < size; i++) {
00199 totalSize += blockSizes[i];
00200 blockDisps[i] = blockDisps[i-1] + blockSizes[i-1];
00201 }
00202
00203
00204 std::vector<TreeNode> allBlocks(totalSize);
00205
00206 ot::TreeNode* blocksPtr = NULL;
00207 ot::TreeNode* allBlocksPtr = NULL;
00208 if(!blocks.empty()) {
00209 blocksPtr = &(*(blocks.begin()));
00210 }
00211 if(!allBlocks.empty()) {
00212 allBlocksPtr = &(*(allBlocks.begin()));
00213 }
00214 PROF_BAL_COMM_BEGIN
00215
00216 par::Mpi_Allgatherv<ot::TreeNode>( blocksPtr, blockSizes[rank],
00217 allBlocksPtr, blockSizes, blockDisps, comm );
00218
00219 PROF_BAL_COMM_END
00220
00221 #ifndef __SILENT_MODE__
00222 if(!rank) {
00223 std::cout<<"# AllBlocks: "<<allBlocks.size()<<std::endl;
00224 for(int i = 0; i < size; i++) {
00225 std::cout<<"blockSizes["<<i<<"] = "<<blockSizes[i]<<std::endl;
00226 }
00227 for(int i = 0; i < allBlocks.size(); i++) {
00228 std::cout<<"allBlocks["<<i<<"].Wt() = "<<(allBlocks[i].getWeight())<<std::endl;
00229 }
00230 }
00231 #endif
00232
00233 #endif
00234
00236
00237
00238
00239 std::vector<ot::TreeNode> allBoundaryLeaves;
00240 std::vector<unsigned int> maxBlockBndVec;
00241
00242 balanceBlocks(in, blocks, out, allBoundaryLeaves, incCorner, &maxBlockBndVec);
00243 in.clear();
00244
00245 #ifdef __USE_AGV_FOR_BAL__
00246 std::vector<TreeNode> myNhBlocks;
00247 selectNeighboringBlocks(allBlocks, blocks, maxBlockBndVec, rank, myNhBlocks);
00248
00249 allBlocks.clear();
00250
00251 #ifdef __MEASURE_BAL_COMM__
00252 MPI_Barrier(comm);
00253 unsigned int localMyNhBlocksSize = myNhBlocks.size();
00254 unsigned int* globalMyNhBlocksSize = new unsigned int[size];
00255 par::Mpi_Gather<unsigned int>(&localMyNhBlocksSize, globalMyNhBlocksSize, 1, 0, comm);
00256 if(!rank) {
00257 for(int i = 0; i < size; i++) {
00258 std::cout<<"globalMyNhBlocksSize["<<i<<"] = "<<globalMyNhBlocksSize[i]<<std::endl;
00259 }
00260 }
00261 delete [] globalMyNhBlocksSize;
00262 MPI_Barrier(comm);
00263 #endif
00264 #endif
00265
00266 blocks.clear();
00267 maxBlockBndVec.clear();
00268
00269 #ifdef __USE_AGV_FOR_BAL__
00270 if(!myNhBlocks.empty()) {
00271
00272
00273
00274
00275
00276
00277
00278
00279
00280 std::vector<ot::TreeNode> boundaryAndOtherBlocks;
00281
00282
00283 if(myNhBlocks[myNhBlocks.size()-1] < myFirstBlock) {
00284
00285
00286 boundaryAndOtherBlocks.insert(boundaryAndOtherBlocks.end(),
00287 myNhBlocks.begin(), myNhBlocks.end());
00288
00289 boundaryAndOtherBlocks.insert(boundaryAndOtherBlocks.end(),
00290 allBoundaryLeaves.begin(), allBoundaryLeaves.end());
00291
00292 } else if(myNhBlocks[0] > myLastBlock) {
00293
00294
00295 boundaryAndOtherBlocks.insert(boundaryAndOtherBlocks.end(),
00296 allBoundaryLeaves.begin(), allBoundaryLeaves.end());
00297
00298 boundaryAndOtherBlocks.insert(boundaryAndOtherBlocks.end(),
00299 myNhBlocks.begin(), myNhBlocks.end());
00300
00301 } else {
00302
00303
00304 unsigned int myStIdxInNhBlocks = 0;
00305 unsigned int myEndIdxInNhBlocks = 0;
00306
00307 for(unsigned int i = 0; i < myNhBlocks.size(); i++) {
00308 if(myNhBlocks[i] > myFirstBlock) {
00309 myStIdxInNhBlocks = i;
00310 break;
00311 }
00312 }
00313
00314
00315
00316 for(unsigned int i = myNhBlocks.size(); i > 0; i--) {
00317 if(myNhBlocks[i-1] < myLastBlock) {
00318 myEndIdxInNhBlocks = i;
00319 break;
00320 }
00321 }
00322
00323 boundaryAndOtherBlocks.insert(boundaryAndOtherBlocks.begin(),
00324 myNhBlocks.begin(), (myNhBlocks.begin() + myStIdxInNhBlocks));
00325
00326 boundaryAndOtherBlocks.insert(boundaryAndOtherBlocks.end(),
00327 allBoundaryLeaves.begin(), allBoundaryLeaves.end());
00328
00329 boundaryAndOtherBlocks.insert(boundaryAndOtherBlocks.end(),
00330 (myNhBlocks.begin() + myEndIdxInNhBlocks), myNhBlocks.end());
00331
00332 }
00333
00334 allBoundaryLeaves.clear();
00335
00336
00337 ripple(boundaryAndOtherBlocks, incCorner);
00338
00339
00340 unsigned int myStartIdx = 0;
00341 unsigned int myEndIdx = 0;
00342
00343 bool foundFirst = false;
00344 bool foundLast = false;
00345 for(unsigned int i = 0; i < boundaryAndOtherBlocks.size(); i++) {
00346 if(boundaryAndOtherBlocks[i] >= myFirstBlock) {
00347 myStartIdx = i;
00348 foundFirst = true;
00349 break;
00350 }
00351 }
00352
00353
00354
00355 for(unsigned int i = boundaryAndOtherBlocks.size(); i > 0; i--) {
00356 if(boundaryAndOtherBlocks[i-1] <= myLastBlock.getDLD()) {
00357 myEndIdx = i;
00358 foundLast = true;
00359 break;
00360 }
00361 }
00362
00363 #ifdef __DEBUG_OCT__
00364
00365 assert(foundFirst && foundLast);
00366 #endif
00367
00368 allBoundaryLeaves.insert(allBoundaryLeaves.begin(),
00369 boundaryAndOtherBlocks.begin() + myStartIdx,
00370 boundaryAndOtherBlocks.begin() + myEndIdx);
00371
00372 boundaryAndOtherBlocks.clear();
00373
00374 } else {
00375
00376 ripple(allBoundaryLeaves, incCorner);
00377 }
00378 myNhBlocks.clear();
00379 #else
00380
00381 ripple(allBoundaryLeaves, incCorner);
00382 #endif
00383
00384 mergeComboBalAndPickBoundary(out, allBoundaryLeaves, myFirstBlock, myLastBlock);
00385
00386 #ifdef __MEASURE_BAL_COMM__
00387 MPI_Barrier(comm);
00388 unsigned int localAllBndSize = allBoundaryLeaves.size();
00389 unsigned int* globalAllBndSize = new unsigned int[size];
00390 par::Mpi_Gather<unsigned int>(&localAllBndSize, globalAllBndSize, 1, 0, comm);
00391 if(!rank) {
00392 for(int i = 0; i < size; i++) {
00393 std::cout<<" globalAllBndSize["<<i<<"]= "<<globalAllBndSize[i]<<std::endl;
00394 }
00395 }
00396 delete [] globalAllBndSize;
00397 MPI_Barrier(comm);
00398 #endif
00399
00400
00401
00403
00404
00405 std::vector<TreeNode> * sendNodes = new std::vector<TreeNode> [size];
00406 std::vector<unsigned int> * sentToPid = NULL;
00407
00408 std::vector<ot::TreeNode> sendK;
00409 int *sendCnt = new int[size];
00410 int *recvCnt = new int[size];
00411 int *sendOffsets = new int[size];
00412
00413
00414
00415
00416 for (int i = 0; i < size; i++) {
00417 sendCnt[i] = 0;
00418 sendNodes[i].clear();
00419 }
00420
00421 if(!allBoundaryLeaves.empty()) {
00422 sentToPid = new std::vector<unsigned int> [allBoundaryLeaves.size()];
00423 } else {
00424 sentToPid = NULL;
00425 }
00426
00427
00428 prepareBalComm1MessagesType2(allBoundaryLeaves, minsAllBlocks, rank, dim, maxDepth,
00429 sendNodes, sentToPid, sendCnt);
00430
00431
00432
00433
00434
00435 PROF_BAL_COMM_BEGIN
00436
00437 par::Mpi_Alltoall<int>(sendCnt, recvCnt, 1, comm);
00438
00439 PROF_BAL_COMM_END
00440
00441 #ifdef __MEASURE_BAL_COMM__
00442 unsigned int numProcsSend1 = 0;
00443 unsigned int numProcsRecv1 = 0;
00444 #endif
00445
00446
00447 unsigned int totalSend = 0;
00448 unsigned int totalRecv = 0;
00449 for (unsigned int i = 0; i < size; i++) {
00450 totalSend += sendCnt[i];
00451 totalRecv += recvCnt[i];
00452 #ifdef __MEASURE_BAL_COMM__
00453 if(sendCnt[i]) {
00454 numProcsSend1++;
00455 }
00456 if(recvCnt[i]) {
00457 numProcsRecv1++;
00458 }
00459 #endif
00460 }
00461
00462
00463
00464 int *recvOffsets1 = new int[size];
00465
00466
00467 sendOffsets[0] = 0;
00468 recvOffsets1[0] = 0;
00469
00470
00471 for (int i = 1; i < size; i++) {
00472 sendOffsets[i] = sendOffsets[i-1] + sendCnt[i-1];
00473 recvOffsets1[i] = recvOffsets1[i-1] + recvCnt[i-1];
00474 }
00475
00476 sendK.resize(totalSend);
00477 for (int i = 0; i < size; i++) {
00478 #ifdef __DEBUG_OCT__
00479 assert(seq::test::isUniqueAndSorted(sendNodes[i]));
00480 #endif
00481 for (unsigned int j = 0; j < sendCnt[i]; j++) {
00482 sendK[sendOffsets[i] + j] = sendNodes[i][j];
00483 }
00484 }
00485
00486 for (unsigned int i = 0; i < size; i++) {
00487 sendNodes[i].clear();
00488 }
00489
00490
00491 std::vector<ot::TreeNode> recvK1(totalRecv);
00492
00493 ot::TreeNode* sendKptr = NULL;
00494 ot::TreeNode* recvK1ptr = NULL;
00495 if(!sendK.empty()) {
00496 sendKptr = &(*(sendK.begin()));
00497 }
00498 if(!recvK1.empty()) {
00499 recvK1ptr = &(*(recvK1.begin()));
00500 }
00501
00502 PROF_BAL_COMM_BEGIN
00503
00504 par::Mpi_Alltoallv_sparse<ot::TreeNode>(sendKptr, sendCnt, sendOffsets,
00505 recvK1ptr, recvCnt, recvOffsets1, comm);
00506
00507 PROF_BAL_COMM_END
00508
00509 sendK.clear();
00510
00511 #ifdef __MEASURE_BAL_COMM__
00512 MPI_Barrier(comm);
00513 unsigned int* allTotalSend1 = new unsigned int[size];
00514 unsigned int* allTotalRecv1 = new unsigned int[size];
00515 unsigned int* allNumProcsSend1 = new unsigned int[size];
00516 unsigned int* allNumProcsRecv1 = new unsigned int[size];
00517 par::Mpi_Gather<unsigned int>(&totalSend, allTotalSend1, 1, 0, comm);
00518 par::Mpi_Gather<unsigned int>(&totalRecv, allTotalRecv1, 1, 0, comm);
00519 par::Mpi_Gather<unsigned int>(&numProcsSend1, allNumProcsSend1, 1, 0, comm);
00520 par::Mpi_Gather<unsigned int>(&numProcsRecv1, allNumProcsRecv1, 1, 0, comm);
00521 if(!rank) {
00522 for(int i = 0; i < size; i++) {
00523 std::cout<<"allTotalSend1["<<i<<"] in bal: "<<allTotalSend1[i]<<std::endl;
00524 std::cout<<"allTotalRecv1["<<i<<"] in bal: "<<allTotalRecv1[i]<<std::endl;
00525 std::cout<<"allNumProcsSend1["<<i<<"] in bal: "<<allNumProcsSend1[i]<<std::endl;
00526 std::cout<<"allNumProcsRecv1["<<i<<"] in bal: "<<allNumProcsRecv1[i]<<std::endl;
00527 }
00528 }
00529 delete [] allTotalSend1;
00530 delete [] allTotalRecv1;
00531 delete [] allNumProcsSend1;
00532 delete [] allNumProcsRecv1;
00533 MPI_Barrier(comm);
00534 #endif
00535
00537
00538
00539
00540
00541 std::vector<ot::TreeNode> wList;
00542 std::vector<std::vector<unsigned int> > wListRanks;
00543
00544 prepareWlistInBal(recvK1, recvCnt, size, myFirstBlock, myLastBlock, wList, wListRanks);
00545
00546 for (int i = 0; i < size; i++) {
00547 sendCnt[i] = 0;
00548 sendNodes[i].clear();
00549 }
00550
00551 prepareBalComm2Messages(allBoundaryLeaves, wList, wListRanks,
00552 sendNodes, sentToPid, sendCnt);
00553
00554 for (int i = 0; i < allBoundaryLeaves.size(); i++) {
00555 sentToPid[i].clear();
00556 }
00557
00558 if(sentToPid) {
00559 delete [] sentToPid;
00560 sentToPid = NULL;
00561 }
00562
00563
00564
00565
00566
00567 PROF_BAL_COMM_BEGIN
00568
00569 par::Mpi_Alltoall<int>(sendCnt, recvCnt, 1, comm);
00570
00571 PROF_BAL_COMM_END
00572
00573 #ifdef __MEASURE_BAL_COMM__
00574 unsigned int numProcsSend2 = 0;
00575 unsigned int numProcsRecv2 = 0;
00576 #endif
00577
00578 totalSend = 0;
00579 totalRecv = 0;
00580 for (unsigned int i = 0; i < size; i++) {
00581 totalSend+= sendCnt[i];
00582 totalRecv+= recvCnt[i];
00583 #ifdef __MEASURE_BAL_COMM__
00584 if(sendCnt[i]) {
00585 numProcsSend2++;
00586 }
00587 if(recvCnt[i]) {
00588 numProcsRecv2++;
00589 }
00590 #endif
00591 }
00592
00593
00594
00595 int *recvOffsets2 = new int[size];
00596
00597 sendOffsets[0] = 0;
00598 recvOffsets2[0] = 0;
00599
00600
00601 for (int i=1; i<size; i++) {
00602 sendOffsets[i] = sendOffsets[i-1] + sendCnt[i-1];
00603 recvOffsets2[i] = recvOffsets2[i-1] + recvCnt[i-1];
00604 }
00605
00606 sendK.resize(totalSend);
00607 for (int i=0; i<size; i++) {
00608 #ifdef __DEBUG_OCT__
00609 assert(seq::test::isUniqueAndSorted(sendNodes[i]));
00610 #endif
00611 for (unsigned int j = 0; j < sendCnt[i]; j++) {
00612 sendK[sendOffsets[i] + j] = sendNodes[i][j];
00613 }
00614 }
00615
00616 for (unsigned int i = 0; i < size; i++) {
00617 sendNodes[i].clear();
00618 }
00619
00620 delete [] sendNodes;
00621 sendNodes = NULL;
00622
00623 std::vector<ot::TreeNode> recvK2(totalRecv);
00624
00625 sendKptr = NULL;
00626 ot::TreeNode* recvK2ptr = NULL;
00627 if(!sendK.empty()) {
00628 sendKptr = &(*(sendK.begin()));
00629 }
00630 if(!recvK2.empty()) {
00631 recvK2ptr = &(*(recvK2.begin()));
00632 }
00633
00634 PROF_BAL_COMM_BEGIN
00635
00636 par::Mpi_Alltoallv_sparse<ot::TreeNode>(sendKptr, sendCnt, sendOffsets,
00637 recvK2ptr, recvCnt, recvOffsets2, comm);
00638
00639 PROF_BAL_COMM_END
00640
00641 sendK.clear();
00642
00643 delete [] sendCnt;
00644 sendCnt = NULL;
00645
00646 delete [] recvCnt;
00647 recvCnt = NULL;
00648
00649 delete [] sendOffsets;
00650 sendOffsets = NULL;
00651
00652
00653 #ifdef __MEASURE_BAL_COMM__
00654 MPI_Barrier(comm);
00655 unsigned int* allTotalSend2 = new unsigned int[size];
00656 unsigned int* allTotalRecv2 = new unsigned int[size];
00657 unsigned int* allNumProcsSend2 = new unsigned int[size];
00658 unsigned int* allNumProcsRecv2 = new unsigned int[size];
00659 par::Mpi_Gather<unsigned int>(&totalSend, allTotalSend2, 1, 0, comm);
00660 par::Mpi_Gather<unsigned int>(&totalRecv, allTotalRecv2, 1, 0, comm);
00661 par::Mpi_Gather<unsigned int>(&numProcsSend2, allNumProcsSend2, 1, 0, comm);
00662 par::Mpi_Gather<unsigned int>(&numProcsRecv2, allNumProcsRecv2, 1, 0, comm);
00663 if(!rank) {
00664 for(int i = 0; i < size; i++) {
00665 std::cout<<"allTotalSend2["<<i<<"] in bal: "<<allTotalSend2[i]<<std::endl;
00666 std::cout<<"allTotalRecv2["<<i<<"] in bal: "<<allTotalRecv2[i]<<std::endl;
00667 std::cout<<"allNumProcsSend2["<<i<<"] in bal: "<<allNumProcsSend2[i]<<std::endl;
00668 std::cout<<"allNumProcsRecv2["<<i<<"] in bal: "<<allNumProcsRecv2[i]<<std::endl;
00669 }
00670 }
00671 delete [] allTotalSend2;
00672 delete [] allTotalRecv2;
00673 delete [] allNumProcsSend2;
00674 delete [] allNumProcsRecv2;
00675 MPI_Barrier(comm);
00676 #endif
00677
00679
00680
00681 std::vector<ot::TreeNode> recvK;
00682
00683 unsigned int myOff1 = recvOffsets1[rank];
00684 unsigned int myOff2 = recvOffsets2[rank];
00685 unsigned int myOff = myOff1 + myOff2;
00686
00687 mergeRecvKeysInBal(recvK1, recvOffsets1, recvK2, recvOffsets2, size, recvK);
00688
00689 delete [] recvOffsets1;
00690 recvOffsets1 = NULL;
00691
00692 recvK1.clear();
00693
00694 delete [] recvOffsets2;
00695 recvOffsets2 = NULL;
00696
00697 recvK2.clear();
00698
00699
00700 unsigned int n = static_cast<unsigned int>(allBoundaryLeaves.size());
00701 std::vector <TreeNode > nodes( recvK.size() + n );
00702
00703
00704
00705
00706
00707
00708 for (int i = 0; i < myOff; i++) {
00709 nodes[i] = recvK[i];
00710 }
00711 for (int i = myOff; i < myOff + n; i++) {
00712 nodes[i] = allBoundaryLeaves[i - myOff];
00713 }
00714 for (int i = myOff + n; i < nodes.size(); i++) {
00715 nodes[i] = recvK[i - n];
00716 }
00717 recvK.clear();
00718 allBoundaryLeaves.clear();
00719
00721
00722
00723 ripple(nodes, incCorner);
00724
00726
00727
00728 unsigned int myStIdxInNodes = 0;
00729 unsigned int myEndIdxInNodes = 0;
00730 bool foundStart = false;
00731 bool foundEnd = false;
00732 for(unsigned int i = 0; i < nodes.size(); i++) {
00733 if(nodes[i] >= myFirstBlock) {
00734 myStIdxInNodes = i;
00735 foundStart = true;
00736 break;
00737 }
00738 }
00739
00740
00741
00742 for(unsigned int i = nodes.size(); i > 0; i--) {
00743 if(nodes[i-1] <= myLastBlock.getDLD()) {
00744 myEndIdxInNodes = i;
00745 foundEnd = true;
00746 break;
00747 }
00748 }
00749
00750 #ifdef __DEBUG_OCT__
00751 assert(foundStart && foundEnd);
00752 #endif
00753
00754 allBoundaryLeaves.insert(allBoundaryLeaves.begin(),
00755 (nodes.begin() + myStIdxInNodes), (nodes.begin() + myEndIdxInNodes));
00756
00757 nodes.clear();
00758
00759
00760 finalMergeInBal(out, allBoundaryLeaves);
00761
00762 PROF_BAL_END
00763 }
00764
00765
00766
00767 int comboRipple(std::vector<TreeNode> & in, bool incCorner, const unsigned int maxNum) {
00768 PROF_COMBO_RIPPLE_BEGIN
00769
00770 if (in.size() < 2) {
00771 PROF_COMBO_RIPPLE_END
00772 }
00773
00774 std::vector<ot::TreeNode> blocks;
00775 std::vector<ot::TreeNode> out;
00776 std::vector<ot::TreeNode> allBoundaryLeaves;
00777
00778 TreeNode nca = getNCA(in[0],in[in.size()-1]);
00779 nca.addChildren(blocks);
00780
00781 assert(maxNum > 0);
00782
00783 if (in.size() > maxNum) {
00784 std::vector<TreeNode> *splitInp = NULL;
00785
00786 if(!blocks.empty()) {
00787 splitInp = new std::vector<TreeNode> [blocks.size()];
00788 }
00789
00790 unsigned int nextPt = 0;
00791 unsigned int nextNode = 0;
00792
00793 while (nextPt < in.size()) {
00794
00795 #ifdef __DEBUG_OCT__
00796 assert(nextNode < blocks.size());
00797 assert(areComparable(blocks[nextNode], in[nextPt]));
00798 #endif
00799 if ((blocks[nextNode].isAncestor(in[nextPt])) ||
00800 (blocks[nextNode] == in[nextPt])) {
00801 splitInp[nextNode].push_back(in[nextPt]);
00802 nextPt++;
00803 } else {
00804 nextNode++;
00805 if (nextNode == blocks.size()) {
00806 assert(false);
00807 }
00808 }
00809 }
00810 for (unsigned int i = 0; i < blocks.size(); i++) {
00811 comboRipple(splitInp[i], incCorner, maxNum);
00812 }
00813
00814 unsigned int allBoundarySz = 0;
00815 unsigned int blockOutSize = 0;
00816
00817 std::vector<TreeNode> *blockBoundaries = NULL;
00818
00819 if(!blocks.empty()) {
00820 blockBoundaries = new std::vector<TreeNode>[blocks.size()];
00821 }
00822
00823 unsigned int numEmptyBlocks=0;
00824 for (unsigned int bi = 0; bi < blocks.size(); bi++) {
00825 if (splitInp[bi].empty()) {
00826 continue;
00827 }
00828 blockOutSize += splitInp[bi].size();
00829 if (splitInp[bi].size() > 1) {
00830 blocks[bi].pickInternalBoundaryCells(splitInp[bi], blockBoundaries[bi]);
00831 }
00832 if (blockBoundaries[bi].empty()) {
00833
00834 blockBoundaries[bi].push_back(blocks[bi]);
00835 numEmptyBlocks++;
00836 }
00837 allBoundarySz += blockBoundaries[bi].size();
00838 }
00839
00840
00841 out.resize(blockOutSize);
00842 allBoundaryLeaves.resize(allBoundarySz);
00843 unsigned int nodeCtr = 0;
00844 unsigned int boundaryCtr = 0;
00845 for (unsigned int bi = 0; bi < blocks.size(); bi++) {
00846 for (unsigned int bj = 0; bj < splitInp[bi].size(); bj++) {
00847 out[nodeCtr] = splitInp[bi][bj];
00848 nodeCtr++;
00849 }
00850 for (unsigned int bj = 0; bj < blockBoundaries[bi].size(); bj++) {
00851 allBoundaryLeaves[boundaryCtr] = blockBoundaries[bi][bj];
00852 boundaryCtr++;
00853 }
00854 }
00855 delete [] splitInp;
00856 splitInp = NULL;
00857
00858 delete [] blockBoundaries;
00859 blockBoundaries = NULL;
00860 } else {
00861 balanceBlocks (in, blocks, out, allBoundaryLeaves, incCorner);
00862 }
00863
00864 in.clear();
00865
00866
00867 ripple(allBoundaryLeaves, incCorner);
00868
00869
00870 in.resize(out.size() + allBoundaryLeaves.size());
00871 unsigned int tmpLsz=0;
00872 unsigned int bndCnt=0;
00873 for (unsigned int i =0;i<out.size();i++) {
00874 if ( bndCnt < allBoundaryLeaves.size() ) {
00875 if ( out[i] == allBoundaryLeaves[bndCnt] ) {
00876 in[tmpLsz++] = out[i];
00877 bndCnt++;
00878 } else if (out[i] < allBoundaryLeaves[bndCnt] ) {
00879 #ifdef __DEBUG_OCT__
00880 assert(areComparable(out[i], allBoundaryLeaves[bndCnt]));
00881 #endif
00882 if (out[i].isAncestor(allBoundaryLeaves[bndCnt]) ) {
00883 while ( (bndCnt < allBoundaryLeaves.size()) &&
00884 out[i].isAncestor(allBoundaryLeaves[bndCnt]) ) {
00885 in[tmpLsz++] = allBoundaryLeaves[bndCnt++];
00886 #ifdef __DEBUG_OCT__
00887 if(bndCnt < allBoundaryLeaves.size()) {
00888 assert(areComparable(out[i], allBoundaryLeaves[bndCnt]));
00889 }
00890 #endif
00891 }
00892 } else {
00893 in[tmpLsz++] = out[i];
00894 }
00895 } else {
00896
00897 in[tmpLsz++] = allBoundaryLeaves[bndCnt++];
00898 }
00899 } else {
00900 in[tmpLsz++] = out[i];
00901 }
00902 }
00903
00904 in.resize(tmpLsz);
00905 out.clear();
00906
00907
00908 PROF_COMBO_RIPPLE_END
00909 }
00910
00911
00912 int balanceBlocks (const std::vector<TreeNode> &inp,
00913 const std::vector<TreeNode> &blocks, std::vector<TreeNode> &nodes,
00914 std::vector<TreeNode> &allBoundaryLeaves, bool incCorner,
00915 std::vector<unsigned int> *maxBlockBndVec ) {
00916 PROF_CON_BAL_BEGIN
00917
00918 if (inp.empty()) {
00919 nodes.clear();
00920 allBoundaryLeaves.clear();
00921 if (maxBlockBndVec != NULL) {
00922 maxBlockBndVec->clear();
00923 }
00924 PROF_CON_BAL_END
00925 }
00926
00927 unsigned int nextPt;
00928 unsigned int nextNode;
00929
00930 assert(!blocks.empty());
00931 std::vector<TreeNode> *blockOut = new std::vector<TreeNode> [blocks.size()];
00932 std::vector<TreeNode> *splitInp = new std::vector<TreeNode> [blocks.size()];
00933
00934 nextPt =0;
00935 nextNode=0;
00936
00937 while (nextPt<inp.size()) {
00938
00939 #ifdef __DEBUG_OCT__
00940 assert(areComparable(blocks[nextNode], inp[nextPt]));
00941 #endif
00942 if ((blocks[nextNode].isAncestor(inp[nextPt])) ||
00943 (blocks[nextNode] == inp[nextPt])) {
00944 splitInp[nextNode].push_back(inp[nextPt]);
00945 nextPt++;
00946 } else {
00947 nextNode++;
00948 if (nextNode == blocks.size()) {
00949 assert(false);
00950 }
00951 }
00952 }
00953
00954
00955 for (unsigned int bi=0;bi<blocks.size();bi++) {
00956
00957 blocks[bi].balanceSubtree(splitInp[bi],blockOut[bi],incCorner,true);
00958
00959 splitInp[bi].clear();
00960
00961 if (blockOut[bi].empty()) {
00962 blockOut[bi].push_back(blocks[bi]);
00963 }
00964 }
00965
00966 delete [] splitInp;
00967 splitInp = NULL;
00968
00969 unsigned int allBoundarySz = 0;
00970 unsigned int blockOutSize = 0;
00971
00972 std::vector<TreeNode> *blockBoundaries =
00973 new std::vector<TreeNode>[blocks.size()];
00974
00975 unsigned int numEmptyBlocks=0;
00976 for (unsigned int bi = 0; bi < blocks.size(); bi++) {
00977 blockOutSize += blockOut[bi].size();
00978 if (blockOut[bi].size() > 1) {
00979 blocks[bi].pickInternalBoundaryCells(blockOut[bi],blockBoundaries[bi]);
00980 }
00981 if (blockBoundaries[bi].empty()) {
00982
00983 blockBoundaries[bi].push_back(blocks[bi]);
00984 numEmptyBlocks++;
00985 }
00986 allBoundarySz += blockBoundaries[bi].size();
00987 }
00988
00989
00990 nodes.resize(blockOutSize);
00991 allBoundaryLeaves.resize(allBoundarySz);
00992 unsigned int nodeCtr = 0;
00993 unsigned int boundaryCtr = 0;
00994
00995 if (maxBlockBndVec != NULL) {
00996 maxBlockBndVec->resize(blocks.size());
00997 unsigned int maxDepth;
00998 if (!blocks.empty()) {
00999 maxDepth = blocks[0].getMaxDepth();
01000 }
01001 for (unsigned int bi = 0; bi < blocks.size(); bi++) {
01002 (*maxBlockBndVec)[bi] = 0;
01003 for (unsigned int bj = 0; bj < blockOut[bi].size(); bj++) {
01004 nodes[nodeCtr] = blockOut[bi][bj];
01005 nodeCtr++;
01006 }
01007 for (unsigned int bj = 0; bj < blockBoundaries[bi].size(); bj++) {
01008 unsigned int len = (1u<<(maxDepth - (blockBoundaries[bi][bj].getLevel())));
01009 if (len > ((*maxBlockBndVec)[bi])) {
01010 (*maxBlockBndVec)[bi] = len;
01011 }
01012 allBoundaryLeaves[boundaryCtr] = blockBoundaries[bi][bj];
01013 boundaryCtr++;
01014 }
01015 }
01016 } else {
01017 for (unsigned int bi = 0; bi < blocks.size(); bi++) {
01018 for (unsigned int bj = 0; bj < blockOut[bi].size(); bj++) {
01019 nodes[nodeCtr] = blockOut[bi][bj];
01020 nodeCtr++;
01021 }
01022 for (unsigned int bj = 0; bj < blockBoundaries[bi].size(); bj++) {
01023 allBoundaryLeaves[boundaryCtr] = blockBoundaries[bi][bj];
01024 boundaryCtr++;
01025 }
01026 }
01027 }
01028
01029 delete [] blockOut;
01030 blockOut = NULL;
01031
01032 delete [] blockBoundaries;
01033 blockBoundaries = NULL;
01034
01035 PROF_CON_BAL_END
01036 }
01037
01038
01039
01040
01041
01042
01043
01044
01045
01046
01047
01048
01049
01050
01051
01052
01053
01054
01055
01056
01057
01058
01059
01060
01061
01062
01063
01064
01065
01066
01067
01068
01069
01070
01071
01072
01073
01074
01075
01076
01077
01078
01079
01080
01081
01082
01083
01084
01085
01086
01087
01088
01089
01090
01091
01092
01093
01094
01095
01096
01097
01098
01099
01100
01101
01102
01103
01104
01105
01106
01107
01108
01109
01110
01111
01112
01113
01114
01115
01116
01117
01118
01119
01120
01121
01122
01123
01124
01125
01126
01127
01128
01129
01130
01131
01132
01133
01134
01135
01136
01137
01138
01139
01140
01141
01142
01143
01144
01145
01146
01147
01148
01149
01150
01151
01152
01153
01154
01155
01156
01157
01158
01159
01160
01161
01162 int ripple(std::vector<TreeNode> & nodes, bool incCorners) {
01163 PROF_RIPPLE_BAL_BEGIN
01164
01165 if (!nodes.size()) {
01166 PROF_RIPPLE_BAL_END
01167 }
01168
01169 unsigned int dim = nodes[0].getDim();
01170 unsigned int maxDepth = nodes[0].getMaxDepth();
01171 TreeNode root(dim,maxDepth);
01172
01173 unsigned int maxLev = 1;
01174 for (unsigned int i=0;i<nodes.size();i++) {
01175 if (nodes[i].getLevel() > maxLev) {
01176 maxLev = nodes[i].getLevel();
01177 }
01178 }
01179
01180 for (unsigned int lev = maxLev; lev > 2; lev--) {
01181 unsigned int mLev = maxDepth;
01182 for (unsigned int i=0;i<nodes.size();i++) {
01183 if (nodes[i].getLevel() < mLev) {
01184 mLev = nodes[i].getLevel();
01185 }
01186 }
01187
01188 if (mLev >= (lev-1)) {
01189
01190 break;
01191 }
01192
01193 std::vector<TreeNode> wList;
01194 std::vector<std::vector<TreeNode> > seedList(nodes.size());
01195 unsigned int wLen = 0;
01196 wList.resize(nodes.size());
01197
01198
01199 for (int i =0;i<nodes.size();i++) {
01200 if (nodes[i].getLevel() == lev) {
01201 wList[wLen++] = nodes[i];
01202 }
01203 }
01204 wList.resize(wLen);
01205
01206 for (unsigned int i=0; i < wList.size(); i++) {
01207 std::vector<TreeNode> tList = wList[i].getSearchKeys(incCorners);
01208 for (int j=0; j<tList.size(); j++) {
01209 unsigned int idx;
01210
01211 bool flag = seq::maxLowerBound<TreeNode>(nodes, tList[j], idx,NULL,NULL);
01212 if (flag) {
01213 #ifdef __DEBUG_OCT__
01214 assert(areComparable(nodes[idx], tList[j]));
01215 #endif
01216 if (nodes[idx].isAncestor(tList[j])) {
01217 if (wList[i].getLevel() > (nodes[idx].getLevel() + 1 )) {
01218 nodes[idx].addBalancingDescendants(wList[i],
01219 seedList[idx], incCorners);
01220 }
01221 }
01222 }
01223 }
01224 tList.clear();
01225 }
01226
01227 wList.clear();
01228 std::vector<TreeNode> tmpList;
01229 std::vector<std::vector<TreeNode> >allInternalLeaves(nodes.size());
01230 unsigned int tmpSz = 0;
01231 for (unsigned int i=0;i<nodes.size();i++) {
01232 seq::makeVectorUnique<TreeNode>(seedList[i],false);
01233 if (!seedList[i].empty()) {
01234 if (seedList[i][0] == root) {
01235 seedList[i].erase(seedList[i].begin());
01236 }
01237 }
01238 if (!seedList[i].empty()) {
01239 nodes[i].completeSubtree(seedList[i], allInternalLeaves[i]);
01240 }
01241 tmpSz += allInternalLeaves[i].size();
01242 if (allInternalLeaves[i].empty()) {
01243 tmpSz++;
01244 }
01245 seedList[i].clear();
01246 }
01247 seedList.clear();
01248 tmpList.resize(tmpSz);
01249 tmpSz=0;
01250 for (unsigned int i=0;i<nodes.size();i++) {
01251 if (!allInternalLeaves[i].empty()) {
01252 for (int k=0;k<allInternalLeaves[i].size();k++) {
01253 tmpList[tmpSz++] = allInternalLeaves[i][k];
01254 }
01255 allInternalLeaves[i].clear();
01256 } else {
01257 tmpList[tmpSz++] = nodes[i];
01258 }
01259 }
01260 allInternalLeaves.clear();
01261 nodes = tmpList;
01262 tmpList.clear();
01263 }
01264
01265 PROF_RIPPLE_BAL_END
01266 }
01267
01268
01269 int pointerBasedRipple(std::vector<ot::TreeNode> & nodes, bool incCorners) {
01270 PROF_PTR_RIPPLE_BAL_BEGIN
01271
01272 if (!nodes.size()) {
01273 PROF_PTR_RIPPLE_BAL_END
01274 }
01275
01276 unsigned int dim = nodes[0].getDim();
01277 unsigned int maxDepth = nodes[0].getMaxDepth();
01278 TreeNode root(dim,maxDepth);
01279
01280 unsigned int maxLev = 1;
01281 for (unsigned int i = 0; i < nodes.size(); i++) {
01282 if (nodes[i].getLevel() > maxLev) {
01283 maxLev = nodes[i].getLevel();
01284 }
01285 }
01286
01287 TreeNodePointer ptrOctree;
01288
01289 convertLinearToPointer(nodes, ptrOctree);
01290
01291 for(unsigned int lev = maxLev; lev > 2; lev--) {
01292
01293
01294
01295 std::vector<ot::TreeNode> wList;
01296
01297 appendOctantsAtLevel(ptrOctree, wList, lev);
01298
01299 for(unsigned int i = 0; i < wList.size(); i++) {
01300 std::vector<TreeNode> tList = wList[i].getSearchKeys(incCorners);
01301 for(int j = 0; j < tList.size(); j++) {
01302 TreeNodePointer* searchResult = NULL;
01303 findOctantOrFinestAncestor(ptrOctree, tList[j], searchResult);
01304 #ifdef __DEBUG_OCT__
01305 assert(searchResult);
01306 assert( ((searchResult->m_tnMe).isAncestor(tList[j]))
01307 || ((searchResult->m_tnMe) == tList[j]) );
01308 assert( (searchResult->m_tnpMyChildren) == NULL );
01309 #endif
01310
01311 if( ((searchResult->m_tnMe).getLevel()) < (lev - 1) ) {
01312 addOctantToTreeNodePointer((*searchResult),
01313 (tList[j].getAncestor((lev - 1))));
01314 }
01315 }
01316 tList.clear();
01317 }
01318
01319 wList.clear();
01320
01321 }
01322
01323 std::vector<ot::TreeNode> linOct;
01324
01325 convertPointerToLinear(linOct, ptrOctree);
01326
01327
01328
01329
01330 std::vector<ot::TreeNode> finalOct;
01331 unsigned int lCtr = 0;
01332 for(unsigned int i = 0; i < nodes.size(); i++) {
01333 while( (lCtr < linOct.size()) && (linOct[lCtr] < nodes[i]) ) {
01334 lCtr++;
01335 }
01336 if(lCtr < linOct.size()) {
01337 if(linOct[lCtr] == nodes[i]) {
01338 finalOct.push_back(nodes[i]);
01339 } else {
01340 #ifdef __DEBUG_OCT__
01341 assert(nodes[i].isAncestor(linOct[lCtr]));
01342 #endif
01343 while( (lCtr < linOct.size()) &&
01344 (nodes[i].isAncestor(linOct[lCtr])) ) {
01345 finalOct.push_back(linOct[lCtr]);
01346 lCtr++;
01347 }
01348 }
01349 } else {
01350
01351 assert(false);
01352 }
01353 }
01354
01355 nodes = finalOct;
01356
01357 finalOct.clear();
01358 linOct.clear();
01359
01360 deleteTreeNodePointer(ptrOctree);
01361
01362 PROF_PTR_RIPPLE_BAL_END
01363 }
01364
01365 int parallelRippleType3(std::vector<TreeNode> & nodes,
01366 bool incCorners, bool checkBailOut, bool rePart,
01367 unsigned int dim, unsigned int maxDepth, MPI_Comm comm)
01368 {
01369 PROF_PAR_RIPPLE_TYPE3_BEGIN
01370
01371 TreeNode root(dim,maxDepth);
01372
01373 unsigned int maxLev = 1;
01374 for(unsigned int i=0; i<nodes.size(); i++) {
01375 if (nodes[i].getLevel() > maxLev) {
01376 maxLev = nodes[i].getLevel();
01377 }
01378 }
01379
01380 int rank,npes;
01381 MPI_Comm_rank(comm,&rank);
01382 MPI_Comm_size(comm,&npes);
01383
01384 #ifdef __DEBUG_OCT__
01385 MPI_Barrier(comm);
01386 if(!rank) {
01387 std::cout<<"Computed local maxLev."<<std::endl;
01388 }
01389 MPI_Barrier(comm);
01390 #endif
01391
01392 unsigned int globalMaxLev = maxLev;
01393 par::Mpi_Allreduce<unsigned int>(&maxLev, &globalMaxLev, 1, MPI_MAX, comm);
01394
01395 #ifdef __DEBUG_OCT__
01396 MPI_Barrier(comm);
01397 if(!rank) {
01398 std::cout<<"Global maxLev: "<<globalMaxLev<<std::endl;
01399 }
01400 MPI_Barrier(comm);
01401 #endif
01402
01403 for (unsigned int lev = globalMaxLev; lev > 2; lev--)
01404 {
01405 #ifdef __DEBUG_OCT__
01406 MPI_Barrier(comm);
01407 if(!rank) {
01408 std::cout<<"Lev: "<<lev<<std::endl;
01409 }
01410 MPI_Barrier(comm);
01411 #endif
01412 if(checkBailOut) {
01413 unsigned int minLev = maxDepth;
01414 for (unsigned int i=0; i<nodes.size(); i++) {
01415 if (nodes[i].getLevel() < minLev) {
01416 minLev = nodes[i].getLevel();
01417 }
01418 }
01419
01420 unsigned int globalMinLev = minLev;
01421 par::Mpi_Allreduce<unsigned int>(&minLev, &globalMinLev, 1, MPI_MIN, comm);
01422
01423 if (globalMinLev >= (lev-1)) {
01424
01425
01426 break;
01427 }
01428 }
01429
01430 #ifdef __DEBUG_OCT__
01431 MPI_Barrier(comm);
01432 if(!rank) {
01433 std::cout<<"Passed Step 1."<<std::endl;
01434 }
01435 MPI_Barrier(comm);
01436 #endif
01437
01438 std::vector<TreeNode> wList;
01439 unsigned int wLen = 0;
01440 wList.resize(nodes.size());
01441
01442 for (int i=0; i<nodes.size(); i++) {
01443 if (nodes[i].getLevel() == lev) {
01444 wList[wLen] = nodes[i];
01445 wLen++;
01446 }
01447 }
01448 wList.resize(wLen);
01449
01450 #ifdef __DEBUG_OCT__
01451 MPI_Barrier(comm);
01452 if(!rank) {
01453 std::cout<<"Passed Step 2."<<std::endl;
01454 }
01455 MPI_Barrier(comm);
01456 #endif
01457
01458 std::vector<std::vector<TreeNode> >tList(wLen);
01459 for (unsigned int i=0; i < wLen; i++) {
01460 tList[i] = wList[i].getSearchKeys(incCorners);
01461 }
01462
01463 wList.clear();
01464
01465 #ifdef __DEBUG_OCT__
01466 MPI_Barrier(comm);
01467 if(!rank) {
01468 std::cout<<"Passed Step 3."<<std::endl;
01469 }
01470 MPI_Barrier(comm);
01471 #endif
01472
01473 std::vector<TreeNode> allKeys;
01474 for (unsigned int i=0; i < wLen; i++) {
01475 for(unsigned int j=0; j < tList[i].size(); j++) {
01476 if(tList[i][j] > root) {
01477 allKeys.push_back(tList[i][j]);
01478 }
01479 }
01480 }
01481 tList.clear();
01482
01483 #ifdef __DEBUG_OCT__
01484 MPI_Barrier(comm);
01485 if(!rank) {
01486 std::cout<<"Passed Step 4."<<std::endl;
01487 }
01488 std::cout<<rank<<": allKeys.size(): "
01489 <<allKeys.size()<<std::endl;
01490 MPI_Barrier(comm);
01491 #endif
01492
01493
01494
01495
01496
01497
01498
01499
01500
01501
01502
01503
01504
01505
01506
01507
01508
01509 seq::makeVectorUnique<TreeNode>(allKeys,false);
01510
01511 unsigned int keyLen = allKeys.size();
01512
01513
01514
01515
01516
01517 std::vector<ot::TreeNode> mins (npes);
01518
01519 ot::TreeNode sendMin;
01520 if(!nodes.empty()) {
01521 sendMin = nodes[0];
01522 }else {
01523 sendMin = root;
01524 }
01525
01526 par::Mpi_Allgather<ot::TreeNode>(&sendMin, &(*mins.begin()), 1, comm);
01527
01528 #ifdef __DEBUG_OCT__
01529 MPI_Barrier(comm);
01530 if(!rank) {
01531 std::cout<<"Passed Step 5."<<std::endl;
01532 }
01533 MPI_Barrier(comm);
01534 #endif
01535
01536
01537
01538 unsigned int *partKeys = NULL;
01539
01540 if(keyLen) {
01541 partKeys = new unsigned int[keyLen];
01542 }
01543
01544 for (unsigned int i=0; i<keyLen; i++) {
01545 unsigned int idx;
01546
01547
01548
01549 bool found = seq::maxLowerBound<TreeNode >(mins,
01550 allKeys[i], idx, NULL, NULL);
01551 if (!found ) {
01552
01553 partKeys[i] = rank;
01554 } else {
01555 partKeys[i] = idx;
01556 }
01557 }
01558
01559 mins.clear();
01560
01561 #ifdef __DEBUG_OCT__
01562 MPI_Barrier(comm);
01563 if(!rank) {
01564 std::cout<<"Passed Step 6."<<std::endl;
01565 }
01566 MPI_Barrier(comm);
01567 #endif
01568
01569 int *numKeysSend = new int[npes];
01570 int *numKeysRecv = new int[npes];
01571
01572 for (int i=0; i<npes; i++) {
01573 numKeysSend[i] = 0;
01574 }
01575
01576
01577
01578 std::vector<std::vector<unsigned int> >sendKtmp(npes);
01579
01580 for (unsigned int i=0; i<keyLen; i++) {
01581 numKeysSend[partKeys[i]]++;
01582 sendKtmp[partKeys[i]].push_back(i);
01583 }
01584
01585
01586
01587 par::Mpi_Alltoall<int>(numKeysSend, numKeysRecv, 1, comm);
01588
01589
01590
01591 int *sendOffsets = new int[npes];
01592 sendOffsets[0] = 0;
01593 int *recvOffsets = new int[npes];
01594 recvOffsets[0] = 0;
01595
01596
01597
01598 for (int i = 1; i < npes; i++) {
01599 sendOffsets[i] = sendOffsets[i-1] +
01600 numKeysSend[i-1];
01601 recvOffsets[i] = recvOffsets[i-1] +
01602 numKeysRecv[i-1];
01603 }
01604
01605 std::vector<ot::TreeNode> sendK(sendOffsets[npes-1] + numKeysSend[npes-1]);
01606
01607 for(unsigned int i = 0; i < npes; i++) {
01608 #ifdef __DEBUG_OCT__
01609 assert(sendKtmp[i].size() == numKeysSend[i]);
01610 #endif
01611 for(unsigned int j = 0; j < numKeysSend[i]; j++) {
01612 sendK[sendOffsets[i] + j] = allKeys[sendKtmp[i][j]];
01613 }
01614 }
01615
01616 if(partKeys) {
01617 delete [] partKeys;
01618 partKeys = NULL;
01619 }
01620
01621 allKeys.clear();
01622 sendKtmp.clear();
01623
01624 std::vector<ot::TreeNode> recvK(recvOffsets[npes-1] + numKeysRecv[npes-1]);
01625
01626 ot::TreeNode* sendKptr = NULL;
01627 ot::TreeNode* recvKptr = NULL;
01628 if(!sendK.empty()) {
01629 sendKptr = &(*(sendK.begin()));
01630 }
01631 if(!recvK.empty()) {
01632 recvKptr = &(*(recvK.begin()));
01633 }
01634 par::Mpi_Alltoallv_sparse<ot::TreeNode>( sendKptr, numKeysSend, sendOffsets,
01635 recvKptr, numKeysRecv, recvOffsets, comm);
01636
01637
01638 sendK.clear();
01639
01640 delete [] sendOffsets;
01641 sendOffsets = NULL;
01642
01643 delete [] recvOffsets;
01644 recvOffsets = NULL;
01645
01646 delete [] numKeysSend;
01647 numKeysSend = NULL;
01648
01649 delete [] numKeysRecv;
01650 numKeysRecv = NULL;
01651
01652 seq::makeVectorUnique<TreeNode>(recvK,false);
01653
01654 keyLen = recvK.size();
01655
01656 #ifdef __DEBUG_OCT__
01657 MPI_Barrier(comm);
01658 if(!rank) {
01659 std::cout<<"Passed Step 7."<<std::endl;
01660 }
01661 MPI_Barrier(comm);
01662 #endif
01663
01664
01665
01666 std::vector<std::vector<TreeNode> > seedList(nodes.size());
01667 unsigned int lastIdx = 0;
01668 for(unsigned int i=0; i < keyLen; i++) {
01669 unsigned int idx;
01670
01671 bool flag = seq::maxLowerBound<TreeNode>(nodes, recvK[i], idx, &lastIdx,NULL);
01672 if (flag) {
01673 lastIdx = idx;
01674 #ifdef __DEBUG_OCT__
01675 assert(areComparable(nodes[idx], recvK[i]));
01676 #endif
01677 if (nodes[idx].isAncestor(recvK[i])) {
01678 if (lev > (nodes[idx].getLevel() + 1)) {
01679 seedList[idx].push_back(recvK[i].getAncestor(lev-1));
01680 }
01681 }
01682 }
01683 }
01684
01685 recvK.clear();
01686
01687 #ifdef __DEBUG_OCT__
01688 MPI_Barrier(comm);
01689 if(!rank) {
01690 std::cout<<"Passed Step 8."<<std::endl;
01691 }
01692 MPI_Barrier(comm);
01693 #endif
01694
01695
01696
01697
01698
01699
01700 std::vector<TreeNode> tmpList;
01701 std::vector<std::vector<TreeNode> >allInternalLeaves(nodes.size());
01702 unsigned int tmpSz = 0;
01703 for (unsigned int i=0;i<nodes.size();i++) {
01704 seq::makeVectorUnique<TreeNode>(seedList[i],true);
01705 if (!seedList[i].empty()) {
01706 nodes[i].completeSubtree(seedList[i], allInternalLeaves[i]);
01707 }
01708 tmpSz += allInternalLeaves[i].size();
01709 if (allInternalLeaves[i].empty()) {
01710 tmpSz++;
01711 }
01712 seedList[i].clear();
01713 }
01714
01715 #ifdef __DEBUG_OCT__
01716 MPI_Barrier(comm);
01717 if(!rank) {
01718 std::cout<<"Passed Step 9."<<std::endl;
01719 }
01720 MPI_Barrier(comm);
01721 #endif
01722
01723 seedList.clear();
01724 tmpList.resize(tmpSz);
01725 tmpSz=0;
01726 for (unsigned int i=0;i<nodes.size();i++) {
01727 if (!allInternalLeaves[i].empty()) {
01728 for (int k=0;k<allInternalLeaves[i].size();k++) {
01729 tmpList[tmpSz++] = allInternalLeaves[i][k];
01730 }
01731 allInternalLeaves[i].clear();
01732 } else {
01733 tmpList[tmpSz++] = nodes[i];
01734 }
01735 }
01736 allInternalLeaves.clear();
01737 nodes = tmpList;
01738 tmpList.clear();
01739
01740 #ifdef __DEBUG_OCT__
01741 MPI_Barrier(comm);
01742 if(!rank) {
01743 std::cout<<"Passed Step 10."<<std::endl;
01744 }
01745 MPI_Barrier(comm);
01746 #endif
01747
01748 if(rePart) {
01749 par::partitionW<ot::TreeNode>(nodes, NULL,comm);
01750 }
01751
01752 #ifdef __DEBUG_OCT__
01753 MPI_Barrier(comm);
01754 if(!rank) {
01755 std::cout<<"Passed Step 11."<<std::endl;
01756 }
01757 MPI_Barrier(comm);
01758 #endif
01759
01760 }
01761
01762 PROF_PAR_RIPPLE_TYPE3_END
01763 }
01764
01765
01766 int parallelRippleType2(std::vector<TreeNode> & nodes,
01767 bool incCorners, bool checkBailOut, bool rePart,
01768 unsigned int dim, unsigned int maxDepth, MPI_Comm comm)
01769 {
01770 PROF_PAR_RIPPLE_TYPE2_BEGIN
01771
01772 TreeNode root(dim,maxDepth);
01773
01774 unsigned int maxLev = 1;
01775 for(unsigned int i=0; i<nodes.size(); i++) {
01776 if (nodes[i].getLevel() > maxLev) {
01777 maxLev = nodes[i].getLevel();
01778 }
01779 }
01780
01781 int rank,npes;
01782 MPI_Comm_rank(comm,&rank);
01783 MPI_Comm_size(comm,&npes);
01784
01785 #ifdef __DEBUG_OCT__
01786 MPI_Barrier(comm);
01787 if(!rank) {
01788 std::cout<<"Computed local maxLev."<<std::endl;
01789 }
01790 MPI_Barrier(comm);
01791 #endif
01792
01793 unsigned int globalMaxLev = maxLev;
01794 par::Mpi_Allreduce<unsigned int>(&maxLev, &globalMaxLev, 1, MPI_MAX, comm);
01795
01796 #ifdef __DEBUG_OCT__
01797 MPI_Barrier(comm);
01798 if(!rank) {
01799 std::cout<<"Global maxLev: "<<globalMaxLev<<std::endl;
01800 }
01801 MPI_Barrier(comm);
01802 #endif
01803
01804 for (unsigned int lev = globalMaxLev; lev > 2; lev--)
01805 {
01806 #ifdef __DEBUG_OCT__
01807 MPI_Barrier(comm);
01808 if(!rank) {
01809 std::cout<<"Lev: "<<lev<<std::endl;
01810 }
01811 MPI_Barrier(comm);
01812 #endif
01813 if(checkBailOut) {
01814 unsigned int minLev = maxDepth;
01815 for (unsigned int i=0; i<nodes.size(); i++) {
01816 if (nodes[i].getLevel() < minLev) {
01817 minLev = nodes[i].getLevel();
01818 }
01819 }
01820
01821 unsigned int globalMinLev = minLev;
01822 par::Mpi_Allreduce<unsigned int>(&minLev, &globalMinLev, 1, MPI_MIN, comm);
01823
01824 if (globalMinLev >= (lev-1)) {
01825
01826
01827 break;
01828 }
01829 }
01830
01831 #ifdef __DEBUG_OCT__
01832 MPI_Barrier(comm);
01833 if(!rank) {
01834 std::cout<<"Passed Step 1."<<std::endl;
01835 }
01836 MPI_Barrier(comm);
01837 #endif
01838
01839 std::vector<TreeNode> wList;
01840 unsigned int wLen = 0;
01841 wList.resize(nodes.size());
01842
01843 for (int i=0; i<nodes.size(); i++) {
01844 if (nodes[i].getLevel() == lev) {
01845 wList[wLen] = nodes[i];
01846 wLen++;
01847 }
01848 }
01849 wList.resize(wLen);
01850
01851 #ifdef __DEBUG_OCT__
01852 MPI_Barrier(comm);
01853 if(!rank) {
01854 std::cout<<"Passed Step 2."<<std::endl;
01855 }
01856 MPI_Barrier(comm);
01857 #endif
01858
01859 std::vector<std::vector<TreeNode> >tList(wLen);
01860 for (unsigned int i=0; i < wLen; i++) {
01861 tList[i] = wList[i].getSearchKeys(incCorners);
01862 }
01863
01864 wList.clear();
01865
01866 #ifdef __DEBUG_OCT__
01867 MPI_Barrier(comm);
01868 if(!rank) {
01869 std::cout<<"Passed Step 3."<<std::endl;
01870 }
01871 MPI_Barrier(comm);
01872 #endif
01873
01874 std::vector<TreeNode> allKeys;
01875 for (unsigned int i=0; i < wLen; i++) {
01876 for(unsigned int j=0; j < tList[i].size(); j++) {
01877 if(tList[i][j] > root) {
01878 allKeys.push_back(tList[i][j]);
01879 }
01880 }
01881 }
01882 tList.clear();
01883
01884 #ifdef __DEBUG_OCT__
01885 MPI_Barrier(comm);
01886 if(!rank) {
01887 std::cout<<"Passed Step 4."<<std::endl;
01888 }
01889 std::cout<<rank<<": allKeys.size(): "
01890 <<allKeys.size()<<std::endl;
01891 MPI_Barrier(comm);
01892 #endif
01893
01894
01895
01896
01897
01898
01899
01900
01901
01902
01903
01904
01905
01906
01907
01908
01909
01910 unsigned int keyLen = allKeys.size();
01911
01912
01913
01914
01915
01916 std::vector<ot::TreeNode> mins (npes);
01917
01918 ot::TreeNode sendMin;
01919 if(!nodes.empty()) {
01920 sendMin = nodes[0];
01921 }else {
01922 sendMin = root;
01923 }
01924
01925 par::Mpi_Allgather<ot::TreeNode>(&sendMin, &(*mins.begin()), 1, comm);
01926
01927 #ifdef __DEBUG_OCT__
01928 MPI_Barrier(comm);
01929 if(!rank) {
01930 std::cout<<"Passed Step 5."<<std::endl;
01931 }
01932 MPI_Barrier(comm);
01933 #endif
01934
01935
01936
01937 unsigned int *partKeys = NULL;
01938
01939 if(keyLen) {
01940 partKeys = new unsigned int[keyLen];
01941 }
01942
01943 for (unsigned int i=0; i<keyLen; i++) {
01944 unsigned int idx;
01945
01946
01947
01948 bool found = seq::maxLowerBound<TreeNode >(mins,
01949 allKeys[i], idx, NULL, NULL);
01950 if (!found ) {
01951
01952 partKeys[i] = rank;
01953 } else {
01954 partKeys[i] = idx;
01955 }
01956 }
01957
01958 mins.clear();
01959
01960 #ifdef __DEBUG_OCT__
01961 MPI_Barrier(comm);
01962 if(!rank) {
01963 std::cout<<"Passed Step 6."<<std::endl;
01964 }
01965 MPI_Barrier(comm);
01966 #endif
01967
01968 int *numKeysSend = new int[npes];
01969 int *numKeysRecv = new int[npes];
01970
01971 for (int i=0; i<npes; i++) {
01972 numKeysSend[i] = 0;
01973 }
01974
01975
01976
01977 std::vector<std::vector<unsigned int> >sendKtmp(npes);
01978
01979 for (unsigned int i=0; i<keyLen; i++) {
01980 numKeysSend[partKeys[i]]++;
01981 sendKtmp[partKeys[i]].push_back(i);
01982 }
01983
01984
01985
01986 par::Mpi_Alltoall<int>(numKeysSend, numKeysRecv, 1, comm);
01987
01988
01989
01990 int *sendOffsets = new int[npes];
01991 sendOffsets[0] = 0;
01992 int *recvOffsets = new int[npes];
01993 recvOffsets[0] = 0;
01994
01995
01996
01997 for (int i = 1; i < npes; i++) {
01998 sendOffsets[i] = sendOffsets[i-1] +
01999 numKeysSend[i-1];
02000 recvOffsets[i] = recvOffsets[i-1] +
02001 numKeysRecv[i-1];
02002 }
02003
02004 std::vector<ot::TreeNode> sendK(sendOffsets[npes-1] + numKeysSend[npes-1]);
02005
02006 for(unsigned int i = 0; i < npes; i++) {
02007 #ifdef __DEBUG_OCT__
02008 assert(sendKtmp[i].size() == numKeysSend[i]);
02009 #endif
02010 for(unsigned int j = 0; j < numKeysSend[i]; j++) {
02011 sendK[sendOffsets[i] + j] = allKeys[sendKtmp[i][j]];
02012 }
02013 }
02014
02015 if(partKeys) {
02016 delete [] partKeys;
02017 partKeys = NULL;
02018 }
02019
02020 allKeys.clear();
02021 sendKtmp.clear();
02022
02023 std::vector<ot::TreeNode> recvK(recvOffsets[npes-1] + numKeysRecv[npes-1]);
02024
02025 ot::TreeNode* sendKptr = NULL;
02026 ot::TreeNode* recvKptr = NULL;
02027 if(!sendK.empty()) {
02028 sendKptr = &(*(sendK.begin()));
02029 }
02030 if(!recvK.empty()) {
02031 recvKptr = &(*(recvK.begin()));
02032 }
02033 par::Mpi_Alltoallv_sparse<ot::TreeNode>(sendKptr, numKeysSend, sendOffsets,
02034 recvKptr, numKeysRecv,recvOffsets, comm);
02035
02036 sendK.clear();
02037
02038 delete [] sendOffsets;
02039 sendOffsets = NULL;
02040
02041 delete [] recvOffsets;
02042 recvOffsets = NULL;
02043
02044 delete [] numKeysSend;
02045 numKeysSend = NULL;
02046
02047 delete [] numKeysRecv;
02048 numKeysRecv = NULL;
02049
02050 keyLen = recvK.size();
02051
02052 #ifdef __DEBUG_OCT__
02053 MPI_Barrier(comm);
02054 if(!rank) {
02055 std::cout<<"Passed Step 7."<<std::endl;
02056 }
02057 MPI_Barrier(comm);
02058 #endif
02059
02060
02061
02062 std::vector<std::vector<TreeNode> > seedList(nodes.size());
02063 for(unsigned int i=0; i < keyLen; i++) {
02064 unsigned int idx;
02065
02066 bool flag = seq::maxLowerBound<TreeNode>(nodes, recvK[i], idx,NULL,NULL);
02067 if (flag) {
02068 #ifdef __DEBUG_OCT__
02069 assert(areComparable(nodes[idx], recvK[i]));
02070 #endif
02071 if (nodes[idx].isAncestor(recvK[i])) {
02072 if (lev > (nodes[idx].getLevel() + 1)) {
02073 seedList[idx].push_back(recvK[i].getAncestor(lev-1));
02074 }
02075 }
02076 }
02077 }
02078
02079 recvK.clear();
02080
02081 #ifdef __DEBUG_OCT__
02082 MPI_Barrier(comm);
02083 if(!rank) {
02084 std::cout<<"Passed Step 8."<<std::endl;
02085 }
02086 MPI_Barrier(comm);
02087 #endif
02088
02089
02090
02091
02092
02093
02094
02095 std::vector<TreeNode> tmpList;
02096 std::vector<std::vector<TreeNode> >allInternalLeaves(nodes.size());
02097 unsigned int tmpSz = 0;
02098 for (unsigned int i=0;i<nodes.size();i++) {
02099 seq::makeVectorUnique<TreeNode>(seedList[i],false);
02100 if (!seedList[i].empty()) {
02101 nodes[i].completeSubtree(seedList[i], allInternalLeaves[i]);
02102 }
02103 tmpSz += allInternalLeaves[i].size();
02104 if (allInternalLeaves[i].empty()) {
02105 tmpSz++;
02106 }
02107 seedList[i].clear();
02108 }
02109
02110 #ifdef __DEBUG_OCT__
02111 MPI_Barrier(comm);
02112 if(!rank) {
02113 std::cout<<"Passed Step 9."<<std::endl;
02114 }
02115 MPI_Barrier(comm);
02116 #endif
02117
02118 seedList.clear();
02119 tmpList.resize(tmpSz);
02120 tmpSz=0;
02121 for (unsigned int i=0;i<nodes.size();i++) {
02122 if (!allInternalLeaves[i].empty()) {
02123 for (int k=0;k<allInternalLeaves[i].size();k++) {
02124 tmpList[tmpSz++] = allInternalLeaves[i][k];
02125 }
02126 allInternalLeaves[i].clear();
02127 } else {
02128 tmpList[tmpSz++] = nodes[i];
02129 }
02130 }
02131 allInternalLeaves.clear();
02132 nodes = tmpList;
02133 tmpList.clear();
02134
02135 #ifdef __DEBUG_OCT__
02136 MPI_Barrier(comm);
02137 if(!rank) {
02138 std::cout<<"Passed Step 10."<<std::endl;
02139 }
02140 MPI_Barrier(comm);
02141 #endif
02142
02143 if(rePart) {
02144 par::partitionW<ot::TreeNode>(nodes, NULL,comm);
02145 }
02146
02147 #ifdef __DEBUG_OCT__
02148 MPI_Barrier(comm);
02149 if(!rank) {
02150 std::cout<<"Passed Step 11."<<std::endl;
02151 }
02152 MPI_Barrier(comm);
02153 #endif
02154
02155 }
02156
02157 PROF_PAR_RIPPLE_TYPE2_END
02158 }
02159
02160 int parallelRippleType1(std::vector<TreeNode> & nodes,
02161 bool incCorners, bool checkBailOut, bool rePart,
02162 unsigned int dim, unsigned int maxDepth, MPI_Comm comm)
02163 {
02164 PROF_PAR_RIPPLE_TYPE1_BEGIN
02165
02166 TreeNode root(dim,maxDepth);
02167
02168 unsigned int maxLev = 1;
02169 for(unsigned int i=0; i<nodes.size(); i++) {
02170 if (nodes[i].getLevel() > maxLev) {
02171 maxLev = nodes[i].getLevel();
02172 }
02173 }
02174
02175 int rank,npes;
02176 MPI_Comm_rank(comm,&rank);
02177 MPI_Comm_size(comm,&npes);
02178
02179 #ifdef __DEBUG_OCT__
02180 MPI_Barrier(comm);
02181 if(!rank) {
02182 std::cout<<"Computed local maxLev."<<std::endl;
02183 }
02184 MPI_Barrier(comm);
02185 #endif
02186
02187 unsigned int globalMaxLev = maxLev;
02188 par::Mpi_Allreduce<unsigned int>(&maxLev,&globalMaxLev, 1, MPI_MAX, comm);
02189
02190 #ifdef __DEBUG_OCT__
02191 MPI_Barrier(comm);
02192 if(!rank) {
02193 std::cout<<"Global maxLev: "<<globalMaxLev<<std::endl;
02194 }
02195 MPI_Barrier(comm);
02196 #endif
02197
02198 for (unsigned int lev = globalMaxLev; lev > 2; lev--)
02199 {
02200 #ifdef __DEBUG_OCT__
02201 MPI_Barrier(comm);
02202 if(!rank) {
02203 std::cout<<"Lev: "<<lev<<std::endl;
02204 }
02205 MPI_Barrier(comm);
02206 #endif
02207 if(checkBailOut) {
02208 unsigned int minLev = maxDepth;
02209 for (unsigned int i=0; i<nodes.size(); i++) {
02210 if (nodes[i].getLevel() < minLev) {
02211 minLev = nodes[i].getLevel();
02212 }
02213 }
02214
02215 unsigned int globalMinLev = minLev;
02216 par::Mpi_Allreduce<unsigned int>(&minLev, &globalMinLev, 1, MPI_MIN, comm);
02217
02218 if (globalMinLev >= (lev-1)) {
02219
02220
02221 break;
02222 }
02223 }
02224
02225 #ifdef __DEBUG_OCT__
02226 MPI_Barrier(comm);
02227 if(!rank) {
02228 std::cout<<"Passed Step 1."<<std::endl;
02229 }
02230 MPI_Barrier(comm);
02231 #endif
02232
02233 std::vector<TreeNode> wList;
02234 unsigned int wLen = 0;
02235 wList.resize(nodes.size());
02236
02237 for (int i=0; i<nodes.size(); i++) {
02238 if (nodes[i].getLevel() == lev) {
02239 wList[wLen] = nodes[i];
02240 wLen++;
02241 }
02242 }
02243 wList.resize(wLen);
02244
02245 #ifdef __DEBUG_OCT__
02246 MPI_Barrier(comm);
02247 if(!rank) {
02248 std::cout<<"Passed Step 2."<<std::endl;
02249 }
02250 MPI_Barrier(comm);
02251 #endif
02252
02253 std::vector<std::vector<TreeNode> >tList(wLen);
02254 for (unsigned int i=0; i < wLen; i++) {
02255 tList[i] = wList[i].getSearchKeys(incCorners);
02256 }
02257
02258 wList.clear();
02259
02260 #ifdef __DEBUG_OCT__
02261 MPI_Barrier(comm);
02262 if(!rank) {
02263 std::cout<<"Passed Step 3."<<std::endl;
02264 }
02265 MPI_Barrier(comm);
02266 #endif
02267
02268 std::vector<TreeNode> allKeys;
02269 for (unsigned int i=0; i < wLen; i++) {
02270 for(unsigned int j=0; j < tList[i].size(); j++) {
02271 if(tList[i][j] > root) {
02272 allKeys.push_back(tList[i][j]);
02273 }
02274 }
02275 }
02276 tList.clear();
02277
02278 #ifdef __DEBUG_OCT__
02279 MPI_Barrier(comm);
02280 if(!rank) {
02281 std::cout<<"Passed Step 4."<<std::endl;
02282 }
02283 std::cout<<rank<<": allKeys.size(): "
02284 <<allKeys.size()<<std::endl;
02285 MPI_Barrier(comm);
02286 #endif
02287
02288
02289
02290
02291
02292
02293
02294
02295
02296
02297
02298
02299
02300
02301
02302
02303
02304
02305
02306 par::removeDuplicates<ot::TreeNode>(allKeys,
02307 false,comm);
02308 unsigned int keyLen = allKeys.size();
02309
02310 #ifdef __DEBUG_OCT__
02311 MPI_Barrier(comm);
02312 if(!rank) {
02313 std::cout<<"Passed Step 5."<<std::endl;
02314 }
02315 MPI_Barrier(comm);
02316 #endif
02317
02318
02319
02320
02321 std::vector<ot::TreeNode> mins (npes);
02322
02323 ot::TreeNode sendMin;
02324 if(!nodes.empty()) {
02325 sendMin = nodes[0];
02326 }else {
02327 sendMin = root;
02328 }
02329
02330 par::Mpi_Allgather<ot::TreeNode>(&sendMin, &(*mins.begin()), 1, comm);
02331
02332 #ifdef __DEBUG_OCT__
02333 MPI_Barrier(comm);
02334 if(!rank) {
02335 std::cout<<"Passed Step 6."<<std::endl;
02336 }
02337 MPI_Barrier(comm);
02338 #endif
02339
02340
02341
02342 unsigned int *partKeys = NULL;
02343
02344 if(keyLen) {
02345 partKeys = new unsigned int[keyLen];
02346 }
02347
02348 for (unsigned int i=0; i<keyLen; i++) {
02349 unsigned int idx;
02350
02351
02352
02353 bool found = seq::maxLowerBound<TreeNode >(mins,
02354 allKeys[i], idx, NULL, NULL);
02355 if (!found ) {
02356
02357 partKeys[i] = rank;
02358 } else {
02359 partKeys[i] = idx;
02360 }
02361 }
02362
02363 mins.clear();
02364
02365 #ifdef __DEBUG_OCT__
02366 MPI_Barrier(comm);
02367 if(!rank) {
02368 std::cout<<"Passed Step 7."<<std::endl;
02369 }
02370 MPI_Barrier(comm);
02371 #endif
02372
02373 int *numKeysSend = new int[npes];
02374 int *numKeysRecv = new int[npes];
02375
02376 for (int i=0; i<npes; i++) {
02377 numKeysSend[i] = 0;
02378 }
02379
02380
02381 for (unsigned int i=0; i<keyLen; i++) {
02382 numKeysSend[partKeys[i]]++;
02383 }
02384
02385
02386
02387 par::Mpi_Alltoall<int>(numKeysSend, numKeysRecv, 1, comm);
02388
02389
02390
02391 int *sendOffsets = new int[npes];
02392 sendOffsets[0] = 0;
02393 int *recvOffsets = new int[npes];
02394 recvOffsets[0] = 0;
02395
02396
02397
02398
02399 for (int i = 1; i < npes; i++) {
02400 sendOffsets[i] = sendOffsets[i-1] +
02401 numKeysSend[i-1];
02402 recvOffsets[i] = recvOffsets[i-1] +
02403 numKeysRecv[i-1];
02404 }
02405
02406
02407
02408
02409
02410
02411 std::vector<ot::TreeNode> recvK(recvOffsets[npes-1]
02412 + numKeysRecv[npes-1]);
02413
02414 if(partKeys) {
02415 delete [] partKeys;
02416 partKeys = NULL;
02417 }
02418
02419 ot::TreeNode* allKeysPtr = NULL;
02420 ot::TreeNode* recvKptr = NULL;
02421 if(!allKeys.empty()) {
02422 allKeysPtr = &(*(allKeys.begin()));
02423 }
02424 if(!recvK.empty()) {
02425 recvKptr = &(*(recvK.begin()));
02426 }
02427 par::Mpi_Alltoallv_sparse<ot::TreeNode>(allKeysPtr, numKeysSend, sendOffsets,
02428 recvKptr, numKeysRecv, recvOffsets, comm);
02429
02430 allKeys.clear();
02431
02432 delete [] sendOffsets;
02433 sendOffsets = NULL;
02434
02435 delete [] recvOffsets;
02436 recvOffsets = NULL;
02437
02438 delete [] numKeysSend;
02439 numKeysSend = NULL;
02440
02441 delete [] numKeysRecv;
02442 numKeysRecv = NULL;
02443
02444 keyLen = recvK.size();
02445
02446 #ifdef __DEBUG_OCT__
02447 MPI_Barrier(comm);
02448 if(!rank) {
02449 std::cout<<"Passed Step 8."<<std::endl;
02450 }
02451 MPI_Barrier(comm);
02452 #endif
02453
02454
02455
02456
02457
02458
02459
02460
02461 std::vector<std::vector<TreeNode> > seedList(nodes.size());
02462 unsigned int lastIdx = 0;
02463 for(unsigned int i=0; i < keyLen; i++) {
02464 unsigned int idx;
02465
02466 bool flag = seq::maxLowerBound<TreeNode>(nodes,
02467 recvK[i], idx,&lastIdx,NULL);
02468 if (flag) {
02469 lastIdx = idx;
02470 #ifdef __DEBUG_OCT__
02471 assert(areComparable(nodes[idx], recvK[i]));
02472 #endif
02473 if (nodes[idx].isAncestor(recvK[i])) {
02474 if (lev > (nodes[idx].getLevel() + 1)) {
02475 seedList[idx].push_back(
02476 recvK[i].getAncestor(lev-1));
02477 }
02478 }
02479 }
02480 }
02481
02482 recvK.clear();
02483
02484 #ifdef __DEBUG_OCT__
02485 MPI_Barrier(comm);
02486 if(!rank) {
02487 std::cout<<"Passed Step 9."<<std::endl;
02488 }
02489 MPI_Barrier(comm);
02490 #endif
02491
02492
02493
02494
02495
02496
02497
02498 std::vector<TreeNode> tmpList;
02499 std::vector<std::vector<TreeNode> >allInternalLeaves(nodes.size());
02500 unsigned int tmpSz = 0;
02501 for (unsigned int i=0;i<nodes.size();i++) {
02502 seq::makeVectorUnique<TreeNode>(seedList[i],true);
02503 if (!seedList[i].empty()) {
02504 nodes[i].completeSubtree(seedList[i], allInternalLeaves[i]);
02505 }
02506 tmpSz += allInternalLeaves[i].size();
02507 if (allInternalLeaves[i].empty()) {
02508 tmpSz++;
02509 }
02510 seedList[i].clear();
02511 }
02512
02513 #ifdef __DEBUG_OCT__
02514 MPI_Barrier(comm);
02515 if(!rank) {
02516 std::cout<<"Passed Step 10."<<std::endl;
02517 }
02518 MPI_Barrier(comm);
02519 #endif
02520
02521 seedList.clear();
02522 tmpList.resize(tmpSz);
02523 tmpSz=0;
02524 for (unsigned int i=0;i<nodes.size();i++) {
02525 if (!allInternalLeaves[i].empty()) {
02526 for (int k=0;k<allInternalLeaves[i].size();k++) {
02527 tmpList[tmpSz++] = allInternalLeaves[i][k];
02528 }
02529 allInternalLeaves[i].clear();
02530 } else {
02531 tmpList[tmpSz++] = nodes[i];
02532 }
02533 }
02534 allInternalLeaves.clear();
02535 nodes = tmpList;
02536 tmpList.clear();
02537
02538 #ifdef __DEBUG_OCT__
02539 MPI_Barrier(comm);
02540 if(!rank) {
02541 std::cout<<"Passed Step 11."<<std::endl;
02542 }
02543 MPI_Barrier(comm);
02544 #endif
02545
02546 if(rePart) {
02547 par::partitionW<ot::TreeNode>(nodes, NULL,comm);
02548 }
02549
02550 #ifdef __DEBUG_OCT__
02551 MPI_Barrier(comm);
02552 if(!rank) {
02553 std::cout<<"Passed Step 12."<<std::endl;
02554 }
02555 MPI_Barrier(comm);
02556 #endif
02557
02558 }
02559
02560 PROF_PAR_RIPPLE_TYPE1_END
02561 }
02562
02563
02564 int selectNeighboringBlocks(const std::vector<TreeNode>& allBlocks,
02565 const std::vector<TreeNode>& blocks, const std::vector<unsigned int>& maxBlockBndVec,
02566 int myRank, std::vector<TreeNode>& myNhBlocks) {
02567 PROF_PICK_NH_BLOCKS_BEGIN
02568
02569 for (int k = 0; k < allBlocks.size(); k++) {
02570 if (allBlocks[k].getWeight() == myRank) {
02571
02572 continue;
02573 }
02574
02575 unsigned int othMinX = allBlocks[k].minX();
02576 unsigned int othMinY = allBlocks[k].minY();
02577 unsigned int othMinZ = allBlocks[k].minZ();
02578 unsigned int othMaxX = allBlocks[k].maxX();
02579 unsigned int othMaxY = allBlocks[k].maxY();
02580 unsigned int othMaxZ = allBlocks[k].maxZ();
02581
02582
02583
02584 for (int j = 0; j < blocks.size(); j++) {
02585 unsigned int myLen = maxBlockBndVec[j];
02586 unsigned int myMinX = blocks[j].minX();
02587 unsigned int myMinY = blocks[j].minY();
02588 unsigned int myMinZ = blocks[j].minZ();
02589 unsigned int myMaxX = blocks[j].maxX();
02590 unsigned int myMaxY = blocks[j].maxY();
02591 unsigned int myMaxZ = blocks[j].maxZ();
02592 unsigned int xlow = ( (myMinX >= myLen) ? (myMinX - myLen) : myMinX );
02593 unsigned int ylow = ( (myMinY >= myLen) ? (myMinY - myLen) : myMinY );
02594 unsigned int zlow = ( (myMinZ >= myLen) ? (myMinZ - myLen) : myMinZ );
02595 unsigned int xhigh = ( myMaxX + myLen );
02596 unsigned int yhigh = ( myMaxY + myLen );
02597 unsigned int zhigh = ( myMaxZ + myLen );
02598
02599 if ( (othMinX < xhigh) && (othMinY < yhigh) && (othMinZ < zhigh)
02600 && (othMaxX > xlow) && (othMaxY > ylow) && (othMaxZ > zlow) ) {
02601 myNhBlocks.push_back(allBlocks[k]);
02602 break;
02603 }
02604 }
02605 }
02606
02607 PROF_PICK_NH_BLOCKS_END
02608 }
02609
02610 int mergeComboBalAndPickBoundary(std::vector<ot::TreeNode>& out,
02611 std::vector<ot::TreeNode>& allBoundaryLeaves,
02612 const ot::TreeNode& firstBlock, const ot::TreeNode& lastBlock) {
02613 PROF_MERGE_COMBO_BAL_BEGIN
02614
02615
02616
02617 std::vector<TreeNode> tmpNodeList(out.size() + allBoundaryLeaves.size());
02618
02619 unsigned int tmpLsz = 0;
02620 unsigned int bndCnt = 0;
02621 unsigned int bndSz = allBoundaryLeaves.size();
02622
02623 for (unsigned int i = 0;i < out.size();i++) {
02624 if ( bndCnt < allBoundaryLeaves.size() ) {
02625 if ( out[i] == allBoundaryLeaves[bndCnt] ) {
02626 tmpNodeList[tmpLsz] = out[i];
02627 tmpLsz++;
02628 bndCnt++;
02629 } else if (out[i] < allBoundaryLeaves[bndCnt] ) {
02630 #ifdef __DEBUG_OCT__
02631 assert(areComparable(out[i], allBoundaryLeaves[bndCnt]));
02632 #endif
02633 if (out[i].isAncestor(allBoundaryLeaves[bndCnt]) ) {
02634 while ( (bndCnt < bndSz) &&
02635 out[i].isAncestor(allBoundaryLeaves[bndCnt]) ) {
02636 tmpNodeList[tmpLsz] = allBoundaryLeaves[bndCnt];
02637 tmpLsz++;
02638 bndCnt++;
02639 #ifdef __DEBUG_OCT__
02640 if(bndCnt < bndSz) {
02641 assert(areComparable(out[i], allBoundaryLeaves[bndCnt]));
02642 }
02643 #endif
02644 }
02645 } else {
02646 tmpNodeList[tmpLsz] = out[i];
02647 tmpLsz++;
02648 }
02649 } else {
02650
02651 tmpNodeList[tmpLsz] = allBoundaryLeaves[bndCnt];
02652 tmpLsz++;
02653 bndCnt++;
02654 }
02655 } else {
02656 tmpNodeList[tmpLsz] = out[i];
02657 tmpLsz++;
02658 }
02659 }
02660
02661 tmpNodeList.resize(tmpLsz);
02662 out = tmpNodeList;
02663
02664 tmpNodeList = allBoundaryLeaves;
02665 pickInterProcessorBoundaryNodes(tmpNodeList, allBoundaryLeaves,
02666 firstBlock, lastBlock);
02667 tmpNodeList.clear();
02668
02669 PROF_MERGE_COMBO_BAL_END
02670 }
02671
02672 int finalMergeInBal(std::vector<ot::TreeNode>& out, std::vector<ot::TreeNode>& allBoundaryLeaves) {
02673 PROF_FINAL_MERGE_IN_BAL_BEGIN
02674
02675 std::vector<ot::TreeNode> tmpNodeList(out.size() + allBoundaryLeaves.size());
02676
02677 unsigned int tmpLsz = 0;
02678 unsigned int bndSz = allBoundaryLeaves.size();
02679 unsigned int bndCnt = 0;
02680 for (unsigned int i = 0; i < out.size(); i++) {
02681 if ( bndCnt < bndSz ) {
02682 if ( out[i] == allBoundaryLeaves[bndCnt] ) {
02683 tmpNodeList[tmpLsz++] = out[i];
02684 bndCnt++;
02685 } else if (out[i] < allBoundaryLeaves[bndCnt] ) {
02686 #ifdef __DEBUG_OCT__
02687 assert(areComparable(out[i], allBoundaryLeaves[bndCnt]));
02688 #endif
02689 if (out[i].isAncestor(allBoundaryLeaves[bndCnt]) ) {
02690 while ( (bndCnt < bndSz ) && out[i].isAncestor(allBoundaryLeaves[bndCnt]) ) {
02691 tmpNodeList[tmpLsz++] = allBoundaryLeaves[bndCnt++];
02692 #ifdef __DEBUG_OCT__
02693 if(bndCnt < bndSz) {
02694 assert(areComparable(out[i], allBoundaryLeaves[bndCnt]));
02695 }
02696 #endif
02697 }
02698 } else {
02699 tmpNodeList[tmpLsz++] = out[i];
02700 }
02701 } else {
02702
02703 tmpNodeList[tmpLsz++] = allBoundaryLeaves[bndCnt++];
02704 }
02705 } else {
02706 tmpNodeList[tmpLsz++] = out[i];
02707 }
02708 }
02709
02710 tmpNodeList.resize(tmpLsz);
02711 out = tmpNodeList;
02712 tmpNodeList.clear();
02713 allBoundaryLeaves.clear();
02714
02715 PROF_FINAL_MERGE_IN_BAL_END
02716 }
02717
02718 int prepareBalComm1MessagesType1(const std::vector<ot::TreeNode>& allBoundaryLeaves,
02719 const std::vector<ot::TreeNode>& myNhBlocks, int npes, unsigned int maxDepth,
02720 std::vector<TreeNode>* sendNodes, std::vector<unsigned int>* sentToPid, int* sendCnt) {
02721 PROF_PREP_BAL_COMM1_MSSG_BEGIN
02722
02723 unsigned int allBndSz = allBoundaryLeaves.size();
02724 for (int j = 0; j < allBndSz; j++) {
02725 unsigned int myLen = (1u << (maxDepth - (allBoundaryLeaves[j].getLevel())));
02726 unsigned int myMinX = allBoundaryLeaves[j].minX();
02727 unsigned int myMinY = allBoundaryLeaves[j].minY();
02728 unsigned int myMinZ = allBoundaryLeaves[j].minZ();
02729 unsigned int xlow = ( (myMinX >= myLen) ? (myMinX - myLen) : myMinX );
02730 unsigned int ylow = ( (myMinY >= myLen) ? (myMinY - myLen) : myMinY );
02731 unsigned int zlow = ( (myMinZ >= myLen) ? (myMinZ - myLen) : myMinZ );
02732 unsigned int xhigh = ( myMinX + (2*myLen) );
02733 unsigned int yhigh = ( myMinY + (2*myLen) );
02734 unsigned int zhigh = ( myMinZ + (2*myLen) );
02735
02736 unsigned int lastP = npes;
02737
02738
02739
02740 for (int k = 0; k < myNhBlocks.size(); k++) {
02741 if (myNhBlocks[k].getWeight() == lastP) {
02742
02743
02744
02745 continue;
02746 }
02747
02748 unsigned int othMinX = myNhBlocks[k].minX();
02749 unsigned int othMinY = myNhBlocks[k].minY();
02750 unsigned int othMinZ = myNhBlocks[k].minZ();
02751 unsigned int othMaxX = myNhBlocks[k].maxX();
02752 unsigned int othMaxY = myNhBlocks[k].maxY();
02753 unsigned int othMaxZ = myNhBlocks[k].maxZ();
02754
02755 if ( (othMinX < xhigh) && (othMinY < yhigh) && (othMinZ < zhigh)
02756 && (othMaxX > xlow) && (othMaxY > ylow) && (othMaxZ > zlow) ) {
02757 sendNodes[myNhBlocks[k].getWeight()].push_back(allBoundaryLeaves[j]);
02758 sentToPid[j].push_back(myNhBlocks[k].getWeight());
02759 sendCnt[myNhBlocks[k].getWeight()]++;
02760 lastP = myNhBlocks[k].getWeight();
02761 }
02762 }
02763 }
02764
02765 PROF_PREP_BAL_COMM1_MSSG_END
02766 }
02767
02768 int prepareWlistInBal(const std::vector<ot::TreeNode>& recvK1,
02769 const int* recvCnt, int npes, const ot::TreeNode& myFirstBlock,
02770 const ot::TreeNode& myLastBlock, std::vector<TreeNode>& wList,
02771 std::vector<std::vector<unsigned int> >& wListRanks) {
02772 PROF_PREP_BAL_WLIST_BEGIN
02773
02774 std::vector<ot::TreeNode> recvKtemp = recvK1;
02775
02776 int counter = 0;
02777 for(int i = 0; i < npes; i++) {
02778 for(int j = counter; j < (counter + recvCnt[i]); j++) {
02779 recvKtemp[j].setWeight(i);
02780 }
02781 counter += recvCnt[i];
02782 }
02783
02784 std::vector<std::vector<ot::TreeNode> > wListTmp(recvKtemp.size());
02785 for (int i = 0; i < recvKtemp.size(); i++) {
02786 std::vector<TreeNode> myNh = recvKtemp[i].getAllNeighbours();
02787 unsigned int myNhSz = myNh.size();
02788 for (int k = 0; k < myNhSz; k++) {
02789
02790
02791
02792
02793
02794
02795
02796
02797
02798
02799
02800
02801 #ifdef __DEBUG_OCT__
02802 assert(areComparable(myNh[k], myFirstBlock));
02803 #endif
02804 if ( (myNh[k].isAncestor(myFirstBlock)) ||
02805 ( (myNh[k] >= myFirstBlock) && (myNh[k] <= myLastBlock.getDLD()) ) ) {
02806 myNh[k].setWeight(recvKtemp[i].getWeight());
02807 wListTmp[i].push_back(myNh[k]);
02808 }
02809 }
02810 myNh.clear();
02811 }
02812
02813 recvKtemp.clear();
02814
02815 for (int i = 0;i < wListTmp.size(); i++) {
02816 wList.insert(wList.end(), wListTmp[i].begin(), wListTmp[i].end());
02817 wListTmp[i].clear();
02818 }
02819 wListTmp.clear();
02820
02821 std::sort(wList.begin(),wList.end());
02822
02823 unsigned int wCtr=0;
02824 unsigned int wListSz = wList.size();
02825 if (wCtr < wListSz) {
02826 std::vector<unsigned int> tmpWr;
02827 tmpWr.push_back(wList[0].getWeight());
02828 wList[0].setWeight(1);
02829 wListRanks.push_back (tmpWr);
02830 tmpWr.clear();
02831 wCtr++;
02832 }
02833
02834 unsigned int rowCtr = 0;
02835 while (wCtr < wListSz) {
02836 if (wList[wCtr] == wList[wCtr-1]) {
02837 wListRanks[rowCtr].push_back(wList[wCtr].getWeight());
02838 wList[wCtr].setWeight(1);
02839 } else {
02840 seq::makeVectorUnique<unsigned int>(wListRanks[rowCtr],false);
02841 std::vector<unsigned int> tmpWr;
02842 tmpWr.push_back(wList[wCtr].getWeight());
02843 wList[wCtr].setWeight(1);
02844 wListRanks.push_back(tmpWr);
02845 tmpWr.clear();
02846 rowCtr++;
02847 }
02848 wCtr++;
02849 }
02850
02851 seq::makeVectorUnique<TreeNode>(wList, true);
02852
02853 PROF_PREP_BAL_WLIST_END
02854 }
02855
02856
02857
02858
02859
02860
02861
02862
02863
02864
02865
02866
02867
02868
02869 int prepareBalComm2Messages(const std::vector<ot::TreeNode>& allBoundaryLeaves,
02870 const std::vector<ot::TreeNode>& wList,
02871 const std::vector<std::vector<unsigned int> >& wListRanks,
02872 std::vector<TreeNode>* sendNodes, std::vector<unsigned int>* sentToPid, int* sendCnt) {
02873 PROF_PREP_BAL_COMM2_MSSG_BEGIN
02874
02875
02876
02877
02878
02879
02880
02881
02882
02883
02884
02885
02886
02887
02888
02889
02890
02891
02892
02893
02894
02895
02896
02897
02898
02899
02900
02901
02902
02903
02904
02905
02906
02907
02908
02909 int lastStart = 0;
02910 unsigned int wListSz = wList.size();
02911 unsigned int allBndSz = allBoundaryLeaves.size();
02912 for (int ii = 0; ii < wListSz; ii++) {
02913 for (int j = lastStart; j < allBndSz; j++) {
02914
02915 #ifdef __DEBUG_OCT__
02916 assert(areComparable(wList[ii], allBoundaryLeaves[j]));
02917 #endif
02918 if ((wList[ii] == allBoundaryLeaves[j]) || (wList[ii].isAncestor(allBoundaryLeaves[j]))
02919 || (allBoundaryLeaves[j].isAncestor(wList[ii]))) {
02920
02921 for (int jj = 0; jj < wListRanks[ii].size(); jj++) {
02922 bool sentAlready = false;
02923 for (int kk = 0; kk < sentToPid[j].size(); kk++) {
02924
02925 if (sentToPid[j][kk] == wListRanks[ii][jj]) {
02926 sentAlready = true;
02927 break;
02928 }
02929 }
02930 if (!sentAlready) {
02931 sendNodes[wListRanks[ii][jj]].push_back(allBoundaryLeaves[j]);
02932 sendCnt[wListRanks[ii][jj]]++;
02933 sentToPid[j].push_back(wListRanks[ii][jj]);
02934 }
02935 }
02936 } else if (wList[ii] < allBoundaryLeaves[j]) {
02937
02938
02939
02940 break;
02941 } else {
02942
02943
02944
02945
02946 lastStart++;
02947 }
02948 }
02949 }
02950
02951 PROF_PREP_BAL_COMM2_MSSG_END
02952 }
02953
02954 int mergeRecvKeysInBal(const std::vector<ot::TreeNode>& recvK1, const int* recvOffsets1,
02955 const std::vector<ot::TreeNode>& recvK2, const int* recvOffsets2,
02956 int npes, std::vector<ot::TreeNode>& recvK) {
02957 PROF_MERGE_RECV_KEYS_BAL_BEGIN
02958
02959 recvK.resize(recvK1.size() + recvK2.size());
02960
02961
02962
02963
02964
02965
02966 unsigned int recvKcnt = 0;
02967 for (int i = 0; i < npes; i++) {
02968 unsigned int nextFrom1 = recvOffsets1[i];
02969 unsigned int nextFrom2 = recvOffsets2[i];
02970 unsigned int end1 = ((i <(npes - 1)) ? recvOffsets1[i+1] : recvK1.size());
02971 unsigned int end2 = ((i <(npes - 1)) ? recvOffsets2[i+1] : recvK2.size());
02972 while ( (nextFrom1 < end1) || (nextFrom2 < end2) ) {
02973
02974 if (nextFrom1 < end1 && nextFrom2 < end2) {
02975 while (recvK1[nextFrom1] <= recvK2[nextFrom2]) {
02976 recvK[recvKcnt++] = recvK1[nextFrom1];
02977 nextFrom1++;
02978 if (nextFrom1 >= end1) {
02979 break;
02980 }
02981 }
02982 }
02983
02984 if (nextFrom1 < end1 && nextFrom2 < end2) {
02985 while (recvK1[nextFrom1] > recvK2[nextFrom2]) {
02986 recvK[recvKcnt++] = recvK2[nextFrom2];
02987 nextFrom2++;
02988 if (nextFrom2 >= end2) {
02989 break;
02990 }
02991 }
02992 }
02993
02994 if (nextFrom2 >= end2) {
02995 while (nextFrom1 < end1) {
02996 recvK[recvKcnt++] = recvK1[nextFrom1];
02997 nextFrom1++;
02998 }
02999 }
03000
03001 if (nextFrom1 >= end1) {
03002 while (nextFrom2 < end2) {
03003 recvK[recvKcnt++] = recvK2[nextFrom2];
03004 nextFrom2++;
03005 }
03006 }
03007
03008 }
03009 }
03010
03011 PROF_MERGE_RECV_KEYS_BAL_END
03012 }
03013
03014 int prepareBalComm1MessagesType2(const std::vector<ot::TreeNode>& allBoundaryLeaves,
03015 const std::vector<ot::TreeNode>& minsAllBlocks, int rank, unsigned int dim,
03016 unsigned int maxDepth, std::vector<TreeNode>* sendNodes,
03017 std::vector<unsigned int>* sentToPid, int* sendCnt) {
03018 PROF_PREP_BAL_COMM1_MSSG_BEGIN
03019
03020
03021
03022
03023
03024
03025 unsigned int allBndSz = allBoundaryLeaves.size();
03026 ot::TreeNode rootNode(dim, maxDepth);
03027 for (int j = 0; j < allBndSz; j++) {
03028 std::vector<ot::TreeNode> myNh = allBoundaryLeaves[j].getAllNeighbours();
03029
03030 seq::makeVectorUnique<ot::TreeNode>(myNh, false);
03031 unsigned int stIdx = ( (myNh[0] == rootNode) ? 1 : 0 );
03032
03033 std::vector<unsigned int> pIds;
03034 for(unsigned int i = stIdx; i < myNh.size(); i++) {
03035 unsigned int idx1;
03036 unsigned int idx2;
03037 seq::maxLowerBound<ot::TreeNode>(minsAllBlocks, myNh[i].getDFD(), idx1, NULL, NULL);
03038 seq::maxLowerBound<ot::TreeNode>(minsAllBlocks, myNh[i].getDLD(), idx2, NULL, NULL);
03039 for(int k = idx1; k <= idx2; k++) {
03040 pIds.push_back(k);
03041 }
03042 }
03043
03044 #ifdef __DEBUG_OCT__
03045
03046
03047
03048
03049
03050 assert(seq::test::isSorted<unsigned int>(pIds));
03051 #endif
03052
03053 seq::makeVectorUnique<unsigned int>(pIds, true);
03054
03055 for(int i = 0; i < pIds.size(); i++) {
03056 if(pIds[i] != rank) {
03057 sendNodes[pIds[i]].push_back(allBoundaryLeaves[j]);
03058 sentToPid[j].push_back(pIds[i]);
03059 sendCnt[pIds[i]]++;
03060 }
03061 }
03062 }
03063
03064 PROF_PREP_BAL_COMM1_MSSG_END
03065 }
03066
03067 }
03068
03069
03070