00001
00009 #include "TreeNode.h"
00010 #include "parUtils.h"
00011
00012 #ifdef __DEBUG__
00013 #ifndef __DEBUG_OCT__
00014 #define __DEBUG_OCT__
00015 #endif
00016 #endif
00017
00018 #ifdef __DEBUG_OCT__
00019 #ifndef __MEASURE_BPART_COMM__
00020 #define __MEASURE_BPART_COMM__
00021 #endif
00022 #endif
00023
00024 namespace ot {
00025
00026
00027
00028 int blockPartStage1_p2o(std::vector<TreeNode> &nodes, std::vector<TreeNode> &blocks,
00029 unsigned int dim, unsigned int maxDepth, MPI_Comm comm) {
00030 #ifdef __PROF_WITH_BARRIER__
00031 MPI_Barrier(comm);
00032 #endif
00033 PROF_BLKPART1_BEGIN
00034 int npes, rank;
00035 MPI_Comm_rank(comm, &rank);
00036 MPI_Comm_size(comm, &npes);
00037
00038 par::partitionW<ot::TreeNode>(nodes, NULL, comm);
00039
00040 assert(nodes.size() > (1 << dim) );
00041
00042
00043 std::vector<TreeNode> localCoarse;
00044
00045
00046
00047 appendCompleteRegion(nodes[0], nodes[nodes.size()-1], localCoarse, true, true);
00048
00049
00050
00051 std::vector<TreeNode> localBlocks;
00052
00053
00054 unsigned int minLevel = maxDepth;
00055 for(int i = 0; i < localCoarse.size(); i++) {
00056 if(localCoarse[i].getLevel() < minLevel) {
00057 minLevel = localCoarse[i].getLevel();
00058 }
00059 }
00060
00061 for(int i = 0; i < localCoarse.size(); i++) {
00062 if(localCoarse[i].getLevel() == minLevel) {
00063 localBlocks.push_back(localCoarse[i]);
00064 }
00065 }
00066
00067 localCoarse.clear();
00068
00069 for (unsigned int i = 0; i < localBlocks.size(); i++) {
00070 localBlocks[i].setWeight(1);
00071 }
00072
00073
00074
00075
00076
00077
00078
00079
00080
00081 completeOctree(localBlocks, blocks, dim, maxDepth, false, true, false, comm);
00082
00083 localBlocks.clear();
00084
00085 PROF_BLKPART1_END
00086 }
00087
00088 int blockPartStage2_p2o(std::vector<TreeNode> &nodes, std::vector<TreeNode> &globalCoarse,
00089 std::vector<ot::TreeNode>& minsAllBlocks, unsigned int dim, unsigned int maxDepth,
00090 MPI_Comm comm) {
00091 #ifdef __PROF_WITH_BARRIER__
00092 MPI_Barrier(comm);
00093 #endif
00094 PROF_BLKPART2_BEGIN
00095
00096 ot::TreeNode rootNode (dim,maxDepth);
00097
00098 int npes, rank;
00099 MPI_Comm_rank(comm,&rank);
00100 MPI_Comm_size(comm,&npes);
00101
00102 int *sendCnt = new int[npes];
00103 int *recvCnt = new int[npes];
00104 int *sendOffsets = new int[npes];
00105 int *recvOffsets = new int[npes];
00106
00107
00108
00109 std::vector<TreeNode> _mins_maxs(2*npes);
00110
00111
00112 ot::TreeNode sendMinMax[2];
00113
00114 if (!nodes.empty()) {
00115 sendMinMax[0] = nodes[0];
00116 sendMinMax[1] = nodes[nodes.size()-1];
00117 } else {
00118 sendMinMax[0] = rootNode;
00119 sendMinMax[1] = rootNode;
00120 }
00121
00122 par::Mpi_Allgather<ot::TreeNode>(sendMinMax, &(*_mins_maxs.begin()), 2, comm);
00123
00124 std::vector<std::vector<TreeNode> > sendNodes(npes);
00125 std::vector<std::vector<unsigned int> > keymap(npes);
00126
00127 for (int i=0; i<npes; i++) {
00128 sendCnt[i] = 0;
00129 }
00130
00131
00132
00133
00134
00135
00136 for (unsigned int i=0; i<globalCoarse.size(); i++) {
00137 for (int p=0;p<npes;p++) {
00138 #ifdef __DEBUG_OCT__
00139 assert(areComparable(globalCoarse[i], _mins_maxs[2*p]));
00140 #endif
00141 if ( (globalCoarse[i].isAncestor(_mins_maxs[2*p])) ||
00142 ( (globalCoarse[i] >= _mins_maxs[2*p]) && (globalCoarse[i] <=_mins_maxs[(2*p)+1]) ) ) {
00143 sendNodes[p].push_back(globalCoarse[i]);
00144
00145 keymap[p].push_back(i);
00146 sendCnt[p]++;
00147 }
00148 }
00149 }
00150
00151 _mins_maxs.clear();
00152
00153
00154
00155
00156
00157
00158
00159 par::Mpi_Alltoall<int>( sendCnt, recvCnt, 1, comm);
00160
00161
00162 unsigned int totalSend = 0;
00163 unsigned int totalRecv = 0;
00164 #ifdef __MEASURE_BPART_COMM__
00165 unsigned int numProcsSendI = 0;
00166 unsigned int numProcsRecvI = 0;
00167 #endif
00168 for (unsigned int i = 0; i < npes; i++) {
00169 totalSend+= sendCnt[i];
00170 totalRecv+= recvCnt[i];
00171 #ifdef __MEASURE_BPART_COMM__
00172 if(sendCnt[i]) {
00173 numProcsSendI++;
00174 }
00175 if(recvCnt[i]) {
00176 numProcsRecvI++;
00177 }
00178 #endif
00179 }
00180
00181
00182 std::vector<ot::TreeNode> sendK (totalSend);
00183 std::vector<ot::TreeNode> recvK (totalRecv);
00184
00185
00186 sendOffsets[0] = 0;
00187 recvOffsets[0] = 0;
00188
00189
00190 for (int i=1; i<npes; i++) {
00191 sendOffsets[i] = sendOffsets[i-1] + sendCnt[i-1];
00192 recvOffsets[i] = recvOffsets[i-1] + recvCnt[i-1];
00193 }
00194
00195 for (int i=0; i<npes; i++) {
00196 for (unsigned int j=0; j<sendCnt[i]; j++) {
00197 sendK[sendOffsets[i] + j] = sendNodes[i][j];
00198 }
00199 }
00200
00201
00202
00203 ot::TreeNode* sendKptr = NULL;
00204 ot::TreeNode* recvKptr = NULL;
00205 if(!sendK.empty()) {
00206 sendKptr = &(*(sendK.begin()));
00207 }
00208 if(!recvK.empty()) {
00209 recvKptr = &(*(recvK.begin()));
00210 }
00211
00212 par::Mpi_Alltoallv_sparse<ot::TreeNode>(sendKptr, sendCnt, sendOffsets,
00213 recvKptr, recvCnt, recvOffsets, comm);
00214
00215 sendK.clear();
00216
00217 #ifdef __MEASURE_BPART_COMM__
00218 MPI_Barrier(comm);
00219 unsigned int* allTotalSendI = new unsigned int[npes];
00220 unsigned int* allTotalRecvI = new unsigned int[npes];
00221 unsigned int* allNumProcsSendI = new unsigned int[npes];
00222 unsigned int* allNumProcsRecvI = new unsigned int[npes];
00223 par::Mpi_Gather<unsigned int>(&totalSend, allTotalSendI, 1, 0, comm);
00224 par::Mpi_Gather<unsigned int>(&totalRecv, allTotalRecvI, 1, 0, comm);
00225 par::Mpi_Gather<unsigned int>(&numProcsSendI, allNumProcsSendI, 1, 0, comm);
00226 par::Mpi_Gather<unsigned int>(&numProcsRecvI, allNumProcsRecvI, 1, 0, comm);
00227 if(!rank) {
00228 for(int i = 0; i < npes; i++) {
00229 std::cout<<" allTotalSendI["<<i<<"] in Bpart: "<<allTotalSendI[i]<<std::endl;
00230 std::cout<<" allTotalRecvI["<<i<<"] in Bpart: "<<allTotalRecvI[i]<<std::endl;
00231 std::cout<<" allNumProcsSendI["<<i<<"] in Bpart: "<<allNumProcsSendI[i]<<std::endl;
00232 std::cout<<" allNumProcsRecvI["<<i<<"] in Bpart: "<<allNumProcsRecvI[i]<<std::endl;
00233 }
00234 }
00235 delete [] allTotalSendI;
00236 delete [] allTotalRecvI;
00237 delete [] allNumProcsSendI;
00238 delete [] allNumProcsRecvI;
00239 MPI_Barrier(comm);
00240 #endif
00241
00242
00243
00244 unsigned int *wts = NULL;
00245 if(totalRecv) {
00246 wts = new unsigned int [totalRecv];
00247 }
00248 for (unsigned int i=0; i<totalRecv; i++) {
00249 wts[i] = 0;
00250 }
00251
00252
00253 unsigned int nextPt = 0;
00254 unsigned int nextNode = 0;
00255
00256 while (nextPt < nodes.size()) {
00257
00258 #ifdef __DEBUG_OCT__
00259 assert(areComparable(recvK[nextNode], nodes[nextPt]));
00260 #endif
00261 if ((recvK[nextNode].isAncestor(nodes[nextPt])) ||
00262 (recvK[nextNode] == nodes[nextPt])) {
00263 wts[nextNode]++;
00264 nextPt++;
00265 } else {
00266 nextNode++;
00267 if (nextNode == totalRecv) {
00268
00269
00270 assert(false);
00271 }
00272 }
00273 }
00274
00275 recvK.clear();
00276
00277
00278 unsigned int *recvWts = NULL;
00279 if(totalSend) {
00280 recvWts = new unsigned int[totalSend];
00281 }
00282
00283 par::Mpi_Alltoallv_sparse<unsigned int>( wts, recvCnt, recvOffsets,
00284 recvWts, sendCnt, sendOffsets, comm);
00285
00286
00287 for (int i=0;i<globalCoarse.size();i++) {
00288 globalCoarse[i].setWeight(0);
00289 }
00290
00291 for (int i=0; i<npes; i++) {
00292 for (int j=0; j<sendCnt[i]; j++) {
00293 globalCoarse[keymap[i][j]].addWeight(recvWts[sendOffsets[i] + j]);
00294 }
00295 }
00296
00297 for (unsigned int i=0; i<npes; i++) {
00298 keymap[i].clear();
00299 sendNodes[i].clear();
00300 }
00301 sendNodes.clear();
00302 keymap.clear();
00303
00304 if(recvWts) {
00305 delete [] recvWts;
00306 recvWts = NULL;
00307 }
00308
00309 if(wts) {
00310 delete [] wts;
00311 wts = NULL;
00312 }
00313
00314 par::partitionW<ot::TreeNode>(globalCoarse,getNodeWeight,comm);
00315
00316
00317 for (unsigned int i=0;i<globalCoarse.size(); i++) {
00318 globalCoarse[i].setWeight(1);
00319 }
00320
00321
00322
00323
00324
00325
00326 std::vector<TreeNode> vtkDist(npes);
00327
00328 ot::TreeNode *sendMin = NULL;
00329 if (!globalCoarse.empty()) {
00330 sendMin = (ot::TreeNode *)&(*(globalCoarse.begin()));
00331 } else {
00332 sendMin = &(rootNode);
00333 }
00334
00335 par::Mpi_Allgather<ot::TreeNode>(sendMin, &(* vtkDist.begin()), 1, comm);
00336
00337 minsAllBlocks.clear();
00338 for(int j = 0; j < npes; j++) {
00339 if(vtkDist[j] != rootNode) {
00340 minsAllBlocks.push_back(vtkDist[j]);
00341 }
00342 }
00343
00344 for (unsigned int j = 1; j < npes ; j++) {
00345 if (vtkDist[j] == rootNode) {
00346 vtkDist[j] = vtkDist[j-1];
00347 }
00348 }
00349
00350
00351 if (npes>1) {
00352 if (vtkDist[npes-1] == vtkDist[npes-2]) {
00353 vtkDist[npes-1] = rootNode;
00354 }
00355
00356 for (unsigned int i=npes-2; i>0; i--) {
00357 if (vtkDist[i] == vtkDist[i-1]) {
00358 vtkDist[i] = vtkDist[i+1];
00359 }
00360 }
00361 }
00362
00363 unsigned int *part = NULL;
00364 if(!nodes.empty()) {
00365 part = new unsigned int[nodes.size()];
00366 }
00367
00368 if (npes > 1) {
00369 unsigned int pCnt=0;
00370 for (unsigned int i=0; i< nodes.size(); i++) {
00371 if ( (nodes[i] >= vtkDist[pCnt]) && ( (pCnt == (npes-1))
00372 || ( nodes[i] < vtkDist[pCnt+1] ) || (vtkDist[pCnt+1] == rootNode) ) ) {
00373 part[i] = pCnt;
00374 } else {
00375 while ( (pCnt < (npes -1)) && (nodes[i] >= vtkDist[pCnt+1])
00376 && (vtkDist[pCnt+1] != rootNode) ) {
00377 pCnt++;
00378 }
00379 part[i] = pCnt;
00380 }
00381 }
00382 }
00383
00384 vtkDist.clear();
00385
00386
00387
00388
00389
00390
00391 for (int i=0; i<npes; i++) {
00392 sendCnt[i]=0;
00393 recvCnt[i]=0;
00394 }
00395
00396 if (npes > 1) {
00397 for (unsigned int i=0; i<nodes.size(); i++) {
00398 sendCnt[part[i]]++;
00399 }
00400 } else {
00401 sendCnt[0] += (nodes.size());
00402 }
00403
00404 if(part) {
00405 delete [] part;
00406 part = NULL;
00407 }
00408
00409
00410
00411
00412 par::Mpi_Alltoall<int>( sendCnt, recvCnt, 1, comm);
00413
00414 totalRecv=0;
00415 for (unsigned int i=0; i<npes; i++) {
00416 totalRecv += recvCnt[i];
00417 }
00418
00419 sendOffsets[0] = 0;
00420 recvOffsets[0] = 0;
00421
00422
00423 for (int i=1; i<npes; i++) {
00424 sendOffsets[i] = sendOffsets[i-1] + sendCnt[i-1];
00425 recvOffsets[i] = recvOffsets[i-1] + recvCnt[i-1];
00426 }
00427
00428
00429 std::vector<ot::TreeNode > newNodes(totalRecv);
00430
00431
00432 ot::TreeNode* nodesPtr = NULL;
00433 ot::TreeNode* newNodesPtr = NULL;
00434 if(!nodes.empty()) {
00435 nodesPtr = &(*(nodes.begin()));
00436 }
00437 if(!newNodes.empty()) {
00438 newNodesPtr = &(*(newNodes.begin()));
00439 }
00440
00441 #ifdef __MEASURE_BPART_COMM__
00442 unsigned int totalSendSize = nodes.size();
00443 unsigned int totalRecvSize = newNodes.size();
00444 unsigned int numProcsSendF = 0;
00445 unsigned int numProcsRecvF = 0;
00446 for(int i = 0; i < npes; i++) {
00447 if(sendCnt[i]) {
00448 numProcsSendF++;
00449 }
00450 if(recvCnt[i]) {
00451 numProcsRecvF++;
00452 }
00453 }
00454 #endif
00455
00456 par::Mpi_Alltoallv_sparse<ot::TreeNode>( nodesPtr, sendCnt, sendOffsets,
00457 newNodesPtr, recvCnt, recvOffsets, comm);
00458
00459
00460 nodes = newNodes;
00461 newNodes.clear();
00462
00463
00464 delete [] sendCnt;
00465 sendCnt = NULL;
00466
00467 delete [] recvCnt;
00468 recvCnt = NULL;
00469
00470 delete [] sendOffsets;
00471 sendOffsets = NULL;
00472
00473 delete [] recvOffsets;
00474 recvOffsets = NULL;
00475
00476 #ifdef __MEASURE_BPART_COMM__
00477 MPI_Barrier(comm);
00478 unsigned int* allTotalSendF = new unsigned int[npes];
00479 unsigned int* allTotalRecvF = new unsigned int[npes];
00480 unsigned int* allNumProcsSendF = new unsigned int[npes];
00481 unsigned int* allNumProcsRecvF = new unsigned int[npes];
00482 par::Mpi_Gather<unsigned int>(&totalSendSize, allTotalSendF, 1, 0, comm);
00483 par::Mpi_Gather<unsigned int>(&totalRecvSize, allTotalRecvF, 1, 0, comm);
00484 par::Mpi_Gather<unsigned int>(&numProcsSendF, allNumProcsSendF, 1, 0, comm);
00485 par::Mpi_Gather<unsigned int>(&numProcsRecvF, allNumProcsRecvF, 1, 0, comm);
00486 if(!rank) {
00487 for(int i = 0; i < npes; i++) {
00488 std::cout<<" allTotalSendF["<<i<<"] in Bpart: "<<allTotalSendF[i]<<std::endl;
00489 std::cout<<" allTotalRecvF["<<i<<"] in Bpart: "<<allTotalRecvF[i]<<std::endl;
00490 std::cout<<" allNumProcsSendF["<<i<<"] in Bpart: "<<allNumProcsSendF[i]<<std::endl;
00491 std::cout<<" allNumProcsRecvF["<<i<<"] in Bpart: "<<allNumProcsRecvF[i]<<std::endl;
00492 }
00493 }
00494 delete [] allTotalSendF;
00495 delete [] allTotalRecvF;
00496 delete [] allNumProcsSendF;
00497 delete [] allNumProcsRecvF;
00498 MPI_Barrier(comm);
00499 #endif
00500
00501 PROF_BLKPART2_END
00502 }
00503
00504
00505
00506
00507 int blockPartStage1(std::vector<TreeNode> &nodes, std::vector<TreeNode> &blocks,
00508 unsigned int dim, unsigned int maxDepth, MPI_Comm comm) {
00509 #ifdef __PROF_WITH_BARRIER__
00510 MPI_Barrier(comm);
00511 #endif
00512 PROF_BLKPART1_BEGIN
00513 int npes, rank;
00514 const double thFac = 0.5;
00515 MPI_Comm_rank(comm,&rank);
00516 MPI_Comm_size(comm,&npes);
00517
00518 par::partitionW<ot::TreeNode>(nodes, NULL,comm);
00519
00520 assert(nodes.size() > (1 << dim) );
00521
00522 #ifdef __BLOCK_PART_EQUALS_MORTON_PART__
00523
00524 blocks.clear();
00525
00526
00527
00528 appendCompleteRegion(nodes[0], nodes[nodes.size()-1], blocks, true, true);
00529
00530 #else
00531
00532
00533 std::vector<TreeNode> localCoarse;
00534
00535
00536 appendCompleteRegion(nodes[0], nodes[nodes.size()-1], localCoarse, true, true);
00537
00538
00539
00540
00541 std::vector<TreeNode> localBlocks;
00542
00543
00544
00545
00546
00547 for (unsigned int i = 0; i < localCoarse.size(); i++) {
00548 localCoarse[i].setWeight(0);
00549 }
00550
00551
00552 unsigned int nextPt = 0;
00553 unsigned int nextNode = 0;
00554
00555 while (nextPt < nodes.size()) {
00556
00557 #ifdef __DEBUG_OCT__
00558 assert(areComparable(localCoarse[nextNode], nodes[nextPt]));
00559 #endif
00560 if ((localCoarse[nextNode].isAncestor(nodes[nextPt])) ||
00561 (localCoarse[nextNode] == nodes[nextPt])) {
00562 localCoarse[nextNode].addWeight(1);
00563 nextPt++;
00564 } else {
00565 nextNode++;
00566 if (nextNode == localCoarse.size()) {
00567
00568
00569 assert(false);
00570 }
00571 }
00572 }
00573
00574 sort(localCoarse.begin(), localCoarse.end(), ot::bPartComparator);
00575
00576 long localWt = 0;
00577 unsigned int cnt = 0;
00578 while ( ( cnt < localCoarse.size() ) &&
00579 (localWt <= ( (long)( thFac*( (double)(nodes.size()) ) ) ) ) ) {
00580 localBlocks.push_back(localCoarse[cnt]);
00581 localWt += (long)(localCoarse[cnt].getWeight());
00582 cnt++;
00583 }
00584
00585 localCoarse.clear();
00586
00587 for (unsigned int i = 0; i < localBlocks.size(); i++) {
00588 localBlocks[i].setWeight(1);
00589 }
00590
00591
00592 sort(localBlocks.begin(), localBlocks.end());
00593
00594
00595
00596
00597
00598
00599 completeOctree(localBlocks, blocks, dim, maxDepth, true, true, true, comm);
00600
00601 localBlocks.clear();
00602
00603 #endif
00604
00605 PROF_BLKPART1_END
00606 }
00607
00608 int blockPartStage2(std::vector<TreeNode> &nodes, std::vector<TreeNode> &globalCoarse,
00609 std::vector<ot::TreeNode>& minsAllBlocks, unsigned int dim, unsigned int maxDepth,
00610 MPI_Comm comm) {
00611 #ifdef __PROF_WITH_BARRIER__
00612 MPI_Barrier(comm);
00613 #endif
00614 PROF_BLKPART2_BEGIN
00615
00616 ot::TreeNode rootNode (dim,maxDepth);
00617
00618 int npes, rank;
00619 MPI_Comm_rank(comm,&rank);
00620 MPI_Comm_size(comm,&npes);
00621
00622 #ifdef __BLOCK_PART_EQUALS_MORTON_PART__
00623
00624
00625
00626 std::vector<TreeNode> vtkDist(npes);
00627
00628 ot::TreeNode *sendMin = NULL;
00629 if (!globalCoarse.empty()) {
00630 sendMin = (ot::TreeNode *)&(*(globalCoarse.begin()));
00631 } else {
00632 sendMin = &(rootNode);
00633 }
00634
00635 par::Mpi_Allgather<ot::TreeNode>(sendMin, &(* vtkDist.begin()), 1, comm);
00636
00637 minsAllBlocks.clear();
00638 for(int j = 0; j < npes; j++) {
00639 if(vtkDist[j] != rootNode) {
00640 minsAllBlocks.push_back(vtkDist[j]);
00641 }
00642 }
00643
00644 #else
00645
00646 int *sendCnt = new int[npes];
00647 int *recvCnt = new int[npes];
00648 int *sendOffsets = new int[npes];
00649 int *recvOffsets = new int[npes];
00650
00651
00652
00653 std::vector<TreeNode> _mins_maxs(2*npes);
00654
00655
00656 ot::TreeNode sendMinMax[2];
00657
00658 if (!nodes.empty()) {
00659 sendMinMax[0] = nodes[0];
00660 sendMinMax[1] = nodes[nodes.size()-1];
00661 } else {
00662 sendMinMax[0] = rootNode;
00663 sendMinMax[1] = rootNode;
00664 }
00665
00666 par::Mpi_Allgather<ot::TreeNode>(sendMinMax, &(*_mins_maxs.begin()), 2, comm);
00667
00668 std::vector<std::vector<TreeNode> > sendNodes(npes);
00669 std::vector<std::vector<unsigned int> > keymap(npes);
00670
00671 for (int i=0; i<npes; i++) {
00672 sendCnt[i] = 0;
00673 }
00674
00675
00676
00677
00678
00679
00680 for (unsigned int i=0; i<globalCoarse.size(); i++) {
00681 for (int p=0;p<npes;p++) {
00682 #ifdef __DEBUG_OCT__
00683 assert(areComparable(globalCoarse[i], _mins_maxs[2*p]));
00684 #endif
00685 if ( (globalCoarse[i].isAncestor(_mins_maxs[2*p])) ||
00686 ( (globalCoarse[i] >= _mins_maxs[2*p]) && (globalCoarse[i] <=_mins_maxs[(2*p)+1]) ) ) {
00687 sendNodes[p].push_back(globalCoarse[i]);
00688
00689 keymap[p].push_back(i);
00690 sendCnt[p]++;
00691 }
00692 }
00693 }
00694
00695 _mins_maxs.clear();
00696
00697
00698
00699
00700
00701
00702
00703 par::Mpi_Alltoall<int>( sendCnt, recvCnt, 1, comm);
00704
00705
00706 unsigned int totalSend = 0;
00707 unsigned int totalRecv = 0;
00708 #ifdef __MEASURE_BPART_COMM__
00709 unsigned int numProcsSendI = 0;
00710 unsigned int numProcsRecvI = 0;
00711 #endif
00712 for (unsigned int i = 0; i < npes; i++) {
00713 totalSend+= sendCnt[i];
00714 totalRecv+= recvCnt[i];
00715 #ifdef __MEASURE_BPART_COMM__
00716 if(sendCnt[i]) {
00717 numProcsSendI++;
00718 }
00719 if(recvCnt[i]) {
00720 numProcsRecvI++;
00721 }
00722 #endif
00723 }
00724
00725
00726 std::vector<ot::TreeNode> sendK (totalSend);
00727 std::vector<ot::TreeNode> recvK (totalRecv);
00728
00729
00730 sendOffsets[0] = 0;
00731 recvOffsets[0] = 0;
00732
00733
00734 for (int i=1; i<npes; i++) {
00735 sendOffsets[i] = sendOffsets[i-1] + sendCnt[i-1];
00736 recvOffsets[i] = recvOffsets[i-1] + recvCnt[i-1];
00737 }
00738
00739 for (int i=0; i<npes; i++) {
00740 for (unsigned int j=0; j<sendCnt[i]; j++) {
00741 sendK[sendOffsets[i] + j] = sendNodes[i][j];
00742 }
00743 }
00744
00745
00746
00747 ot::TreeNode* sendKptr = NULL;
00748 ot::TreeNode* recvKptr = NULL;
00749 if(!sendK.empty()) {
00750 sendKptr = &(*(sendK.begin()));
00751 }
00752 if(!recvK.empty()) {
00753 recvKptr = &(*(recvK.begin()));
00754 }
00755
00756 par::Mpi_Alltoallv_sparse<ot::TreeNode>(sendKptr, sendCnt, sendOffsets,
00757 recvKptr, recvCnt, recvOffsets, comm);
00758
00759 sendK.clear();
00760
00761 #ifdef __MEASURE_BPART_COMM__
00762 MPI_Barrier(comm);
00763 unsigned int* allTotalSendI = new unsigned int[npes];
00764 unsigned int* allTotalRecvI = new unsigned int[npes];
00765 unsigned int* allNumProcsSendI = new unsigned int[npes];
00766 unsigned int* allNumProcsRecvI = new unsigned int[npes];
00767 par::Mpi_Gather<unsigned int>(&totalSend, allTotalSendI, 1, 0, comm);
00768 par::Mpi_Gather<unsigned int>(&totalRecv, allTotalRecvI, 1, 0, comm);
00769 par::Mpi_Gather<unsigned int>(&numProcsSendI, allNumProcsSendI, 1, 0, comm);
00770 par::Mpi_Gather<unsigned int>(&numProcsRecvI, allNumProcsRecvI, 1, 0, comm);
00771 if(!rank) {
00772 for(int i = 0; i < npes; i++) {
00773 std::cout<<" allTotalSendI["<<i<<"] in Bpart: "<<allTotalSendI[i]<<std::endl;
00774 std::cout<<" allTotalRecvI["<<i<<"] in Bpart: "<<allTotalRecvI[i]<<std::endl;
00775 std::cout<<" allNumProcsSendI["<<i<<"] in Bpart: "<<allNumProcsSendI[i]<<std::endl;
00776 std::cout<<" allNumProcsRecvI["<<i<<"] in Bpart: "<<allNumProcsRecvI[i]<<std::endl;
00777 }
00778 }
00779 delete [] allTotalSendI;
00780 delete [] allTotalRecvI;
00781 delete [] allNumProcsSendI;
00782 delete [] allNumProcsRecvI;
00783 MPI_Barrier(comm);
00784 #endif
00785
00786
00787
00788 unsigned int *wts = NULL;
00789 if(totalRecv) {
00790 wts = new unsigned int [totalRecv];
00791 }
00792 for (unsigned int i=0; i<totalRecv; i++) {
00793 wts[i] = 0;
00794 }
00795
00796
00797 unsigned int nextPt = 0;
00798 unsigned int nextNode = 0;
00799
00800 while (nextPt < nodes.size()) {
00801
00802 #ifdef __DEBUG_OCT__
00803 assert(areComparable(recvK[nextNode], nodes[nextPt]));
00804 #endif
00805 if ((recvK[nextNode].isAncestor(nodes[nextPt])) ||
00806 (recvK[nextNode] == nodes[nextPt])) {
00807 wts[nextNode]++;
00808 nextPt++;
00809 } else {
00810 nextNode++;
00811 if (nextNode == totalRecv) {
00812
00813
00814 assert(false);
00815 }
00816 }
00817 }
00818
00819 recvK.clear();
00820
00821
00822 unsigned int *recvWts = NULL;
00823 if(totalSend) {
00824 recvWts = new unsigned int[totalSend];
00825 }
00826
00827 par::Mpi_Alltoallv_sparse<unsigned int>( wts, recvCnt, recvOffsets,
00828 recvWts, sendCnt, sendOffsets, comm);
00829
00830
00831 for (int i=0;i<globalCoarse.size();i++) {
00832 globalCoarse[i].setWeight(0);
00833 }
00834
00835 for (int i=0; i<npes; i++) {
00836 for (int j=0; j<sendCnt[i]; j++) {
00837 globalCoarse[keymap[i][j]].addWeight(recvWts[sendOffsets[i] + j]);
00838 }
00839 }
00840
00841 for (unsigned int i=0; i<npes; i++) {
00842 keymap[i].clear();
00843 sendNodes[i].clear();
00844 }
00845 sendNodes.clear();
00846 keymap.clear();
00847
00848 if(recvWts) {
00849 delete [] recvWts;
00850 recvWts = NULL;
00851 }
00852
00853 if(wts) {
00854 delete [] wts;
00855 wts = NULL;
00856 }
00857
00858 par::partitionW<ot::TreeNode>(globalCoarse,getNodeWeight,comm);
00859
00860
00861 for (unsigned int i=0;i<globalCoarse.size(); i++) {
00862 globalCoarse[i].setWeight(1);
00863 }
00864
00865
00866
00867
00868
00869
00870 std::vector<TreeNode> vtkDist(npes);
00871
00872 ot::TreeNode *sendMin = NULL;
00873 if (!globalCoarse.empty()) {
00874 sendMin = (ot::TreeNode *)&(*(globalCoarse.begin()));
00875 } else {
00876 sendMin = &(rootNode);
00877 }
00878
00879 par::Mpi_Allgather<ot::TreeNode>(sendMin, &(* vtkDist.begin()), 1, comm);
00880
00881 minsAllBlocks.clear();
00882 for(int j = 0; j < npes; j++) {
00883 if(vtkDist[j] != rootNode) {
00884 minsAllBlocks.push_back(vtkDist[j]);
00885 }
00886 }
00887
00888 for (unsigned int j = 1; j < npes ; j++) {
00889 if (vtkDist[j] == rootNode) {
00890 vtkDist[j] = vtkDist[j-1];
00891 }
00892 }
00893
00894
00895 if (npes>1) {
00896 if (vtkDist[npes-1] == vtkDist[npes-2]) {
00897 vtkDist[npes-1] = rootNode;
00898 }
00899
00900 for (unsigned int i=npes-2; i>0; i--) {
00901 if (vtkDist[i] == vtkDist[i-1]) {
00902 vtkDist[i] = vtkDist[i+1];
00903 }
00904 }
00905 }
00906
00907 unsigned int *part = NULL;
00908 if(!nodes.empty()) {
00909 part = new unsigned int[nodes.size()];
00910 }
00911
00912 if (npes > 1) {
00913 unsigned int pCnt=0;
00914 for (unsigned int i=0; i< nodes.size(); i++) {
00915 if ( (nodes[i] >= vtkDist[pCnt]) && ( (pCnt == (npes-1))
00916 || ( nodes[i] < vtkDist[pCnt+1] ) || (vtkDist[pCnt+1] == rootNode) ) ) {
00917 part[i] = pCnt;
00918 } else {
00919 while ( (pCnt < (npes -1)) && (nodes[i] >= vtkDist[pCnt+1])
00920 && (vtkDist[pCnt+1] != rootNode) ) {
00921 pCnt++;
00922 }
00923 part[i] = pCnt;
00924 }
00925 }
00926 }
00927
00928 vtkDist.clear();
00929
00930
00931
00932
00933
00934
00935 for (int i=0; i<npes; i++) {
00936 sendCnt[i]=0;
00937 recvCnt[i]=0;
00938 }
00939
00940 if (npes > 1) {
00941 for (unsigned int i=0; i<nodes.size(); i++) {
00942 sendCnt[part[i]]++;
00943 }
00944 } else {
00945 sendCnt[0] += (nodes.size());
00946 }
00947
00948 if(part) {
00949 delete [] part;
00950 part = NULL;
00951 }
00952
00953
00954
00955
00956 par::Mpi_Alltoall<int>( sendCnt, recvCnt, 1, comm);
00957
00958 totalRecv=0;
00959 for (unsigned int i=0; i<npes; i++) {
00960 totalRecv += recvCnt[i];
00961 }
00962
00963 sendOffsets[0] = 0;
00964 recvOffsets[0] = 0;
00965
00966
00967 for (int i=1; i<npes; i++) {
00968 sendOffsets[i] = sendOffsets[i-1] + sendCnt[i-1];
00969 recvOffsets[i] = recvOffsets[i-1] + recvCnt[i-1];
00970 }
00971
00972
00973 std::vector<ot::TreeNode > newNodes(totalRecv);
00974
00975
00976 ot::TreeNode* nodesPtr = NULL;
00977 ot::TreeNode* newNodesPtr = NULL;
00978 if(!nodes.empty()) {
00979 nodesPtr = &(*(nodes.begin()));
00980 }
00981 if(!newNodes.empty()) {
00982 newNodesPtr = &(*(newNodes.begin()));
00983 }
00984
00985 #ifdef __MEASURE_BPART_COMM__
00986 unsigned int totalSendSize = nodes.size();
00987 unsigned int totalRecvSize = newNodes.size();
00988 unsigned int numProcsSendF = 0;
00989 unsigned int numProcsRecvF = 0;
00990 for(int i = 0; i < npes; i++) {
00991 if(sendCnt[i]) {
00992 numProcsSendF++;
00993 }
00994 if(recvCnt[i]) {
00995 numProcsRecvF++;
00996 }
00997 }
00998 #endif
00999
01000 par::Mpi_Alltoallv_sparse<ot::TreeNode>( nodesPtr, sendCnt, sendOffsets,
01001 newNodesPtr, recvCnt, recvOffsets, comm);
01002
01003
01004 nodes = newNodes;
01005 newNodes.clear();
01006
01007
01008 delete [] sendCnt;
01009 sendCnt = NULL;
01010
01011 delete [] recvCnt;
01012 recvCnt = NULL;
01013
01014 delete [] sendOffsets;
01015 sendOffsets = NULL;
01016
01017 delete [] recvOffsets;
01018 recvOffsets = NULL;
01019
01020 #ifdef __MEASURE_BPART_COMM__
01021 MPI_Barrier(comm);
01022 unsigned int* allTotalSendF = new unsigned int[npes];
01023 unsigned int* allTotalRecvF = new unsigned int[npes];
01024 unsigned int* allNumProcsSendF = new unsigned int[npes];
01025 unsigned int* allNumProcsRecvF = new unsigned int[npes];
01026 par::Mpi_Gather<unsigned int>(&totalSendSize, allTotalSendF, 1, 0, comm);
01027 par::Mpi_Gather<unsigned int>(&totalRecvSize, allTotalRecvF, 1, 0, comm);
01028 par::Mpi_Gather<unsigned int>(&numProcsSendF, allNumProcsSendF, 1, 0, comm);
01029 par::Mpi_Gather<unsigned int>(&numProcsRecvF, allNumProcsRecvF, 1, 0, comm);
01030 if(!rank) {
01031 for(int i = 0; i < npes; i++) {
01032 std::cout<<" allTotalSendF["<<i<<"] in Bpart: "<<allTotalSendF[i]<<std::endl;
01033 std::cout<<" allTotalRecvF["<<i<<"] in Bpart: "<<allTotalRecvF[i]<<std::endl;
01034 std::cout<<" allNumProcsSendF["<<i<<"] in Bpart: "<<allNumProcsSendF[i]<<std::endl;
01035 std::cout<<" allNumProcsRecvF["<<i<<"] in Bpart: "<<allNumProcsRecvF[i]<<std::endl;
01036 }
01037 }
01038 delete [] allTotalSendF;
01039 delete [] allTotalRecvF;
01040 delete [] allNumProcsSendF;
01041 delete [] allNumProcsRecvF;
01042 MPI_Barrier(comm);
01043 #endif
01044
01045 #endif
01046
01047 PROF_BLKPART2_END
01048 }
01049
01050 }
01051
01052