00001
00009 #include "parUtils.h"
00010 #include "TreeNode.h"
00011 #include <cassert>
00012 #include "nodeAndValues.h"
00013 #include "binUtils.h"
00014 #include "dendro.h"
00015
00016 #ifdef __DEBUG__
00017 #ifndef __DEBUG_OCT__
00018 #define __DEBUG_OCT__
00019 #endif
00020 #endif
00021
00022 namespace ot {
00023
00024 int regularGrid2Octree(const std::vector<double>& elementValues,
00025 unsigned int N, unsigned int nx, unsigned int ny, unsigned int nz,
00026 unsigned int xs, unsigned int ys, unsigned int zs, std::vector<TreeNode>& linOct,
00027 unsigned int dim, unsigned int maxDepth, double thresholdFac, MPI_Comm comm) {
00028 PROF_RG2O_BEGIN
00029
00030 int rank;
00031 int npes;
00032 MPI_Comm_rank(comm, &rank);
00033 MPI_Comm_size(comm, &npes);
00034
00035 const int MIN_GRAIN_SIZE = 10;
00036
00037 bool isNvalid = binOp::isPowerOfTwo(N);
00038
00039 assert(isNvalid);
00040
00041 unsigned int rgLevel = binOp::fastLog2(N);
00042 unsigned int elemLen = (1u << (maxDepth - rgLevel));
00043
00044
00045 std::vector<ot::NodeAndValues<double, 1> > tmpList(nx*ny*nz);
00046
00047 for(int k = 0; k < nz; k++) {
00048 for(int j = 0; j < ny; j++) {
00049 for(int i = 0; i < nx; i++) {
00050 unsigned int currX = ((i + xs)*elemLen);
00051 unsigned int currY = ((j + ys)*elemLen);
00052 unsigned int currZ = ((k + zs)*elemLen);
00053 unsigned int idx = ((nx*(j + (ny*k))) + i);
00054 tmpList[idx].node = ot::TreeNode(currX, currY, currZ, rgLevel, dim, maxDepth);
00055 tmpList[idx].values[0] = elementValues[idx];
00056 }
00057 }
00058 }
00059
00060
00061 std::vector<ot::NodeAndValues<double, 1> > tnAndValsList;
00062 par::sampleSort<ot::NodeAndValues<double, 1> >(tmpList, tnAndValsList, comm);
00063 tmpList.clear();
00064
00065 DendroIntL inSz = tnAndValsList.size();
00066 DendroIntL globInSize;
00067 par::Mpi_Allreduce<DendroIntL>(&inSz, &globInSize, 1, MPI_SUM, comm);
00068
00069 bool repeatLoop = true;
00070 int npesCurr = npes;
00071 MPI_Comm commCurr = comm;
00072 if(globInSize < (MIN_GRAIN_SIZE*npes)) {
00073 int splittingSize = (globInSize/MIN_GRAIN_SIZE);
00074 if(splittingSize == 0) {
00075 splittingSize = 1;
00076 }
00077
00078 unsigned int avgLoad = (globInSize/splittingSize);
00079 int leftOvers = (globInSize - (splittingSize*avgLoad));
00080
00081 if(rank >= splittingSize) {
00082 par::scatterValues<ot::NodeAndValues<double, 1> >(tnAndValsList, tmpList, 0, commCurr);
00083 }else if(rank < leftOvers) {
00084 par::scatterValues<ot::NodeAndValues<double, 1> >(tnAndValsList, tmpList, (avgLoad + 1), commCurr);
00085 }else {
00086 par::scatterValues<ot::NodeAndValues<double, 1> >(tnAndValsList, tmpList, avgLoad, commCurr);
00087 }
00088 tnAndValsList = tmpList;
00089 tmpList.clear();
00090
00091 MPI_Comm newComm;
00092 par::splitCommUsingSplittingRank(splittingSize, &newComm, commCurr);
00093 commCurr = newComm;
00094 npesCurr = splittingSize;
00095
00096 if(rank >= splittingSize) {
00097 repeatLoop = false;
00098 }
00099 } else {
00100 par::partitionW<ot::NodeAndValues<double, 1> >(tnAndValsList, NULL, comm);
00101 }
00102
00103 if(globInSize < MIN_GRAIN_SIZE) {
00104 repeatLoop = false;
00105 }
00106
00107 while(repeatLoop) {
00108
00109 inSz = tnAndValsList.size();
00110
00111 assert(inSz >= MIN_GRAIN_SIZE);
00112
00113 MPI_Request requests[4];
00114
00115
00116
00117 ot::NodeAndValues<double, 1> prevOcts[7];
00118 if( rank ) {
00119 par::Mpi_Irecv<ot::NodeAndValues<double, 1> >(prevOcts, 7, (rank - 1),
00120 1, commCurr, &(requests[0]));
00121 }
00122
00123 ot::NodeAndValues<double, 1> nextOcts[7];
00124 if( rank < (npesCurr - 1) ) {
00125 par::Mpi_Irecv<ot::NodeAndValues<double, 1> >(nextOcts, 7, (rank + 1),
00126 1, commCurr, &(requests[1]));
00127 }
00128
00129 if( rank ) {
00130 par::Mpi_Issend<ot::NodeAndValues<double, 1> >((&(*(tnAndValsList.begin()))), 7,
00131 (rank - 1), 1, commCurr, &(requests[2]));
00132 }
00133
00134 if( rank < (npesCurr - 1) ) {
00135 par::Mpi_Issend<ot::NodeAndValues<double, 1> >((&(*(tnAndValsList.end() - 7))), 7,
00136 (rank + 1), 1, commCurr, &(requests[3]));
00137 }
00138
00139 MPI_Status statuses[4];
00140
00141 if( rank ) {
00142 MPI_Wait(&(requests[0]), &(statuses[0]));
00143 MPI_Wait(&(requests[2]), &(statuses[2]));
00144 }
00145
00146 if( rank < (npesCurr - 1) ) {
00147 MPI_Wait(&(requests[1]), &(statuses[1]));
00148 MPI_Wait(&(requests[3]), &(statuses[3]));
00149 }
00150
00151
00152 int idxOfPrevFC = -1;
00153 if( rank ) {
00154 for(int i = 0; i < 7; i++) {
00155
00156 if(prevOcts[i].node.getChildNumber() == 0) {
00157 idxOfPrevFC = i;
00158 }
00159 }
00160 }
00161
00162 int idxOfNextFC = -1;
00163 if( rank < (npesCurr - 1) ) {
00164 for(int i = 0; i < 7; i++) {
00165
00166 if(nextOcts[i].node.getChildNumber() == 0) {
00167 idxOfNextFC = i;
00168 break;
00169 }
00170 }
00171 }
00172
00173 int myFirstFC = -1;
00174 for(int i = 0; i < inSz; i++) {
00175 if(tnAndValsList[i].node.getChildNumber() == 0) {
00176 myFirstFC = i;
00177 break;
00178 }
00179 }
00180
00181 int myLastFC = -1;
00182 if( myFirstFC >= 0 ) {
00183 for(int i = (inSz - 1); i >= 0; i--) {
00184 if(tnAndValsList[i].node.getChildNumber() == 0) {
00185 myLastFC = i;
00186 break;
00187 }
00188 }
00189 }
00190
00191
00192
00193
00194 if( (myFirstFC >= 0) && (idxOfPrevFC >= 0) ) {
00195 int fcGap = (myFirstFC + 7 - idxOfPrevFC);
00196 if( fcGap < 8 ) {
00197
00198 if(myFirstFC) {
00199 tmpList.insert(tmpList.end(), tnAndValsList.begin(),
00200 tnAndValsList.begin() + myFirstFC);
00201 }
00202 } else {
00203
00204
00205 double minVal = prevOcts[idxOfPrevFC].values[0];
00206 double maxVal = prevOcts[idxOfPrevFC].values[0];
00207 for(int i = idxOfPrevFC; i < 7; i++){
00208 double currVal = prevOcts[i].values[0];
00209 if(currVal < minVal) {
00210 minVal = currVal;
00211 }
00212 if(currVal > maxVal) {
00213 maxVal = currVal;
00214 }
00215 }
00216 for(int i = 0; i < (1 + idxOfPrevFC); i++){
00217 double currVal = tnAndValsList[i].values[0];
00218 if(currVal < minVal) {
00219 minVal = currVal;
00220 }
00221 if(currVal > maxVal) {
00222 maxVal = currVal;
00223 }
00224 }
00225 if((maxVal - minVal) >= thresholdFac) {
00226
00227 if(myFirstFC) {
00228 tmpList.insert(tmpList.end(), tnAndValsList.begin(),
00229 tnAndValsList.begin() + myFirstFC);
00230 }
00231 } else {
00232
00233 if((1 + idxOfPrevFC) < myFirstFC) {
00234 tmpList.insert(tmpList.end(), tnAndValsList.begin() + (1 + idxOfPrevFC),
00235 tnAndValsList.begin() + myFirstFC);
00236 }
00237 }
00238 }
00239 } else {
00240 if(myFirstFC >= 0) {
00241
00242
00243
00244 if(myFirstFC) {
00245 tmpList.insert(tmpList.end(), tnAndValsList.begin(),
00246 tnAndValsList.begin() + myFirstFC);
00247 }
00248 } else if (idxOfPrevFC >= 0) {
00249
00250 double minVal = prevOcts[idxOfPrevFC].values[0];
00251 double maxVal = prevOcts[idxOfPrevFC].values[0];
00252 for(int i = idxOfPrevFC; i < 7; i++){
00253 double currVal = prevOcts[i].values[0];
00254 if(currVal < minVal) {
00255 minVal = currVal;
00256 }
00257 if(currVal > maxVal) {
00258 maxVal = currVal;
00259 }
00260 }
00261 for(int i = 0; i < (1 + idxOfPrevFC); i++){
00262 double currVal = tnAndValsList[i].values[0];
00263 if(currVal < minVal) {
00264 minVal = currVal;
00265 }
00266 if(currVal > maxVal) {
00267 maxVal = currVal;
00268 }
00269 }
00270 if((maxVal - minVal) >= thresholdFac) {
00271
00272 tmpList = tnAndValsList;
00273 } else {
00274
00275 tmpList.insert(tmpList.end(), tnAndValsList.begin() + (1 + idxOfPrevFC),
00276 tnAndValsList.end());
00277 }
00278 } else {
00279
00280 tmpList = tnAndValsList;
00281 }
00282 }
00283
00284
00285 int prevFCidx = myFirstFC;
00286 for(int idx = myFirstFC + 1; idx <= myLastFC; idx++) {
00287 if(tnAndValsList[idx].node.getChildNumber() == 0) {
00288 int fcGap = (idx - prevFCidx);
00289 if(fcGap < 8) {
00290
00291 tmpList.insert(tmpList.end(), tnAndValsList.begin() + prevFCidx,
00292 tnAndValsList.begin() + idx);
00293 } else {
00294
00295
00296 double minVal = tnAndValsList[prevFCidx].values[0];
00297 double maxVal = tnAndValsList[prevFCidx].values[0];
00298 double sumVal = 0;
00299 for(int j = prevFCidx; j < (8 + prevFCidx); j++) {
00300 double currVal = tnAndValsList[j].values[0];
00301 if(currVal < minVal) {
00302 minVal = currVal;
00303 }
00304 if(currVal > maxVal) {
00305 maxVal = currVal;
00306 }
00307 sumVal += currVal;
00308 }
00309 if((maxVal - minVal) >= thresholdFac) {
00310
00311 tmpList.insert(tmpList.end(), tnAndValsList.begin() + prevFCidx,
00312 tnAndValsList.begin() + idx);
00313 } else {
00314
00315 ot::NodeAndValues<double, 1> tmpObj;
00316 tmpObj.node = tnAndValsList[prevFCidx].node.getParent();
00317 tmpObj.values[0] = (sumVal/8.0);
00318 tmpList.push_back(tmpObj);
00319 if((prevFCidx + 8) < idx) {
00320 tmpList.insert(tmpList.end(), tnAndValsList.begin() + prevFCidx + 8,
00321 tnAndValsList.begin() + idx);
00322 }
00323 }
00324 }
00325 prevFCidx = idx;
00326 }
00327 }
00328
00329
00330 if( (myLastFC >= 0) && (idxOfNextFC >= 0) ) {
00331 int fcGap = inSz + idxOfNextFC - myLastFC;
00332 if(fcGap < 8) {
00333
00334 tmpList.insert(tmpList.end(), tnAndValsList.begin() + myLastFC,
00335 tnAndValsList.end());
00336 } else {
00337
00338
00339 double minVal = tnAndValsList[myLastFC].values[0];
00340 double maxVal = tnAndValsList[myLastFC].values[0];
00341 double sumVal = 0;
00342 if((myLastFC + 8) > inSz) {
00343 for(int i = myLastFC; i < inSz; i++){
00344 double currVal = tnAndValsList[i].values[0];
00345 if(currVal < minVal) {
00346 minVal = currVal;
00347 }
00348 if(currVal > maxVal) {
00349 maxVal = currVal;
00350 }
00351 sumVal += currVal;
00352 }
00353 for(int i = 0; i < (myLastFC + 8 - inSz); i++){
00354 double currVal = nextOcts[i].values[0];
00355 if(currVal < minVal) {
00356 minVal = currVal;
00357 }
00358 if(currVal > maxVal) {
00359 maxVal = currVal;
00360 }
00361 sumVal += currVal;
00362 }
00363 } else {
00364 for(int i = myLastFC; i < (myLastFC + 8); i++){
00365 double currVal = tnAndValsList[i].values[0];
00366 if(currVal < minVal) {
00367 minVal = currVal;
00368 }
00369 if(currVal > maxVal) {
00370 maxVal = currVal;
00371 }
00372 sumVal += currVal;
00373 }
00374 }
00375 if((maxVal - minVal) >= thresholdFac) {
00376
00377 tmpList.insert(tmpList.end(), tnAndValsList.begin() + myLastFC,
00378 tnAndValsList.end());
00379 } else {
00380
00381 ot::NodeAndValues<double, 1> tmpObj;
00382 tmpObj.node = tnAndValsList[myLastFC].node.getParent();
00383 tmpObj.values[0] = (sumVal/8.0);
00384 tmpList.push_back(tmpObj);
00385 if((myLastFC + 8) < inSz) {
00386 tmpList.insert(tmpList.end(), tnAndValsList.begin() + myLastFC + 8,
00387 tnAndValsList.end());
00388 }
00389 }
00390 }
00391 } else {
00392 if(myLastFC >= 0) {
00393
00394 double minVal = tnAndValsList[myLastFC].values[0];
00395 double maxVal = tnAndValsList[myLastFC].values[0];
00396 double sumVal = 0;
00397 if((myLastFC + 8) > inSz) {
00398 for(int i = myLastFC; i < inSz; i++){
00399 double currVal = tnAndValsList[i].values[0];
00400 if(currVal < minVal) {
00401 minVal = currVal;
00402 }
00403 if(currVal > maxVal) {
00404 maxVal = currVal;
00405 }
00406 sumVal += currVal;
00407 }
00408 if(rank < (npesCurr - 1)) {
00409 for(int i = 0; i < (myLastFC + 8 - inSz); i++){
00410 double currVal = nextOcts[i].values[0];
00411 if(currVal < minVal) {
00412 minVal = currVal;
00413 }
00414 if(currVal > maxVal) {
00415 maxVal = currVal;
00416 }
00417 sumVal += currVal;
00418 }
00419 }
00420 } else {
00421 for(int i = myLastFC; i < (myLastFC + 8); i++){
00422 double currVal = tnAndValsList[i].values[0];
00423 if(currVal < minVal) {
00424 minVal = currVal;
00425 }
00426 if(currVal > maxVal) {
00427 maxVal = currVal;
00428 }
00429 sumVal += currVal;
00430 }
00431 }
00432 if((maxVal - minVal) >= thresholdFac) {
00433
00434 tmpList.insert(tmpList.end(), tnAndValsList.begin() + myLastFC,
00435 tnAndValsList.end());
00436 } else {
00437
00438 ot::NodeAndValues<double, 1> tmpObj;
00439 tmpObj.node = tnAndValsList[myLastFC].node.getParent();
00440 tmpObj.values[0] = (sumVal/8.0);
00441 tmpList.push_back(tmpObj);
00442 if((myLastFC + 8) < inSz) {
00443 tmpList.insert(tmpList.end(), tnAndValsList.begin() + myLastFC + 8,
00444 tnAndValsList.end());
00445 }
00446 }
00447 } else {
00448
00449
00450 }
00451 }
00452
00453 DendroIntL outSz = tmpList.size();
00454 DendroIntL globOutSize;
00455 par::Mpi_Allreduce<DendroIntL>(&outSz, &globOutSize, 1, MPI_SUM, commCurr);
00456
00457
00458 if(!rank) {
00459 std::cout<<"In RG2O: globInSize: "<<globInSize<<" globOutSize: "
00460 <<globOutSize<<" npesCurr: "<<npesCurr<<std::endl;
00461 }
00462
00463 if(globOutSize == globInSize) {
00464 repeatLoop = false;
00465 }
00466
00467 if(globOutSize < MIN_GRAIN_SIZE) {
00468 repeatLoop = false;
00469 }
00470
00471
00472 globInSize = globOutSize;
00473 tnAndValsList = tmpList;
00474 tmpList.clear();
00475
00476 if(globInSize < (MIN_GRAIN_SIZE*npesCurr)) {
00477 int splittingSize = (globInSize/MIN_GRAIN_SIZE);
00478 if(splittingSize == 0) {
00479 splittingSize = 1;
00480 }
00481
00482 unsigned int avgLoad = (globInSize/splittingSize);
00483 int leftOvers = (globInSize - (splittingSize*avgLoad));
00484
00485 if(rank >= splittingSize) {
00486 par::scatterValues<ot::NodeAndValues<double, 1> >(tnAndValsList, tmpList, 0, commCurr);
00487 }else if(rank < leftOvers) {
00488 par::scatterValues<ot::NodeAndValues<double, 1> >(tnAndValsList, tmpList, (avgLoad + 1), commCurr);
00489 }else {
00490 par::scatterValues<ot::NodeAndValues<double, 1> >(tnAndValsList, tmpList, avgLoad, commCurr);
00491 }
00492 tnAndValsList = tmpList;
00493 tmpList.clear();
00494
00495 MPI_Comm newComm;
00496 par::splitCommUsingSplittingRank(splittingSize, &newComm, commCurr);
00497 commCurr = newComm;
00498 npesCurr = splittingSize;
00499
00500 if(rank >= splittingSize) {
00501 repeatLoop = false;
00502 }
00503 } else {
00504 par::partitionW<ot::NodeAndValues<double, 1> >(tnAndValsList, NULL, commCurr);
00505 }
00506
00507 }
00508
00509 linOct.clear();
00510 for(int i = 0; i < tnAndValsList.size(); i++) {
00511 linOct.push_back(tnAndValsList[i].node);
00512 }
00513
00514 PROF_RG2O_END
00515 }
00516
00517
00518 int completeOctree(const std::vector<TreeNode > & inp,
00519 std::vector<TreeNode > & out,
00520 unsigned int dim, unsigned int maxDepth, bool isUnique,
00521 bool isSorted, bool assertNoEmptyProcs, MPI_Comm comm) {
00522
00523 PROF_N2O_BEGIN
00524
00525 TreeNode root (dim, maxDepth);
00526
00527 int size;
00528 MPI_Comm_size(comm, &size);
00529
00530 if(size == 1) {
00531 completeSubtree(root, inp, out, dim, maxDepth, isUnique, isSorted);
00532 PROF_N2O_END
00533 }
00534
00535 out = inp;
00536
00537
00538 if (!isUnique) {
00539 par::removeDuplicates<ot::TreeNode>(out, isSorted, comm);
00540 } else if (!isSorted) {
00541 std::vector<TreeNode> tmpOut;
00542 par::sampleSort<ot::TreeNode>(out, tmpOut,comm);
00543 out = tmpOut;
00544 tmpOut.clear();
00545 }
00546
00547
00548 MPI_Comm new_comm;
00549 if(assertNoEmptyProcs) {
00550 new_comm = comm;
00551 assert(!out.empty());
00552 } else {
00553 par::splitComm2way(out.empty(), &new_comm, comm);
00554 }
00555
00556 if(!(out.empty())) {
00557 int new_rank, new_size;
00558
00559 MPI_Comm_rank (new_comm, &new_rank);
00560 MPI_Comm_size (new_comm, &new_size);
00561
00562 MPI_Request requestSend;
00563 MPI_Request requestRecv;
00564
00565 ot::TreeNode begBuf;
00566 ot::TreeNode lastElem;
00567
00568 if(new_rank) {
00569
00570 par::Mpi_Irecv<ot::TreeNode>(&begBuf, 1, (new_rank-1) , 1, new_comm, &requestRecv);
00571 }
00572
00573 if (new_rank < (new_size-1)) {
00574 lastElem = out[out.size() - 1];
00575
00576 par::Mpi_Issend<ot::TreeNode>(&lastElem, 1, (new_rank+1), 1, new_comm, &requestSend);
00577 }
00578
00579
00580
00581
00582 if (new_rank == 0) {
00583 ot::TreeNode minCorner(0,0,0,maxDepth,dim,maxDepth);
00584 #ifdef __DEBUG_OCT__
00585 assert(areComparable(out[0], minCorner));
00586 #endif
00587 if ( (out[0] != minCorner) && (!out[0].isAncestor(minCorner)) ) {
00588 ot::TreeNode ncaTmp = getNCA(out[0],minCorner);
00589 std::vector<ot::TreeNode> kids;
00590 ncaTmp.addChildren(kids);
00591 out.insert(out.begin(),kids[0]);
00592 kids.clear();
00593 }
00594 }
00595
00596
00597 if (new_rank == (new_size-1) ) {
00598 ot::TreeNode maxCorner(((1u << maxDepth)-1),
00599 ((1u << maxDepth)-1),((1u << maxDepth)-1),maxDepth,dim,maxDepth);
00600 #ifdef __DEBUG_OCT__
00601 assert(areComparable(out[out.size()-1], maxCorner));
00602 #endif
00603 if ( (out[out.size()-1] != maxCorner) &&
00604 (!out[out.size()-1].isAncestor(maxCorner)) ) {
00605 ot::TreeNode ncaTmp = getNCA(out[out.size()-1],maxCorner);
00606 std::vector<ot::TreeNode> kids;
00607 ncaTmp.addChildren(kids);
00608 out.insert(out.end(),kids[(1 << dim)-1]);
00609 kids.clear();
00610 }
00611 }
00612
00613 std::vector<ot::TreeNode> tmpList;
00614 for(unsigned int i = 0; i < (out.size()-1); i++) {
00615 #ifdef __DEBUG_OCT__
00616 assert(areComparable(out[i], out[i+1]));
00617 #endif
00618 if(out[i].isAncestor(out[i+1])) {
00619 appendCompleteRegion(out[i], out[i+1], tmpList, false, false);
00620 } else {
00621 appendCompleteRegion(out[i], out[i+1], tmpList, true, false);
00622 }
00623 }
00624
00625
00626
00627
00628 if(new_rank == (new_size - 1) ) {
00629 tmpList.push_back(out[out.size() - 1]);
00630 }
00631
00632 if(new_rank) {
00633 MPI_Status statusWait;
00634 MPI_Wait(&requestRecv, &statusWait);
00635
00636 std::vector<ot::TreeNode> begList;
00637 #ifdef __DEBUG_OCT__
00638 assert(areComparable(begBuf, out[0]));
00639 #endif
00640 if(begBuf.isAncestor(out[0])) {
00641 appendCompleteRegion(begBuf, out[0], begList, false, false);
00642 } else {
00643 appendCompleteRegion(begBuf, out[0], begList, true, false);
00644 }
00645 out = begList;
00646 begList.clear();
00647 out.insert(out.end(), tmpList.begin(), tmpList.end());
00648 } else {
00649 out = tmpList;
00650 }
00651
00652 tmpList.clear();
00653
00654 if(new_rank < (new_size-1)) {
00655 MPI_Status statusWait;
00656 MPI_Wait(&requestSend, &statusWait);
00657 }
00658
00659 }
00660
00661
00662 PROF_N2O_END
00663
00664 }
00665
00666
00667 int completeSubtree(TreeNode block, const std::vector<TreeNode > & inp, std::vector<TreeNode > & out,
00668 unsigned int dim, unsigned int maxDepth, bool isUnique, bool isSorted) {
00669
00670 PROF_N2O_SEQ_BEGIN
00671
00672 out = inp;
00673
00674 if (!isUnique) {
00675 seq::makeVectorUnique<TreeNode>(out, isSorted) ;
00676 } else if (!isSorted) {
00677 sort(out.begin(),out.end());
00678 }
00679
00680 if (!out.empty()) {
00681
00682
00683 ot::TreeNode minCorner = block.getDFD();
00684 #ifdef __DEBUG_OCT__
00685 assert(areComparable(out[0], minCorner));
00686 #endif
00687 if ( (out[0] != minCorner) && (!out[0].isAncestor(minCorner)) ) {
00688 ot::TreeNode ncaTmp = getNCA(out[0],minCorner);
00689 std::vector<ot::TreeNode> kids;
00690 ncaTmp.addChildren(kids);
00691 out.insert(out.begin(),kids[0]);
00692 kids.clear();
00693 }
00694
00695
00696 ot::TreeNode maxCorner = block.getDLD();
00697 #ifdef __DEBUG_OCT__
00698 assert(areComparable(out[out.size()-1], maxCorner));
00699 #endif
00700 if ( (out[out.size()-1] != maxCorner) && (!out[out.size()-1].isAncestor(maxCorner)) ) {
00701 ot::TreeNode ncaTmp = getNCA(out[out.size()-1],maxCorner);
00702 std::vector<ot::TreeNode> kids;
00703 ncaTmp.addChildren(kids);
00704 out.insert(out.end(),kids[(1 << dim)-1]);
00705 kids.clear();
00706 }
00707
00708 std::vector<ot::TreeNode> tmpList;
00709 for(unsigned int i = 0; i < (out.size()-1); i++) {
00710 #ifdef __DEBUG_OCT__
00711 assert(areComparable(out[i], out[i+1]));
00712 #endif
00713 if(out[i].isAncestor(out[i+1])) {
00714 appendCompleteRegion(out[i], out[i+1], tmpList, false, false);
00715 } else {
00716 appendCompleteRegion(out[i], out[i+1], tmpList, true, false);
00717 }
00718 }
00719
00720 tmpList.push_back(out[out.size() - 1]);
00721
00722 out = tmpList;
00723 tmpList.clear();
00724 }
00725
00726 PROF_N2O_SEQ_END
00727
00728 }
00729
00730 int points2Octree(std::vector<double>& pts, double * gLens, std::vector<TreeNode> & nodes,
00731 unsigned int dim, unsigned int maxDepth, unsigned int maxNumPts, MPI_Comm comm ) {
00732
00733 PROF_P2O_BEGIN
00734
00735 int size;
00736 MPI_Comm_size(comm, &size);
00737
00738
00739 if (size == 1) {
00740 points2OctreeSeq(pts, gLens, nodes, dim, maxDepth, maxNumPts);
00741 PROF_P2O_END
00742 }
00743
00744 int rank;
00745 MPI_Comm_rank(comm, &rank);
00746
00747 TreeNode root (dim, maxDepth);
00748
00749 if (maxDepth == 0) {
00750 if (!rank) {
00751 nodes.resize(1);
00752 nodes[0] = root;
00753 } else {
00754 nodes.resize(0);
00755 }
00756 PROF_P2O_END
00757 }
00758
00759 unsigned int ptsLen = pts.size();
00760 assert( (dim == 1) || (dim == 2) || (dim == 3 ));
00761 assert( (ptsLen%dim) == 0);
00762
00763 DendroIntL numNodes = (ptsLen/dim);
00764
00765 DendroIntL totSize;
00766 par::Mpi_Allreduce<DendroIntL>(&numNodes, &totSize, 1, MPI_SUM, comm);
00767
00768 if (totSize <= static_cast<DendroIntL>(maxNumPts)) {
00769 if (!rank) {
00770 nodes.resize(1);
00771 nodes[0] = root;
00772 } else {
00773 nodes.resize(0);
00774 }
00775 PROF_P2O_END
00776 }
00777
00778
00779
00780 const DendroIntL THOUSAND = 1000;
00781 if (totSize < (THOUSAND*size)) {
00782 int splittingSize = (totSize/THOUSAND);
00783
00784 if(splittingSize == 0) {
00785 splittingSize = 1;
00786 }
00787
00788 unsigned int avgLoad = (totSize/splittingSize);
00789 int leftOvers = (totSize - (splittingSize*avgLoad));
00790
00791 std::vector<double> tmpPts;
00792 if(rank >= splittingSize) {
00793 par::scatterValues<double>(pts, tmpPts, 0, comm);
00794 }else if(rank < leftOvers) {
00795 par::scatterValues<double>(pts, tmpPts, (dim*(avgLoad+1)), comm);
00796 }else {
00797 par::scatterValues<double>(pts, tmpPts, (dim*avgLoad), comm);
00798 }
00799 pts.clear();
00800
00801 MPI_Comm newComm;
00802 par::splitCommUsingSplittingRank(splittingSize, &newComm, comm);
00803
00804 #ifndef __SILENT_MODE__
00805 if(!rank) {
00806 std::cout<<"Input to p2o is small ("<<totSize
00807 <<"). npes = "<<size<<" Splitting Comm. "<<std::endl;
00808 }
00809 #endif
00810
00811 if(rank < splittingSize) {
00812 points2Octree(tmpPts, gLens, nodes, dim, maxDepth, maxNumPts, newComm);
00813 }
00814 tmpPts.clear();
00815
00816 PROF_P2O_END
00817 }
00818
00819
00820
00821 nodes.resize(numNodes);
00822
00823 for (DendroIntL i = 0; i < numNodes; i++) {
00824
00825
00826 unsigned int px = (unsigned int)(pts[i*dim]*((double)(1u << maxDepth))/(gLens[0]));
00827 unsigned int py, pz = 0;
00828 if(dim > 1) {
00829 py = (unsigned int)(pts[(i*dim)+1]*((double)(1u << maxDepth))/gLens[1]);
00830 if(dim > 2) {
00831 pz = (unsigned int)(pts[(i*dim)+2]*((double)(1u << maxDepth))/gLens[2]);
00832 }
00833 }
00834 nodes[i] = TreeNode(px, py, pz, maxDepth, dim, maxDepth);
00835 }
00836
00837 pts.clear();
00838
00839 std::vector<ot::TreeNode> tmpNodes ;
00840
00841
00842 par::sampleSort<ot::TreeNode>(nodes, tmpNodes, comm);
00843 nodes = tmpNodes;
00844 tmpNodes.clear();
00845
00846 std::vector<ot::TreeNode> leaves;
00847 std::vector<ot::TreeNode> minsAllBlocks;
00848
00849 blockPartStage1_p2o(nodes, leaves, dim, maxDepth, comm);
00850 blockPartStage2_p2o(nodes, leaves, minsAllBlocks, dim, maxDepth, comm);
00851
00852
00853 p2oLocal(nodes, leaves, maxNumPts, dim, maxDepth);
00854
00855 PROF_P2O_END
00856
00857 }
00858
00859
00860 int points2OctreeSeq(std::vector<double>& pts, double * gLens, std::vector<TreeNode> & nodes,
00861 unsigned int dim, unsigned int maxDepth, unsigned int maxNumPts) {
00862
00863 PROF_P2O_SEQ_BEGIN
00864
00865 if (maxDepth == 0) {
00866 nodes.resize(1);
00867 nodes[0] = TreeNode(dim,maxDepth);
00868 PROF_P2O_SEQ_END
00869 }
00870
00871 unsigned int ptsLen = pts.size();
00872 assert( (dim == 1) || (dim == 2) || (dim == 3 ));
00873 assert( (ptsLen%dim) == 0);
00874
00875 TreeNode root (dim,maxDepth);
00876 unsigned int numNodes = ptsLen/dim;
00877 nodes.resize(numNodes);
00878
00879 for (unsigned int i = 0; i < numNodes; i++) {
00880
00881
00882 unsigned int px = (unsigned int)(pts[i*dim]*((double)(1u << maxDepth))/(gLens[0]));
00883 unsigned int py, pz = 0;
00884 if(dim > 1) {
00885 py = (unsigned int)(pts[(i*dim)+1]*((double)(1u << maxDepth))/gLens[1]);
00886 if(dim > 2) {
00887 pz = (unsigned int)(pts[(i*dim)+2]*((double)(1u << maxDepth))/gLens[2]);
00888 }
00889 }
00890 nodes[i] = TreeNode(px, py, pz, maxDepth, dim, maxDepth);
00891 }
00892
00893 pts.clear();
00894
00895 unsigned int totSize = nodes.size();
00896
00897 if (totSize <= maxNumPts) {
00898 nodes.resize(1);
00899 nodes[0] = root;
00900 PROF_P2O_SEQ_END
00901 }
00902
00903
00904 sort(nodes.begin(), nodes.end());
00905
00906 std::vector<TreeNode> leaves;
00907 leaves.push_back(root);
00908
00909 p2oLocal(nodes, leaves, maxNumPts, dim, maxDepth);
00910
00911 PROF_P2O_SEQ_END
00912
00913 }
00914
00915 int p2oLocal(std::vector<TreeNode> & nodes, std::vector<TreeNode>& leaves,
00916 unsigned int maxNumPts, unsigned int dim, unsigned int maxDepth) {
00917 PROF_P2O_LOCAL_BEGIN
00918
00919 std::vector<TreeNode> nodesDup = nodes;
00920 nodes.clear();
00921
00922 std::vector<TreeNode> addList;
00923 std::vector<TreeNode> nodesTmp;
00924
00925 std::vector<TreeNode> *nodeSrc = &nodes;
00926 std::vector<TreeNode> *nodeDest = &nodesTmp;
00927 std::vector<TreeNode> *leafSrc = &leaves;
00928 std::vector<TreeNode> *leafDest = &addList;
00929
00930
00931 do {
00932
00933
00934 for (unsigned int i = 0; i < leafSrc->size(); i++) {
00935 (*leafSrc)[i].setWeight(0);
00936 }
00937
00938
00939 unsigned int nextNode = 0;
00940 unsigned int nextPt = 0;
00941
00942
00943
00944
00945
00946
00947
00948
00949
00950 while (nextPt < nodesDup.size()) {
00951 if (nodesDup[nextPt] >= (*leafSrc)[nextNode]) {
00952 #ifdef __DEBUG_OCT__
00953 assert(areComparable((*leafSrc)[nextNode], nodesDup[nextPt]));
00954 #endif
00955 if (((*leafSrc)[nextNode].isAncestor(nodesDup[nextPt])) ||
00956 ((*leafSrc)[nextNode] == nodesDup[nextPt])) {
00957 (*leafSrc)[nextNode].addWeight(1);
00958 nextPt++;
00959 } else {
00960 nextNode++;
00961 if (nextNode == leafSrc->size()) {
00962
00963 break;
00964 }
00965 }
00966 } else {
00967 nextPt++;
00968 }
00969 }
00970
00971 leafDest->resize((1 << dim)*(leafSrc->size()));
00972 unsigned int addListSz=0;
00973 unsigned int nodesPrevSz = nodeSrc->size();
00974 nodeSrc->resize(nodesPrevSz + (leafSrc->size()));
00975 nodeDest->resize(nodeSrc->size());
00976 unsigned int tmpCtr=0;
00977 unsigned int nodesCtr=0;
00978
00979 for (unsigned int i = 0; i < leafSrc->size(); i++) {
00980 if ( ((*leafSrc)[i].getWeight() > maxNumPts) &&
00981 ((*leafSrc)[i].getLevel() < maxDepth) ) {
00982 TreeNode thisNode = (*leafSrc)[i];
00983 std::vector<TreeNode> children;
00984 thisNode.addChildren(children);
00985 for (int ci = 0; ci < (1 << dim); ci++) {
00986 (*leafDest)[addListSz] = children[ci];
00987 addListSz++;
00988 }
00989 children.clear();
00990 } else {
00991 while ((nodesCtr < nodesPrevSz) &&
00992 ((*nodeSrc)[nodesCtr] < (*leafSrc)[i]) ) {
00993 (*nodeDest)[tmpCtr++] = (*nodeSrc)[nodesCtr++];
00994 }
00995 (*nodeDest)[tmpCtr++] = (*leafSrc)[i];
00996 }
00997 }
00998
00999 for (unsigned int i = nodesCtr; i < nodesPrevSz; i++) {
01000 (*nodeDest)[tmpCtr++] = (*nodeSrc)[i];
01001 }
01002
01003 leafDest->resize(addListSz);
01004 nodeDest->resize(tmpCtr);
01005
01006
01007 std::vector<TreeNode> *tmpPtr = nodeSrc;
01008 nodeSrc = nodeDest;
01009 nodeDest = tmpPtr;
01010 tmpPtr = leafSrc;
01011 leafSrc = leafDest;
01012 leafDest = tmpPtr;
01013 leafDest->clear();
01014 nodeDest->clear();
01015 } while (!leafSrc->empty());
01016
01017 if ( nodeSrc != &nodes ) {
01018 nodes = nodesTmp;
01019 }
01020 nodesTmp.clear();
01021
01022
01023 for (unsigned int i = 0; i < nodes.size(); i++) {
01024 nodes[i].setWeight(1);
01025 }
01026
01027 PROF_P2O_LOCAL_END
01028 }
01029
01030
01031
01032 int appendCompleteRegion(TreeNode first, TreeNode second,
01033 std::vector<ot::TreeNode>& newNodes, bool includeMin, bool includeMax) {
01034
01035 PROF_COMPLETE_REGION_BEGIN
01036
01037 unsigned int dim = first.getDim();
01038 unsigned int maxDepth = first.getMaxDepth();
01039
01040 TreeNode min = ((first < second) ? first : second);
01041
01042 if(includeMin) {
01043 newNodes.push_back(min);
01044 }
01045
01046 if (first == second) {
01047 PROF_COMPLETE_REGION_END
01048 }
01049
01050 TreeNode max = ((first > second) ? first : second);
01051
01052
01053 TreeNode nca = getNCA(min,max);
01054
01055 if(min == nca) {
01056
01057 ot::TreeNode tmpAncestor = min;
01058 bool repeatLoop;
01059 do {
01060 repeatLoop = false;
01061 std::vector<ot::TreeNode> tmpChildList;
01062 tmpAncestor.addChildren(tmpChildList);
01063 for(unsigned int j = 0; j < tmpChildList.size(); j++) {
01064 #ifdef __DEBUG_OCT__
01065 assert(areComparable(tmpChildList[j], max));
01066 #endif
01067 if( (tmpChildList[j] < max) &&
01068 (!(tmpChildList[j].isAncestor(max))) ) {
01069 newNodes.push_back(tmpChildList[j]);
01070 } else if(tmpChildList[j].isAncestor(max)) {
01071 tmpAncestor = tmpChildList[j];
01072 repeatLoop = true;
01073 break;
01074 } else {
01075 assert(tmpChildList[j] == max);
01076 break;
01077 }
01078 }
01079 } while (repeatLoop);
01080 } else {
01081 TreeNode currentNode = min;
01082 while(currentNode > nca) {
01083 TreeNode parentOfCurrent = currentNode.getParent();
01084 std::vector<ot::TreeNode> myBros;
01085 parentOfCurrent.addChildren(myBros);
01086 for(unsigned int i = 0; i < myBros.size(); i++) {
01087 #ifdef __DEBUG_OCT__
01088 assert(areComparable(myBros[i], max));
01089 #endif
01090 if( (myBros[i] > min) &&
01091 (myBros[i] < max) && (!(myBros[i].isAncestor(max))) ) {
01092
01093 newNodes.push_back(myBros[i]);
01094 } else if(myBros[i].isAncestor(max)) {
01095
01096
01097
01098
01099
01100
01101 ot::TreeNode tmpAncestor = myBros[i];
01102 bool repeatLoop;
01103 do {
01104 repeatLoop = false;
01105 std::vector<ot::TreeNode> tmpChildList;
01106 tmpAncestor.addChildren(tmpChildList);
01107 for(unsigned int j = 0; j < tmpChildList.size(); j++) {
01108 #ifdef __DEBUG_OCT__
01109 assert(areComparable(tmpChildList[j], max));
01110 #endif
01111 if( (tmpChildList[j] < max) &&
01112 (!(tmpChildList[j].isAncestor(max))) ) {
01113 newNodes.push_back(tmpChildList[j]);
01114 } else if(tmpChildList[j].isAncestor(max)) {
01115 tmpAncestor = tmpChildList[j];
01116 repeatLoop = true;
01117 break;
01118 } else {
01119 assert(tmpChildList[j] == max);
01120 break;
01121 }
01122 }
01123 } while (repeatLoop);
01124 break;
01125 }
01126 }
01127 currentNode = parentOfCurrent;
01128 }
01129 }
01130
01131 if(includeMax) {
01132 newNodes.push_back(max);
01133 }
01134
01135 PROF_COMPLETE_REGION_END
01136 }
01137
01138
01139
01140
01141
01142
01143
01144
01145
01146
01147
01148
01149
01150
01151
01152
01153
01154
01155
01156
01157
01158
01159
01160
01161
01162
01163
01164
01165
01166
01167
01168
01169
01170
01171
01172
01173
01174
01175
01176
01177
01178
01179
01180
01181
01182
01183
01184
01185
01186
01187
01188
01189
01190
01191
01192
01193
01194
01195
01196
01197
01198
01199
01200
01201
01202
01203
01204
01205
01206 }
01207