00001
00008 #include "TreeNode.h"
00009 #include "parUtils.h"
00010 #include "dendro.h"
00011
00012 #ifdef __DEBUG__
00013 #ifndef __DEBUG_OCT__
00014 #define __DEBUG_OCT__
00015 #endif
00016 #endif
00017
00018 namespace ot {
00019
00020 int simpleCoarsen(const std::vector<TreeNode > &in, std::vector<TreeNode> &out, MPI_Comm comm) {
00021 PROF_SIMPLE_COARSE_BEGIN
00022
00023 out.clear();
00024
00025 std::vector<TreeNode> tmpIn = in;
00026
00027 MPI_Comm new_comm;
00028 par::splitComm2way(tmpIn.empty(), &new_comm, comm);
00029
00030 if(!tmpIn.empty()) {
00031 for(int i = 0; i < tmpIn.size(); i++) {
00032 out.push_back(tmpIn[i].getParent());
00033 }
00034
00035 std::vector<TreeNode> tmpOut;
00036 unsigned int dim = tmpIn[0].getDim();
00037 unsigned int maxDepth = tmpIn[0].getMaxDepth();
00038
00039
00040
00041
00042 completeOctree(out, tmpOut, dim, maxDepth, false, false, false, new_comm);
00043
00044 out = tmpOut;
00045 tmpOut.clear();
00046 }
00047
00048 PROF_SIMPLE_COARSE_END
00049 }
00050
00051
00052
00053 int coarsenOctree(const std::vector<TreeNode > &in, std::vector<TreeNode> &out) {
00054 PROF_COARSE_SEQ_BEGIN
00055 unsigned int inSz = static_cast<unsigned int>(in.size());
00056 out.clear();
00057
00058 unsigned int dim;
00059 if(inSz) {
00060 dim = in[0].getDim();
00061 }else {
00062 PROF_COARSE_SEQ_END
00063 }
00064
00065 if (inSz < (1 << dim)) {
00066 out = in;
00067 std::cout<<"WARNING: Too Few Octants to Coarse."<<std::endl;
00068 PROF_COARSE_SEQ_END
00069 }
00070
00071 assert(in[0].getChildNumber() == 0);
00072
00073 int prevFCidx = 0;
00074
00075 while (prevFCidx < inSz) {
00076 int currFCidx = (prevFCidx + 1);
00077
00078
00079 while( (currFCidx < inSz) && (in[currFCidx].getChildNumber() != 0) ) {
00080 currFCidx++;
00081 }
00082 if(currFCidx >= (prevFCidx + (1 << dim))) {
00083
00084
00085 out.push_back(in[prevFCidx].getParent());
00086 if( (prevFCidx + (1 << dim)) < currFCidx ) {
00087 out.insert(out.end(),
00088 (in.begin() + (prevFCidx + (1 << dim))), (in.begin() + currFCidx));
00089 }
00090 } else {
00091
00092 if(prevFCidx < currFCidx) {
00093 out.insert(out.end(),
00094 (in.begin() + prevFCidx), (in.begin() + currFCidx));
00095 }
00096 }
00097 prevFCidx = currFCidx;
00098 }
00099
00100 PROF_COARSE_SEQ_END
00101 }
00102
00103
00104
00105
00106 int coarsenOctree(const std::vector<TreeNode > &in, std::vector<TreeNode> &out,
00107 unsigned int dim, unsigned int maxDepth, MPI_Comm comm,
00108 bool skipPartition, MPI_Comm* newCommPtr, bool* iAmActive) {
00109 PROF_COARSE_BEGIN
00110
00111 int size;
00112 MPI_Comm_size(comm, &size);
00113 out.clear();
00114
00115 if(newCommPtr) {
00116 *newCommPtr = comm;
00117 }
00118
00119 if(iAmActive) {
00120 *iAmActive = true;
00121 }
00122
00124
00125 if (size == 1) {
00126
00127 #ifndef __SILENT_MODE__
00128 std::cout<<"Coarsen Octree. inpSize: "<<in.size()<<" activeNpes: 1"<<std::endl;
00129 #endif
00130
00131 coarsenOctree(in,out);
00132 PROF_COARSE_END
00133 }
00134
00135 std::vector<TreeNode> tmpIn = in;
00136 DendroIntL inSz = tmpIn.size();
00137
00139
00140 DendroIntL globSize;
00141 par::Mpi_Allreduce<DendroIntL>(&inSz, &globSize, 1, MPI_SUM, comm);
00142
00143 int rank;
00144 MPI_Comm_rank(comm, &rank);
00145
00146
00147 const DendroIntL THOUSAND = 1000;
00148 if (globSize < (THOUSAND*size)) {
00149 int splittingSize = (globSize/THOUSAND);
00150 if(splittingSize == 0) {
00151 splittingSize = 1;
00152 }
00153
00154 unsigned int avgLoad = (globSize/splittingSize);
00155 int leftOvers = (globSize - (splittingSize*avgLoad));
00156
00157 std::vector<ot::TreeNode> tmpIn2;
00158 if(rank >= splittingSize) {
00159 par::scatterValues<ot::TreeNode>(tmpIn, tmpIn2, 0, comm);
00160 }else if(rank < leftOvers) {
00161 par::scatterValues<ot::TreeNode>(tmpIn, tmpIn2, (avgLoad+1), comm);
00162 }else {
00163 par::scatterValues<ot::TreeNode>(tmpIn, tmpIn2, avgLoad, comm);
00164 }
00165 tmpIn.clear();
00166
00167 MPI_Comm newComm;
00168 par::splitCommUsingSplittingRank(splittingSize, &newComm, comm);
00169
00170 #ifndef __SILENT_MODE__
00171 if(!rank) {
00172 std::cout<<"Input to Coarsen is small ("<<globSize
00173 <<"). npes = "<<size<<" Splitting Comm. "<<std::endl;
00174 }
00175 #endif
00176
00177 if(rank < splittingSize) {
00178 coarsenOctree(tmpIn2, out, dim, maxDepth, newComm, true, NULL, NULL);
00179 } else {
00180 if(iAmActive) {
00181 *iAmActive = false;
00182 }
00183 }
00184 tmpIn2.clear();
00185
00186 if(newCommPtr) {
00187 *newCommPtr = newComm;
00188 }
00189
00190 PROF_COARSE_END
00191 }
00192
00193 #ifndef __SILENT_MODE__
00194 if(!rank) {
00195 std::cout<<"Coarsen Octree. inpSize: "<<globSize <<" activeNpes: "<<size<<std::endl;
00196 }
00197 #endif
00198
00200
00201
00202 if(!skipPartition) {
00203 par::partitionW<ot::TreeNode>(tmpIn, NULL, comm);
00204 }
00205
00206 inSz = tmpIn.size();
00207
00208 int firstFCidx = 0;
00209
00210 while( (firstFCidx < inSz) && (tmpIn[firstFCidx].getChildNumber() != 0) ) {
00211 firstFCidx++;
00212 }
00213
00214 if(firstFCidx == inSz) {
00215 firstFCidx = -1;
00216 }
00217
00218 int lastIdxToSend = -1;
00219
00220 int lastFCidx = -1;
00221
00222 if(firstFCidx >= 0) {
00223 if(rank < (size-1)) {
00224 lastFCidx = (inSz - 1);
00225 while( (lastFCidx >= 0) && (tmpIn[lastFCidx].getChildNumber() != 0) ) {
00226 lastFCidx--;
00227 }
00228
00229 if(lastFCidx >= 0) {
00230 lastIdxToSend = (inSz - lastFCidx);
00231 }
00232 } else {
00233 lastFCidx = inSz;
00234 }
00235 }
00236
00237 MPI_Request sendFirstIdxRequest;
00238 MPI_Request sendLastIdxRequest;
00239 MPI_Request recvFirstIdxRequest;
00240 MPI_Request recvLastIdxRequest;
00241
00242 int lastFCidxOfPrevP;
00243 int firstFCidxOfNextP;
00244
00245 if(rank) {
00246
00247 par::Mpi_Irecv<int>(&lastFCidxOfPrevP, 1, (rank-1),
00248 1, comm, &recvLastIdxRequest);
00249
00250
00251 par::Mpi_Issend<int>(&firstFCidx, 1, (rank-1),
00252 2, comm, &sendFirstIdxRequest);
00253 }
00254
00255 if(rank < (size-1)) {
00256
00257 par::Mpi_Irecv<int>(&firstFCidxOfNextP, 1, (rank+1),
00258 2, comm, &recvFirstIdxRequest);
00259
00260
00261 par::Mpi_Issend<int>(&lastIdxToSend, 1, (rank+1),
00262 1, comm, &sendLastIdxRequest);
00263 }
00264
00265
00266 int prevFCidx = firstFCidx;
00267
00268
00269
00270
00271
00272
00273
00274
00275 while (prevFCidx < lastFCidx) {
00276 int currFCidx = (prevFCidx + 1);
00277 while( (currFCidx < lastFCidx) &&
00278 (tmpIn[currFCidx].getChildNumber() != 0) ) {
00279 currFCidx++;
00280 }
00281 if(currFCidx >= (prevFCidx + (1 << dim))) {
00282
00283
00284 out.push_back(tmpIn[prevFCidx].getParent());
00285 if((prevFCidx + (1 << dim)) < currFCidx) {
00286 out.insert(out.end(),
00287 (tmpIn.begin() + (prevFCidx + (1 << dim))),
00288 (tmpIn.begin() + currFCidx));
00289 }
00290 } else {
00291
00292 if(prevFCidx < currFCidx) {
00293 out.insert(out.end(),
00294 (tmpIn.begin() + prevFCidx), (tmpIn.begin() + currFCidx));
00295 }
00296 }
00297 prevFCidx = currFCidx;
00298 }
00299
00300 if(rank) {
00301 MPI_Status statusWait;
00302 MPI_Wait(&recvLastIdxRequest, &statusWait);
00303
00304 if ( (lastFCidxOfPrevP >= 0) && (firstFCidx >= 0) ) {
00305 if ( (lastFCidxOfPrevP + firstFCidx) >= (1 << dim) ) {
00306 int stIdx = ((1 << dim) - lastFCidxOfPrevP);
00307 if(stIdx < 0) {
00308 stIdx = 0;
00309 }
00310
00311
00312 if(stIdx < firstFCidx) {
00313 out.insert(out.begin(),
00314 (tmpIn.begin() + stIdx), (tmpIn.begin() + firstFCidx) );
00315 }
00316 } else {
00317
00318 if(firstFCidx) {
00319 out.insert(out.begin(), tmpIn.begin(),
00320 (tmpIn.begin() + firstFCidx) );
00321 }
00322 }
00323 } else {
00324
00325 if (lastFCidxOfPrevP < 0) {
00326
00327
00328
00329
00330
00331
00332 if (firstFCidx >= 0) {
00333 if (firstFCidx) {
00334 out.insert(out.begin(), tmpIn.begin(),
00335 (tmpIn.begin() + firstFCidx));
00336 }
00337 } else {
00338
00339
00340 assert(out.empty());
00341 out = tmpIn;
00342 }
00343 } else {
00344
00345
00346
00347
00348
00349
00350
00351
00352
00353
00354
00355 assert(out.empty());
00356 if (lastFCidxOfPrevP < (1 << dim) ) {
00357 int stIdx = ((1 << dim) - lastFCidxOfPrevP);
00358 out.insert(out.begin(), (tmpIn.begin() + stIdx), tmpIn.end());
00359 } else {
00360 out = tmpIn;
00361 }
00362 }
00363 }
00364 }
00365
00366 if (rank < (size-1)) {
00367 MPI_Status statusWait;
00368 MPI_Wait(&recvFirstIdxRequest, &statusWait);
00369
00370 if ( (lastIdxToSend >= 0) && (firstFCidxOfNextP >= 0) ) {
00371 if ( (lastIdxToSend + firstFCidxOfNextP) >= (1 << dim) ) {
00372 out.push_back(tmpIn[lastFCidx].getParent());
00373 if ( (lastFCidx + (1 << dim)) < inSz ) {
00374 out.insert(out.end(),
00375 (tmpIn.begin() + (lastFCidx + (1 << dim))),
00376 (tmpIn.begin() + inSz));
00377 }
00378 } else {
00379 out.insert(out.end(), (tmpIn.begin() + lastFCidx),
00380 (tmpIn.begin() + inSz));
00381 }
00382 } else {
00383
00384 if (lastIdxToSend < 0) {
00385
00386
00387
00388
00389
00390 } else {
00391
00392
00393
00394
00395 out.push_back(tmpIn[lastFCidx].getParent());
00396 if ( (lastFCidx + (1 << dim)) < inSz ) {
00397 out.insert(out.end(),
00398 (tmpIn.begin() + (lastFCidx + (1 << dim))),
00399 (tmpIn.begin() + inSz));
00400 }
00401 }
00402 }
00403 }
00404
00405 if (rank) {
00406 MPI_Status statusWait;
00407 MPI_Wait(&sendFirstIdxRequest, &statusWait);
00408 }
00409
00410 if (rank < (size-1)) {
00411 MPI_Status statusWait;
00412 MPI_Wait(&sendLastIdxRequest, &statusWait);
00413 }
00414
00415 PROF_COARSE_END
00416 }
00417
00418 }
00419
00420