Main Page | Namespace List | Class Hierarchy | Alphabetical List | Class List | Directories | File List | Namespace Members | Class Members | File Members | Related Pages

odaUtils.C

Go to the documentation of this file.
00001 
00008 #include "mpi.h"
00009 #include "odaUtils.h"
00010 #include "TreeNode.h"
00011 #include "nodeAndValues.h"
00012 #include <cstdio>
00013 #include <iostream>
00014 #include <cassert>
00015 #include "oda.h"
00016 #include "parUtils.h"
00017 #include "seqUtils.h"
00018 
00019 #ifdef __DEBUG__
00020 #ifndef __DEBUG_DA__
00021 #define __DEBUG_DA__
00022 #endif
00023 #endif
00024 
00025 #ifdef __DEBUG_DA__
00026 #ifndef __DEBUG_DA_PUBLIC__
00027 #define __DEBUG_DA_PUBLIC__
00028 #endif
00029 #endif
00030 
00031 namespace ot {
00032 
00033   extern double**** ShapeFnCoeffs; 
00034 
00035   void interpolateData(ot::DA* da, Vec in, Vec out, Vec* gradOut,
00036       unsigned int dof, std::vector<double>& pts) {
00037 
00038     int rank = da->getRankAll();
00039     int npes = da->getNpesAll();
00040     MPI_Comm comm = da->getComm();
00041 
00042     std::vector<ot::TreeNode> minBlocks;
00043     unsigned int maxDepth;
00044 
00045     int npesActive;
00046     if(!rank) {
00047       minBlocks = da->getMinAllBlocks();
00048       maxDepth = da->getMaxDepth();
00049       npesActive = da->getNpesActive();
00050     }
00051 
00052     par::Mpi_Bcast<unsigned int>(&maxDepth, 1, 0, comm);
00053     par::Mpi_Bcast<int>(&npesActive, 1, 0, comm);
00054 
00055     unsigned int balOctMaxD = (maxDepth - 1);
00056 
00057     if(rank) {
00058       minBlocks.resize(npesActive);
00059     }
00060 
00061     par::Mpi_Bcast<ot::TreeNode>(&(*(minBlocks.begin())), npesActive, 0, comm);
00062 
00063     std::vector<ot::NodeAndValues<double, 3> > ptsWrapper;
00064 
00065     int numPts = (pts.size())/3;
00066     double xyzFac = static_cast<double>(1u << balOctMaxD);
00067     for(int i = 0; i < numPts; i++) {
00068       ot::NodeAndValues<double, 3> tmpObj;
00069       unsigned int xint = static_cast<unsigned int>(pts[(3*i)]*xyzFac);
00070       unsigned int yint = static_cast<unsigned int>(pts[(3*i) + 1]*xyzFac);
00071       unsigned int zint = static_cast<unsigned int>(pts[(3*i) + 2]*xyzFac);
00072       tmpObj.node = ot::TreeNode(xint, yint, zint, maxDepth, 3, maxDepth);
00073       tmpObj.values[0] = pts[(3*i)];
00074       tmpObj.values[1] = pts[(3*i) + 1];
00075       tmpObj.values[2] = pts[(3*i) + 2];
00076       ptsWrapper.push_back(tmpObj);
00077     }//end for i
00078 
00079     //Re-distribute ptsWrapper to align with minBlocks
00080     int* sendCnts = new int[npes];
00081     int* tmpSendCnts = new int[npes];
00082     for(int i = 0; i < npes; i++) {
00083       sendCnts[i] = 0;
00084       tmpSendCnts[i] = 0;
00085     }//end for i
00086 
00087     unsigned int *part = new unsigned int[numPts];
00088     for(int i = 0; i < numPts; i++) {
00089       bool found = seq::maxLowerBound<ot::TreeNode>(minBlocks,
00090           ptsWrapper[i].node, part[i], NULL, NULL);
00091       assert(found);
00092       sendCnts[part[i]]++;
00093     }//end for i
00094 
00095     int* sendDisps = new int[npes];
00096     sendDisps[0] = 0;
00097     for(int i = 1; i < npes; i++) {
00098       sendDisps[i] = sendDisps[i-1] + sendCnts[i-1];
00099     }//end for i
00100 
00101     std::vector<ot::NodeAndValues<double, 3> > sendList(numPts);
00102     unsigned int *commMap = new unsigned int[numPts];
00103     for(int i = 0; i < numPts; i++) {
00104       unsigned int pId = part[i];
00105       unsigned int sId = (sendDisps[pId] + tmpSendCnts[pId]);
00106       sendList[sId] = ptsWrapper[i];
00107       commMap[sId] = i;
00108       tmpSendCnts[pId]++;
00109     }//end for i
00110     delete [] part;
00111     delete [] tmpSendCnts;
00112     ptsWrapper.clear();
00113 
00114     int* recvCnts = new int[npes];
00115     par::Mpi_Alltoall<int>(sendCnts, recvCnts, 1, comm);
00116 
00117     int* recvDisps = new int[npes];
00118     recvDisps[0] = 0;
00119     for(int i = 1; i < npes; i++) {
00120       recvDisps[i] = recvDisps[i-1] + recvCnts[i-1];
00121     }//end for i
00122 
00123     std::vector<ot::NodeAndValues<double, 3> > recvList(recvDisps[npes - 1] + recvCnts[npes - 1]);
00124     par::Mpi_Alltoallv_sparse<ot::NodeAndValues<double, 3> >(&(*(sendList.begin())), 
00125         sendCnts, sendDisps, &(*(recvList.begin())), recvCnts, recvDisps, comm);
00126     sendList.clear();
00127 
00128     //Sort recvList but also store the mapping to the original order
00129     std::vector<seq::IndexHolder<ot::NodeAndValues<double, 3> > > localList(recvList.size());
00130     for(unsigned int i = 0; i < recvList.size(); i++) {
00131       localList[i].index = i;
00132       localList[i].value = &(*(recvList.begin() + i));
00133     }//end for i
00134 
00135     sort(localList.begin(), localList.end());
00136 
00137     bool computeGradient = (gradOut != NULL);
00138 
00139     std::vector<double> tmpOut(dof*localList.size());
00140     std::vector<double> tmpGradOut;
00141     if(computeGradient) {
00142       tmpGradOut.resize(3*dof*localList.size());
00143     }
00144 
00145     PetscScalar* inArr;
00146     da->vecGetBuffer(in, inArr, false, false, true, dof);
00147     da->ReadFromGhostsBegin<PetscScalar>(inArr, dof);
00148     da->ReadFromGhostsEnd<PetscScalar>(inArr);
00149 
00150     if(da->iAmActive()) {
00151       //interpolate at the received points
00152       //The pts must be inside the domain and not on the positive boundaries
00153       unsigned int ptsCtr = 0;
00154       double hxFac = (1.0/static_cast<double>(1u << balOctMaxD));
00155       for(da->init<ot::DA_FLAGS::WRITABLE>();
00156           (da->curr() < da->end<ot::DA_FLAGS::WRITABLE>()) && 
00157           (ptsCtr < localList.size()); da->next<ot::DA_FLAGS::WRITABLE>()) {
00158 
00159         Point pt = da->getCurrentOffset();
00160         unsigned int currLev = da->getLevel(da->curr());
00161 
00162         ot::TreeNode currOct(pt.xint(), pt.yint(), pt.zint(), currLev, 3, maxDepth);
00163 
00164         unsigned int indices[8];
00165         da->getNodeIndices(indices);
00166 
00167         unsigned char childNum = da->getChildNumber();
00168         unsigned char hnMask = da->getHangingNodeIndex(da->curr());
00169         unsigned char elemType = 0;
00170         GET_ETYPE_BLOCK(elemType, hnMask, childNum)
00171 
00172           double x0 = (pt.x())*hxFac;
00173         double y0 = (pt.y())*hxFac;
00174         double z0 = (pt.z())*hxFac;
00175         double hxOct = (static_cast<double>(1u << (maxDepth - currLev)))*hxFac;
00176 
00177         //All the recieved points lie within some octant or the other.
00178         //So the ptsCtr will be incremented properly inside this loop.
00179         //Evaluate at all points within this octant
00180         while( (ptsCtr < localList.size()) &&
00181             ( (currOct == ((localList[ptsCtr].value)->node)) ||
00182               (currOct.isAncestor(((localList[ptsCtr].value)->node))) ) ) {
00183 
00184           double px = ((localList[ptsCtr].value)->values)[0];
00185           double py = ((localList[ptsCtr].value)->values)[1];
00186           double pz = ((localList[ptsCtr].value)->values)[2];
00187           double xloc =  (2.0*(px - x0)/hxOct) - 1.0;
00188           double yloc =  (2.0*(py - y0)/hxOct) - 1.0;
00189           double zloc =  (2.0*(pz - z0)/hxOct) - 1.0;
00190 
00191           double ShFnVals[8];
00192           for(int j = 0; j < 8; j++) {
00193             ShFnVals[j] = ( ShapeFnCoeffs[childNum][elemType][j][0] + 
00194                 (ShapeFnCoeffs[childNum][elemType][j][1]*xloc) +
00195                 (ShapeFnCoeffs[childNum][elemType][j][2]*yloc) +
00196                 (ShapeFnCoeffs[childNum][elemType][j][3]*zloc) +
00197                 (ShapeFnCoeffs[childNum][elemType][j][4]*xloc*yloc) +
00198                 (ShapeFnCoeffs[childNum][elemType][j][5]*yloc*zloc) +
00199                 (ShapeFnCoeffs[childNum][elemType][j][6]*zloc*xloc) +
00200                 (ShapeFnCoeffs[childNum][elemType][j][7]*xloc*yloc*zloc) );
00201           }//end for j
00202 
00203           unsigned int outIdx = localList[ptsCtr].index;
00204 
00205           for(int k = 0; k < dof; k++) {
00206             tmpOut[(dof*outIdx) + k] = 0.0;
00207             for(int j = 0; j < 8; j++) {
00208               tmpOut[(dof*outIdx) + k] += (inArr[(dof*indices[j]) + k]*ShFnVals[j]);
00209             }//end for j
00210           }//end for k
00211 
00212           if(computeGradient) {
00213 
00214             double GradShFnVals[8][3];
00215             for(int j = 0; j < 8; j++) {
00216               GradShFnVals[j][0] = ( ShapeFnCoeffs[childNum][elemType][j][1] +
00217                   (ShapeFnCoeffs[childNum][elemType][j][4]*yloc) +
00218                   (ShapeFnCoeffs[childNum][elemType][j][6]*zloc) +
00219                   (ShapeFnCoeffs[childNum][elemType][j][7]*yloc*zloc) );
00220 
00221               GradShFnVals[j][1] = ( ShapeFnCoeffs[childNum][elemType][j][2] +
00222                   (ShapeFnCoeffs[childNum][elemType][j][4]*xloc) +
00223                   (ShapeFnCoeffs[childNum][elemType][j][5]*zloc) +
00224                   (ShapeFnCoeffs[childNum][elemType][j][7]*xloc*zloc) );
00225 
00226               GradShFnVals[j][2] = ( ShapeFnCoeffs[childNum][elemType][j][3] +
00227                   (ShapeFnCoeffs[childNum][elemType][j][5]*yloc) +
00228                   (ShapeFnCoeffs[childNum][elemType][j][6]*xloc) +
00229                   (ShapeFnCoeffs[childNum][elemType][j][7]*xloc*yloc) );            
00230             }//end for j
00231 
00232             double gradFac = (2.0/hxOct);
00233 
00234             for(int k = 0; k < dof; k++) {
00235               for(int l = 0; l < 3; l++) {
00236                 tmpGradOut[(3*dof*outIdx) + (3*k) + l] = 0.0;
00237                 for(int j = 0; j < 8; j++) {
00238                   tmpGradOut[(3*dof*outIdx) + (3*k) + l] += (inArr[(dof*indices[j]) + k]*GradShFnVals[j][l]);
00239                 }//end for j
00240                 tmpGradOut[(3*dof*outIdx) + (3*k) + l] *= gradFac;
00241               }//end for l
00242             }//end for k
00243 
00244           }//end if need grad
00245 
00246           ptsCtr++;
00247         }//end while
00248 
00249       }//end writable loop
00250     } else {
00251       assert(localList.empty());
00252     }//end if active
00253 
00254     da->vecRestoreBuffer(in, inArr, false, false, true, dof);
00255 
00256     recvList.clear();
00257     localList.clear();
00258 
00259     //Return the results. This communication is the exact reverse of the earlier
00260     //communication.
00261 
00262     std::vector<double> results(dof*numPts);
00263     for(int i = 0; i < npes; i++) {
00264       sendCnts[i] *= dof;
00265       sendDisps[i] *= dof;
00266       recvCnts[i] *= dof;
00267       recvDisps[i] *= dof;
00268     }//end for i
00269     par::Mpi_Alltoallv_sparse<double >(&(*(tmpOut.begin())), 
00270         recvCnts, recvDisps, &(*(results.begin())), sendCnts, sendDisps, comm);
00271     tmpOut.clear();
00272 
00273     std::vector<double> gradResults;
00274     if(computeGradient) {
00275       for(int i = 0; i < npes; i++) {
00276         sendCnts[i] *= 3;
00277         sendDisps[i] *= 3;
00278         recvCnts[i] *= 3;
00279         recvDisps[i] *= 3;
00280       }//end for i
00281       gradResults.resize(3*dof*numPts);
00282       par::Mpi_Alltoallv_sparse<double >(&(*(tmpGradOut.begin())), 
00283           recvCnts, recvDisps, &(*(gradResults.begin())), sendCnts, sendDisps, comm);
00284       tmpGradOut.clear();
00285     }
00286 
00287     delete [] sendCnts;
00288     delete [] sendDisps;
00289     delete [] recvCnts;
00290     delete [] recvDisps;
00291 
00292     //Use commMap and re-order the results in the same order as the original
00293     //points
00294     PetscInt outSz;
00295     VecGetLocalSize(out, &outSz);
00296     assert(outSz == (dof*numPts));
00297     PetscScalar* outArr;
00298     VecGetArray(out, &outArr);
00299     for(int i = 0; i < numPts; i++) {
00300       for(int j = 0; j < dof; j++) {
00301         outArr[(dof*commMap[i]) + j] = results[(dof*i) + j];
00302       }//end for j
00303     }//end for i
00304     VecRestoreArray(out, &outArr);
00305 
00306     if(computeGradient) {
00307       PetscInt gradOutSz;
00308       VecGetLocalSize((*gradOut), &gradOutSz);
00309       assert(gradOutSz == (3*dof*numPts));
00310       PetscScalar* gradOutArr;
00311       VecGetArray((*gradOut), &gradOutArr);
00312       for(int i = 0; i < numPts; i++) {
00313         for(int j = 0; j < (3*dof); j++) {
00314           gradOutArr[(3*dof*commMap[i]) + j] = gradResults[(3*dof*i) + j];
00315         }//end for j
00316       }//end for i
00317       VecRestoreArray((*gradOut), &gradOutArr);
00318     }
00319 
00320     delete [] commMap;
00321 
00322   }
00323 
00324   void interpolateData(ot::DA* da, std::vector<double> & in,
00325       std::vector<double> & out, std::vector<double> * gradOut,
00326       unsigned int dof, std::vector<double> & pts) {
00327 
00328     int rank = da->getRankAll();
00329     int npes = da->getNpesAll();
00330     MPI_Comm comm = da->getComm();
00331 
00332     std::vector<ot::TreeNode> minBlocks;
00333     unsigned int maxDepth;
00334 
00335     int npesActive;
00336     if(!rank) {
00337       minBlocks = da->getMinAllBlocks();
00338       maxDepth = da->getMaxDepth();
00339       npesActive = da->getNpesActive();
00340     }
00341 
00342     par::Mpi_Bcast<unsigned int>(&maxDepth, 1, 0, comm);
00343     par::Mpi_Bcast<int>(&npesActive, 1, 0, comm);
00344 
00345     unsigned int balOctMaxD = (maxDepth - 1);
00346 
00347     if(rank) {
00348       minBlocks.resize(npesActive);
00349     }
00350 
00351     par::Mpi_Bcast<ot::TreeNode>(&(*(minBlocks.begin())), npesActive, 0, comm);
00352 
00353     std::vector<ot::NodeAndValues<double, 3> > ptsWrapper;
00354 
00355     int numPts = (pts.size())/3;
00356     double xyzFac = static_cast<double>(1u << balOctMaxD);
00357     for(int i = 0; i < numPts; i++) {
00358       ot::NodeAndValues<double, 3> tmpObj;
00359       unsigned int xint = static_cast<unsigned int>(pts[(3*i)]*xyzFac);
00360       unsigned int yint = static_cast<unsigned int>(pts[(3*i) + 1]*xyzFac);
00361       unsigned int zint = static_cast<unsigned int>(pts[(3*i) + 2]*xyzFac);
00362       tmpObj.node = ot::TreeNode(xint, yint, zint, maxDepth, 3, maxDepth);
00363       tmpObj.values[0] = pts[(3*i)];
00364       tmpObj.values[1] = pts[(3*i) + 1];
00365       tmpObj.values[2] = pts[(3*i) + 2];
00366       ptsWrapper.push_back(tmpObj);
00367     }//end for i
00368 
00369     //Re-distribute ptsWrapper to align with minBlocks
00370     int* sendCnts = new int[npes];
00371     int* tmpSendCnts = new int[npes];
00372     for(int i = 0; i < npes; i++) {
00373       sendCnts[i] = 0;
00374       tmpSendCnts[i] = 0;
00375     }//end for i
00376 
00377     unsigned int *part = new unsigned int[numPts];
00378     for(int i = 0; i < numPts; i++) {
00379       bool found = seq::maxLowerBound<ot::TreeNode>(minBlocks,
00380           ptsWrapper[i].node, part[i], NULL, NULL);
00381       assert(found);
00382       sendCnts[part[i]]++;
00383     }//end for i
00384 
00385     int* sendDisps = new int[npes];
00386     sendDisps[0] = 0;
00387     for(int i = 1; i < npes; i++) {
00388       sendDisps[i] = sendDisps[i-1] + sendCnts[i-1];
00389     }//end for i
00390 
00391     std::vector<ot::NodeAndValues<double, 3> > sendList(numPts);
00392     unsigned int *commMap = new unsigned int[numPts];
00393     for(int i = 0; i < numPts; i++) {
00394       unsigned int pId = part[i];
00395       unsigned int sId = (sendDisps[pId] + tmpSendCnts[pId]);
00396       sendList[sId] = ptsWrapper[i];
00397       commMap[sId] = i;
00398       tmpSendCnts[pId]++;
00399     }//end for i
00400     delete [] part;
00401     delete [] tmpSendCnts;
00402     ptsWrapper.clear();
00403 
00404     int* recvCnts = new int[npes];
00405     par::Mpi_Alltoall<int>(sendCnts, recvCnts, 1, comm);
00406 
00407     int* recvDisps = new int[npes];
00408     recvDisps[0] = 0;
00409     for(int i = 1; i < npes; i++) {
00410       recvDisps[i] = recvDisps[i-1] + recvCnts[i-1];
00411     }//end for i
00412 
00413     std::vector<ot::NodeAndValues<double, 3> > recvList(recvDisps[npes - 1] + recvCnts[npes - 1]);
00414     par::Mpi_Alltoallv_sparse<ot::NodeAndValues<double, 3> >(&(*(sendList.begin())), 
00415         sendCnts, sendDisps, &(*(recvList.begin())), recvCnts, recvDisps, comm);
00416     sendList.clear();
00417 
00418     //Sort recvList but also store the mapping to the original order
00419     std::vector<seq::IndexHolder<ot::NodeAndValues<double, 3> > > localList(recvList.size());
00420     for(unsigned int i = 0; i < recvList.size(); i++) {
00421       localList[i].index = i;
00422       localList[i].value = &(*(recvList.begin() + i));
00423     }//end for i
00424 
00425     sort(localList.begin(), localList.end());
00426 
00427     bool computeGradient = (gradOut != NULL);
00428 
00429     std::vector<double> tmpOut(dof*localList.size());
00430     std::vector<double> tmpGradOut;
00431     if(computeGradient) {
00432       tmpGradOut.resize(3*dof*localList.size());
00433     }
00434 
00435     double* inArr;
00436     da->vecGetBuffer<double>(in, inArr, false, false, true, dof);
00437 
00438     da->ReadFromGhostsBegin<double>(inArr, dof);
00439     da->ReadFromGhostsEnd<double>(inArr);
00440 
00441     if(da->iAmActive()) {
00442       //interpolate at the received points
00443       //The pts must be inside the domain and not on the positive boundaries
00444       unsigned int ptsCtr = 0;
00445       double hxFac = (1.0/static_cast<double>(1u << balOctMaxD));
00446       for(da->init<ot::DA_FLAGS::WRITABLE>();
00447           (da->curr() < da->end<ot::DA_FLAGS::WRITABLE>()) && 
00448           (ptsCtr < localList.size()); da->next<ot::DA_FLAGS::WRITABLE>()) {
00449 
00450         Point pt = da->getCurrentOffset();
00451         unsigned int currLev = da->getLevel(da->curr());
00452 
00453         ot::TreeNode currOct(pt.xint(), pt.yint(), pt.zint(), currLev, 3, maxDepth);
00454 
00455         unsigned int indices[8];
00456         da->getNodeIndices(indices);
00457 
00458         unsigned char childNum = da->getChildNumber();
00459         unsigned char hnMask = da->getHangingNodeIndex(da->curr());
00460         unsigned char elemType = 0;
00461         GET_ETYPE_BLOCK(elemType, hnMask, childNum)
00462 
00463           double x0 = (pt.x())*hxFac;
00464         double y0 = (pt.y())*hxFac;
00465         double z0 = (pt.z())*hxFac;
00466         double hxOct = (static_cast<double>(1u << (maxDepth - currLev)))*hxFac;
00467 
00468         //All the recieved points lie within some octant or the other.
00469         //So the ptsCtr will be incremented properly inside this loop.
00470         //Evaluate at all points within this octant
00471         while( (ptsCtr < localList.size()) &&
00472             ( (currOct == ((localList[ptsCtr].value)->node)) ||
00473               (currOct.isAncestor(((localList[ptsCtr].value)->node))) ) ) {
00474 
00475           double px = ((localList[ptsCtr].value)->values)[0];
00476           double py = ((localList[ptsCtr].value)->values)[1];
00477           double pz = ((localList[ptsCtr].value)->values)[2];
00478           double xloc =  (2.0*(px - x0)/hxOct) - 1.0;
00479           double yloc =  (2.0*(py - y0)/hxOct) - 1.0;
00480           double zloc =  (2.0*(pz - z0)/hxOct) - 1.0;
00481 
00482           double ShFnVals[8];
00483           for(int j = 0; j < 8; j++) {
00484             ShFnVals[j] = ( ShapeFnCoeffs[childNum][elemType][j][0] + 
00485                 (ShapeFnCoeffs[childNum][elemType][j][1]*xloc) +
00486                 (ShapeFnCoeffs[childNum][elemType][j][2]*yloc) +
00487                 (ShapeFnCoeffs[childNum][elemType][j][3]*zloc) +
00488                 (ShapeFnCoeffs[childNum][elemType][j][4]*xloc*yloc) +
00489                 (ShapeFnCoeffs[childNum][elemType][j][5]*yloc*zloc) +
00490                 (ShapeFnCoeffs[childNum][elemType][j][6]*zloc*xloc) +
00491                 (ShapeFnCoeffs[childNum][elemType][j][7]*xloc*yloc*zloc) );
00492           }//end for j
00493 
00494           unsigned int outIdx = localList[ptsCtr].index;
00495 
00496           for(int k = 0; k < dof; k++) {
00497             tmpOut[(dof*outIdx) + k] = 0.0;
00498             for(int j = 0; j < 8; j++) {
00499               tmpOut[(dof*outIdx) + k] += (inArr[(dof*indices[j]) + k]*ShFnVals[j]);
00500             }//end for j
00501           }//end for k
00502 
00503           if(computeGradient) {
00504 
00505             double GradShFnVals[8][3];
00506             for(int j = 0; j < 8; j++) {
00507               GradShFnVals[j][0] = ( ShapeFnCoeffs[childNum][elemType][j][1] +
00508                   (ShapeFnCoeffs[childNum][elemType][j][4]*yloc) +
00509                   (ShapeFnCoeffs[childNum][elemType][j][6]*zloc) +
00510                   (ShapeFnCoeffs[childNum][elemType][j][7]*yloc*zloc) );
00511 
00512               GradShFnVals[j][1] = ( ShapeFnCoeffs[childNum][elemType][j][2] +
00513                   (ShapeFnCoeffs[childNum][elemType][j][4]*xloc) +
00514                   (ShapeFnCoeffs[childNum][elemType][j][5]*zloc) +
00515                   (ShapeFnCoeffs[childNum][elemType][j][7]*xloc*zloc) );
00516 
00517               GradShFnVals[j][2] = ( ShapeFnCoeffs[childNum][elemType][j][3] +
00518                   (ShapeFnCoeffs[childNum][elemType][j][5]*yloc) +
00519                   (ShapeFnCoeffs[childNum][elemType][j][6]*xloc) +
00520                   (ShapeFnCoeffs[childNum][elemType][j][7]*xloc*yloc) );            
00521             }//end for j
00522 
00523             double gradFac = (2.0/hxOct);
00524 
00525             for(int k = 0; k < dof; k++) {
00526               for(int l = 0; l < 3; l++) {
00527                 tmpGradOut[(3*dof*outIdx) + (3*k) + l] = 0.0;
00528                 for(int j = 0; j < 8; j++) {
00529                   tmpGradOut[(3*dof*outIdx) + (3*k) + l] += (inArr[(dof*indices[j]) + k]*GradShFnVals[j][l]);
00530                 }//end for j
00531                 tmpGradOut[(3*dof*outIdx) + (3*k) + l] *= gradFac;
00532               }//end for l
00533             }//end for k
00534 
00535           }//end if need grad
00536 
00537           ptsCtr++;
00538         }//end while
00539 
00540       }//end writable loop
00541     } else {
00542       assert(localList.empty());
00543     }//end if active
00544 
00545     da->vecRestoreBuffer<double>(in, inArr, false, false, true, dof);
00546 
00547     recvList.clear();
00548     localList.clear();
00549 
00550     //Return the results. This communication is the exact reverse of the earlier
00551     //communication.
00552 
00553     std::vector<double> results(dof*numPts);
00554     for(int i = 0; i < npes; i++) {
00555       sendCnts[i] *= dof;
00556       sendDisps[i] *= dof;
00557       recvCnts[i] *= dof;
00558       recvDisps[i] *= dof;
00559     }//end for i
00560     par::Mpi_Alltoallv_sparse<double >(&(*(tmpOut.begin())), 
00561         recvCnts, recvDisps, &(*(results.begin())), sendCnts, sendDisps, comm);
00562     tmpOut.clear();
00563 
00564     std::vector<double> gradResults;
00565     if(computeGradient) {
00566       for(int i = 0; i < npes; i++) {
00567         sendCnts[i] *= 3;
00568         sendDisps[i] *= 3;
00569         recvCnts[i] *= 3;
00570         recvDisps[i] *= 3;
00571       }//end for i
00572       gradResults.resize(3*dof*numPts);
00573       par::Mpi_Alltoallv_sparse<double >(&(*(tmpGradOut.begin())), 
00574           recvCnts, recvDisps, &(*(gradResults.begin())), sendCnts, sendDisps, comm);
00575       tmpGradOut.clear();
00576     }
00577 
00578     delete [] sendCnts;
00579     delete [] sendDisps;
00580     delete [] recvCnts;
00581     delete [] recvDisps;
00582 
00583     //Use commMap and re-order the results in the same order as the original
00584     //points
00585     out.resize(dof*numPts);
00586     for(int i = 0; i < numPts; i++) {
00587       for(int j = 0; j < dof; j++) {
00588         out[(dof*commMap[i]) + j] = results[(dof*i) + j];
00589       }//end for j
00590     }//end for i
00591 
00592     if(computeGradient) {
00593       gradOut->resize(3*dof*numPts);
00594       for(int i = 0; i < numPts; i++) {
00595         for(int j = 0; j < 3*dof; j++) {
00596           (*gradOut)[(3*dof*commMap[i]) + j] = gradResults[(3*dof*i) + j];
00597         }//end for j
00598       }//end for i
00599     }
00600 
00601     delete [] commMap;
00602 
00603   }//end function
00604 
00605   void writePartitionVTK(ot::DA* da, const char* outFileName) {
00606     int rank = da->getRankAll();
00607     //Only processor writes
00608     if(!rank) {
00609       std::vector<ot::TreeNode> minBlocks = da->getMinAllBlocks();
00610       unsigned int maxDepth = da->getMaxDepth();
00611       ot::TreeNode root(3, maxDepth);
00612       std::vector<ot::TreeNode> allBlocks;
00613       ot::completeSubtree(root, minBlocks, allBlocks, 3, maxDepth, true, true);
00614 
00615       FILE* outfile = fopen(outFileName,"w");
00616 
00617       //Set the weights of allBlocks to be the processor ids. 
00618       for(unsigned int i = 0; i < allBlocks.size(); i++) {
00619         unsigned int pId;
00620         bool found = seq::maxLowerBound<ot::TreeNode>(minBlocks, allBlocks[i], pId, NULL, NULL);
00621         assert(found);
00622         allBlocks[i].setWeight(pId);
00623       }
00624 
00625       unsigned int numNode = static_cast<unsigned int>(allBlocks.size());
00626 
00627       float coord[8][3] = {
00628         {0.0,0.0,0.0},
00629         {1.0,0.0,0.0},
00630         {0.0,1.0,0.0},
00631         {1.0,1.0,0.0},
00632         {0.0,0.0,1.0},
00633         {1.0,0.0,1.0},
00634         {0.0,1.0,1.0},
00635         {1.0,1.0,1.0}
00636       };
00637 
00638       fprintf(outfile,"# vtk DataFile Version 3.0\n");
00639       fprintf(outfile,"Octree field file\n");
00640       fprintf(outfile,"ASCII\n");
00641       fprintf(outfile,"DATASET UNSTRUCTURED_GRID\n");
00642       fprintf(outfile,"POINTS %d float\n",(numNode*8));
00643 
00644       for (unsigned int i = 0; i < numNode; i++) {
00645         unsigned int x = allBlocks[i].getX();
00646         unsigned int y = allBlocks[i].getY(); 
00647         unsigned int z = allBlocks[i].getZ();
00648         unsigned int d = allBlocks[i].getLevel();
00649         float fx, fy, fz,hx;
00650         fx = ((float)x)/((float)(1u<<maxDepth));
00651         fy = ((float)y)/((float)(1u<<maxDepth));
00652         fz = ((float)z)/((float)(1u<<maxDepth));
00653         hx = ((float)(1u<<(maxDepth-d)))/((float)(1u<<maxDepth));
00654 
00655         for(int j = 0; j < 8; j++)
00656         {
00657           float fxn,fyn,fzn;
00658           fxn = fx + coord[j][0]*hx;
00659           fyn = fy + coord[j][1]*hx;
00660           fzn = fz + coord[j][2]*hx;
00661           fprintf(outfile,"%f %f %f \n",fxn,fyn,fzn);
00662         }
00663       }
00664 
00665       fprintf(outfile,"\nCELLS %d %d\n",numNode,numNode*9);
00666 
00667       for(int i = 0; i < numNode; i++)
00668       {
00669         fprintf(outfile,"8 ");
00670 
00671         for(int j = 0; j < 8; j++)
00672         {
00673           int index = (8*i)+j;
00674           fprintf(outfile,"%d ",index);
00675         }
00676         fprintf(outfile,"\n");
00677       }
00678 
00679       fprintf(outfile,"\nCELL_TYPES %d\n",numNode);
00680 
00681       for(int i = 0; i < numNode; i++)
00682       {
00683         fprintf(outfile,"11 \n");
00684       }
00685 
00686       fprintf(outfile,"\nCELL_DATA %d\n",numNode);
00687       fprintf(outfile,"SCALARS scalars unsigned_int\n");
00688       fprintf(outfile,"LOOKUP_TABLE default\n");
00689 
00690       for (unsigned int i =0; i< numNode; i++) {
00691         unsigned int v = allBlocks[i].getWeight();
00692         fprintf(outfile,"%u \n", v);
00693       }
00694 
00695       fclose(outfile);
00696 
00697     }//end if p0
00698   }//end function
00699 
00700   unsigned int getGlobalMinLevel(ot::DA* da) {
00701 
00702     unsigned int myMinLev = ot::TreeNode::MAX_LEVEL;
00703     unsigned int globalMinLev;
00704 
00705     //It is sufficient to loop over the elements, since boundaries were added at the same level as some element. 
00706     if(da->iAmActive()) {
00707       for(da->init<ot::DA_FLAGS::WRITABLE>();
00708           da->curr() < da->end<ot::DA_FLAGS::WRITABLE>();
00709           da->next<ot::DA_FLAGS::WRITABLE>())  
00710       {
00711         unsigned int currLevel = da->getLevel(da->curr());
00712         if(currLevel < myMinLev) {
00713           myMinLev = currLevel;
00714         }
00715       }      
00716     }
00717 
00718     par::Mpi_Allreduce<unsigned int>(&myMinLev, &globalMinLev, 1, MPI_MIN, da->getComm() );
00719 
00720     //The initial octree is not root. So the min lev in the initial octree is atleast 1. So in the embedded octree the minlev is atleast 2. 
00721     assert(globalMinLev > 1);
00722 
00723     //Return the result in the original octree configuration
00724     return (globalMinLev -1);
00725   }
00726 
00727   unsigned int getGlobalMaxLevel(ot::DA* da) {
00728 
00729     unsigned int myMaxLev = 0;
00730     unsigned int globalMaxLev;
00731 
00732     //It is sufficient to loop over the elements, since boundaries were added at the same level as some element. 
00733     if( da->iAmActive() ) {
00734       for(da->init<ot::DA_FLAGS::WRITABLE>(); 
00735           da->curr() < da->end<ot::DA_FLAGS::WRITABLE>();
00736           da->next<ot::DA_FLAGS::WRITABLE>())  
00737       {
00738         unsigned int currLevel = da->getLevel(da->curr());
00739         if(currLevel > myMaxLev) {
00740           myMaxLev = currLevel;
00741         }
00742       }      
00743     }
00744 
00745     par::Mpi_Allreduce<unsigned int>(&myMaxLev, &globalMaxLev, 1, MPI_MAX, da->getComm() );
00746 
00747     //m_uiMaxDepth will be 0 on inActive processors.
00748     if(da->iAmActive()) {
00749       assert(globalMaxLev <= da->getMaxDepth() );
00750     }else {
00751       assert(globalMaxLev <= ot::TreeNode::MAX_LEVEL);
00752     }
00753 
00754     //Return the result in the original octree configuration
00755     return (globalMaxLev -1);   
00756   }
00757 
00758   unsigned int getSortOrder(unsigned int x, unsigned int y, 
00759       unsigned int z, unsigned int sz) {
00760 
00761     // first compare the x, y, and z to determine which one dominates ...
00762     unsigned int _x = x^(x+sz);
00763     unsigned int _y = y^(y+sz);
00764     unsigned int _z = z^(z+sz);
00765 
00766     // compute order ...
00767     if (_x > _y) {
00768       if ( _y > _z) {
00769         return ot::DA_FLAGS::ZYX;
00770       } else if ( _x > _z ) {
00771         return ot::DA_FLAGS::YZX;
00772       } else {
00773         return ot::DA_FLAGS::YXZ;
00774       }
00775     } else {
00776       if ( _y > _z) {
00777         return ot::DA_FLAGS::ZXY;
00778       } else if ( _x > _z ) {
00779         return ot::DA_FLAGS::ZXY;
00780       } else {
00781         return ot::DA_FLAGS::XYZ;
00782       }
00783     }
00784   }
00785 
00786   unsigned char getTouchConfig(const ot::TreeNode& curr, 
00787       const ot::TreeNode& next, unsigned int maxDepth) {
00788     unsigned char c = 0;
00789     unsigned int cx = curr.minX();
00790     unsigned int cy = curr.minY();
00791     unsigned int cz = curr.minZ();
00792     unsigned int nx = next.minX();
00793     unsigned int ny = next.minY();
00794     unsigned int nz = next.minZ();
00795     unsigned int cd = curr.getLevel();
00796     unsigned int nd = next.getLevel();
00797     unsigned int cs = (1u<<(maxDepth - cd));
00798     unsigned int ns = (1u<<(maxDepth - nd));
00799 
00800     //_zzyyxxT  
00801     bool isTouchingX = false;
00802     bool isTouchingY = false;
00803     bool isTouchingZ = false;
00804 
00805     if (cx == nx) {
00806       isTouchingX = true;
00807     }
00808 
00809     if (cx == (nx + ns) ) {
00810       isTouchingX = true;
00811       c += (1 << 1);
00812     }
00813 
00814     if ( (cx + cs) == nx ) {
00815       isTouchingX = true;
00816       c += (2 << 1);
00817     }
00818 
00819     if ( (cx + cs) == (nx + ns) ) {
00820       isTouchingX = true;
00821       c += (3 << 1);
00822     }
00823 
00824     if (cy == ny) {
00825       isTouchingY = true;
00826     }
00827 
00828     if (cy == (ny + ns) ) {
00829       isTouchingY = true;
00830       c += (1 << 3);
00831     }
00832 
00833     if ( (cy + cs) == ny ) {
00834       isTouchingY = true;
00835       c += (2 << 3);
00836     }
00837 
00838     if ( (cy + cs) == (ny + ns) ) {
00839       isTouchingY = true;
00840       c += (3 << 3);
00841     }
00842 
00843     if (cz == nz) {
00844       isTouchingZ = true;
00845     }
00846 
00847     if (cz == (nz + ns) ) {
00848       isTouchingZ = true;
00849       c += (1 << 5);
00850     }
00851 
00852     if ( (cz + cs) == nz ) {
00853       isTouchingZ = true;
00854       c += (2<<5);
00855     }
00856 
00857     if ( (cz + cs) == (nz + ns) ) {
00858       isTouchingZ = true;
00859       c += (3<<5);
00860     }
00861 
00862     if ( isTouchingX && isTouchingY && isTouchingZ ) {
00863       c += 1;
00864     } else {
00865       c = 0;
00866     }
00867 
00868     return c;
00869   }
00870 
00871   bool isRegularGrid(ot::DA* da) {
00872     int iHaveHanging = 0;
00873     if(da->iAmActive()) {
00874       for(da->init<ot::DA_FLAGS::WRITABLE>(); 
00875           da->curr() < da->end<ot::DA_FLAGS::WRITABLE>();
00876           da->next<ot::DA_FLAGS::WRITABLE>()) { 
00877         if(da->isHanging(da->curr())) {
00878           iHaveHanging = 1;
00879           std::cout<<(da->getRankActive())<<
00880             " found a hanging node for element with id: "
00881             <<(da->curr())<<std::endl;
00882 
00883           break;
00884         }
00885       }//end for writable
00886     }//end if active
00887 
00888     int anyOneHasHanging;
00889     par::Mpi_Allreduce<int>(&iHaveHanging, &anyOneHasHanging, 1, MPI_SUM, da->getComm());
00890 
00891     return (!anyOneHasHanging);
00892   }//end function
00893 
00894   void assignBoundaryFlags(ot::DA* da, 
00895       std::vector<unsigned char> & bdyFlagVec) {
00896 
00897     da->createVector(bdyFlagVec, false, false, 1);//Nodal, Non-ghosted, single dof
00898 
00899     for(int i = 0; i < bdyFlagVec.size(); i++) {
00900       bdyFlagVec[i] = 0;
00901     }//initialization loop
00902 
00903     unsigned char *bdyFlagArr = NULL;
00904     da->vecGetBuffer<unsigned char>(bdyFlagVec,bdyFlagArr,
00905         false,false,false,1);
00906 
00907     if(da->iAmActive()) {
00908       //We can only loop over the elements, hence the positive boundary elements
00909       //will add the flags for the external positive boundary nodes.
00910       for(da->init<ot::DA_FLAGS::ALL>(); 
00911           da->curr() < da->end<ot::DA_FLAGS::ALL>();
00912           da->next<ot::DA_FLAGS::ALL>()) { 
00913         unsigned char currentFlags;
00914         bool calledGetNodeIndices = false;
00915         if(da->isBoundaryOctant(&currentFlags)) {
00916           //The status of the anchor of any real octant is determined by the
00917           //negative face boundaries only
00918           int xNegBdy = (currentFlags & ot::TreeNode::X_NEG_BDY);
00919           int yNegBdy = (currentFlags & ot::TreeNode::Y_NEG_BDY);
00920           int zNegBdy = (currentFlags & ot::TreeNode::Z_NEG_BDY);
00921 
00922           unsigned char hnMask = da->getHangingNodeIndex(da->curr());
00923           if(!(hnMask & 1)) {
00924             //Anchor is not hanging                 
00925             if(xNegBdy) {
00926               if(yNegBdy && zNegBdy){
00927                 bdyFlagArr[da->curr()] = ot::TreeNode::CORNER_BDY;
00928               }else if(yNegBdy || zNegBdy) {
00929                 bdyFlagArr[da->curr()] = ot::TreeNode::EDGE_BDY;
00930               }else {
00931                 bdyFlagArr[da->curr()] = ot::TreeNode::FACE_BDY;
00932               }
00933             }else if(yNegBdy) {
00934               if(zNegBdy) {
00935                 bdyFlagArr[da->curr()] = ot::TreeNode::EDGE_BDY;
00936               }else {
00937                 bdyFlagArr[da->curr()] = ot::TreeNode::FACE_BDY;
00938               }
00939             }else if(zNegBdy) {
00940               bdyFlagArr[da->curr()] = ot::TreeNode::FACE_BDY;
00941             }
00942           }//end if anchor hanging
00943 
00944           if(currentFlags > ot::TreeNode::NEG_POS_DEMARCATION) {
00945             //Has at least one positive boundary
00946             //May have negative boundaries as well
00947             unsigned int indices[8];
00948             da->getNodeIndices(indices); 
00949             calledGetNodeIndices = true;
00950             int xPosBdy = (currentFlags & ot::TreeNode::X_POS_BDY);
00951             int yPosBdy = (currentFlags & ot::TreeNode::Y_POS_BDY);
00952             int zPosBdy = (currentFlags & ot::TreeNode::Z_POS_BDY);
00953 
00954             if(!(hnMask & (1 << 1))) {
00955               bool xBdy = xPosBdy;
00956               bool yBdy = yNegBdy;
00957               bool zBdy = zNegBdy;
00958               if(xBdy) {
00959                 if(yBdy && zBdy){
00960                   bdyFlagArr[indices[1]] = ot::TreeNode::CORNER_BDY;
00961                 }else if(yBdy || zBdy) {
00962                   bdyFlagArr[indices[1]] = ot::TreeNode::EDGE_BDY;
00963                 }else {
00964                   bdyFlagArr[indices[1]] = ot::TreeNode::FACE_BDY;
00965                 }
00966               }else if(yBdy) {
00967                 if(zBdy) {
00968                   bdyFlagArr[indices[1]] = ot::TreeNode::EDGE_BDY;
00969                 }else {
00970                   bdyFlagArr[indices[1]] = ot::TreeNode::FACE_BDY;
00971                 }
00972               }else if(zBdy) {
00973                 bdyFlagArr[indices[1]] = ot::TreeNode::FACE_BDY;
00974               }
00975             }
00976 
00977             if(!(hnMask & (1 << 2))) {
00978               bool xBdy = xNegBdy;
00979               bool yBdy = yPosBdy;
00980               bool zBdy = zNegBdy;
00981               if(xBdy) {
00982                 if(yBdy && zBdy){
00983                   bdyFlagArr[indices[2]] = ot::TreeNode::CORNER_BDY;
00984                 }else if(yBdy || zBdy) {
00985                   bdyFlagArr[indices[2]] = ot::TreeNode::EDGE_BDY;
00986                 }else {
00987                   bdyFlagArr[indices[2]] = ot::TreeNode::FACE_BDY;
00988                 }
00989               }else if(yBdy) {
00990                 if(zBdy) {
00991                   bdyFlagArr[indices[2]] = ot::TreeNode::EDGE_BDY;
00992                 }else {
00993                   bdyFlagArr[indices[2]] = ot::TreeNode::FACE_BDY;
00994                 }
00995               }else if(zBdy) {
00996                 bdyFlagArr[indices[2]] = ot::TreeNode::FACE_BDY;
00997               }
00998             }
00999 
01000             if(!(hnMask & (1 << 3))) {
01001               bool xBdy = xPosBdy;
01002               bool yBdy = yPosBdy;
01003               bool zBdy = zNegBdy;
01004               if(xBdy) {
01005                 if(yBdy && zBdy){
01006                   bdyFlagArr[indices[3]] = ot::TreeNode::CORNER_BDY;
01007                 }else if(yBdy || zBdy) {
01008                   bdyFlagArr[indices[3]] = ot::TreeNode::EDGE_BDY;
01009                 }else {
01010                   bdyFlagArr[indices[3]] = ot::TreeNode::FACE_BDY;
01011                 }
01012               }else if(yBdy) {
01013                 if(zBdy) {
01014                   bdyFlagArr[indices[3]] = ot::TreeNode::EDGE_BDY;
01015                 }else {
01016                   bdyFlagArr[indices[3]] = ot::TreeNode::FACE_BDY;
01017                 }
01018               }else if(zBdy) {
01019                 bdyFlagArr[indices[3]] = ot::TreeNode::FACE_BDY;
01020               }
01021             }
01022 
01023             if(!(hnMask & (1 << 4))) {
01024               bool xBdy = xNegBdy;
01025               bool yBdy = yNegBdy;
01026               bool zBdy = zPosBdy;
01027               if(xBdy) {
01028                 if(yBdy && zBdy){
01029                   bdyFlagArr[indices[4]] = ot::TreeNode::CORNER_BDY;
01030                 }else if(yBdy || zBdy) {
01031                   bdyFlagArr[indices[4]] = ot::TreeNode::EDGE_BDY;
01032                 }else {
01033                   bdyFlagArr[indices[4]] = ot::TreeNode::FACE_BDY;
01034                 }
01035               }else if(yBdy) {
01036                 if(zBdy) {
01037                   bdyFlagArr[indices[4]] = ot::TreeNode::EDGE_BDY;
01038                 }else {
01039                   bdyFlagArr[indices[4]] = ot::TreeNode::FACE_BDY;
01040                 }
01041               }else if(zBdy) {
01042                 bdyFlagArr[indices[4]] = ot::TreeNode::FACE_BDY;
01043               }
01044             }
01045 
01046             if(!(hnMask & (1 << 5))) {
01047               bool xBdy = xPosBdy;
01048               bool yBdy = yNegBdy;
01049               bool zBdy = zPosBdy;
01050               if(xBdy) {
01051                 if(yBdy && zBdy){
01052                   bdyFlagArr[indices[5]] = ot::TreeNode::CORNER_BDY;
01053                 }else if(yBdy || zBdy) {
01054                   bdyFlagArr[indices[5]] = ot::TreeNode::EDGE_BDY;
01055                 }else {
01056                   bdyFlagArr[indices[5]] = ot::TreeNode::FACE_BDY;
01057                 }
01058               }else if(yBdy) {
01059                 if(zBdy) {
01060                   bdyFlagArr[indices[5]] = ot::TreeNode::EDGE_BDY;
01061                 }else {
01062                   bdyFlagArr[indices[5]] = ot::TreeNode::FACE_BDY;
01063                 }
01064               }else if(zBdy) {
01065                 bdyFlagArr[indices[5]] = ot::TreeNode::FACE_BDY;
01066               }
01067             }
01068 
01069             if(!(hnMask & (1 << 6))) {
01070               bool xBdy = xNegBdy;
01071               bool yBdy = yPosBdy;
01072               bool zBdy = zPosBdy;
01073               if(xBdy) {
01074                 if(yBdy && zBdy){
01075                   bdyFlagArr[indices[6]] = ot::TreeNode::CORNER_BDY;
01076                 }else if(yBdy || zBdy) {
01077                   bdyFlagArr[indices[6]] = ot::TreeNode::EDGE_BDY;
01078                 }else {
01079                   bdyFlagArr[indices[6]] = ot::TreeNode::FACE_BDY;
01080                 }
01081               }else if(yBdy) {
01082                 if(zBdy) {
01083                   bdyFlagArr[indices[6]] = ot::TreeNode::EDGE_BDY;
01084                 }else {
01085                   bdyFlagArr[indices[6]] = ot::TreeNode::FACE_BDY;
01086                 }
01087               }else if(zBdy) {
01088                 bdyFlagArr[indices[6]] = ot::TreeNode::FACE_BDY;
01089               }
01090             }
01091 
01092             if(!(hnMask & (1 << 7))) {
01093               bool xBdy = xPosBdy;
01094               bool yBdy = yPosBdy;
01095               bool zBdy = zPosBdy;
01096               if(xBdy) {
01097                 if(yBdy && zBdy){
01098                   bdyFlagArr[indices[7]] = ot::TreeNode::CORNER_BDY;
01099                 }else if(yBdy || zBdy) {
01100                   bdyFlagArr[indices[7]] = ot::TreeNode::EDGE_BDY;
01101                 }else {
01102                   bdyFlagArr[indices[7]] = ot::TreeNode::FACE_BDY;
01103                 }
01104               }else if(yBdy) {
01105                 if(zBdy) {
01106                   bdyFlagArr[indices[7]] = ot::TreeNode::EDGE_BDY;
01107                 }else {
01108                   bdyFlagArr[indices[7]] = ot::TreeNode::FACE_BDY;
01109                 }
01110               }else if(zBdy) {
01111                 bdyFlagArr[indices[7]] = ot::TreeNode::FACE_BDY;
01112               }
01113             }
01114           }//end if-else has positive boundaries
01115         }//end if boundary
01116         if( (!calledGetNodeIndices) && (da->isLUTcompressed()) ) {
01117           da->updateQuotientCounter();
01118         }
01119       }//end for all own elements
01120     }//end if active
01121 
01122     da->vecRestoreBuffer<unsigned char>(bdyFlagVec,bdyFlagArr,false,false,false,1);
01123   }//end function
01124 
01125   void includeSiblingsOfBoundary(std::vector<ot::TreeNode>& allBoundaryLeaves, 
01126       const ot::TreeNode& myFirstOctant, const ot::TreeNode& myLastOctant) {
01127     PROF_ADD_BDY_SIBLINGS_BEGIN
01128 
01129       std::vector<ot::TreeNode> tmpAllBoundaryLeaves;
01130 
01131     for(unsigned int i = 0; i < allBoundaryLeaves.size(); i++) {
01132       ot::TreeNode thisOct = allBoundaryLeaves[i];
01133       ot::TreeNode thisOctParent = thisOct.getParent();
01134       unsigned int thisCnum = ((unsigned int)(thisOct.getChildNumber()));
01135       std::vector<ot::TreeNode> siblingsToAdd;
01136       thisOctParent.addChildren(siblingsToAdd);
01137       for(unsigned int j=0; j < 8; j++) {
01138         if( (j != thisCnum) && (j != (7-thisCnum)) ) {
01139           if( (siblingsToAdd[j] >= myFirstOctant) && 
01140               (siblingsToAdd[j] <= myLastOctant) ) {
01141             tmpAllBoundaryLeaves.push_back(siblingsToAdd[j]);
01142           }
01143         }
01144       }   
01145     }//end for i
01146 
01147     seq::makeVectorUnique<ot::TreeNode>(tmpAllBoundaryLeaves,false);
01148 
01149     std::vector<ot::TreeNode> tmp2Vec;
01150 
01151     unsigned int tmpCnt = 0;
01152     unsigned int bdyCnt = 0;
01153 
01154     //The two lists are independently sorted and unique, Now we do a linear
01155     //pass and merge them so that the result is also sorted and unique.
01156 
01157     while ( (tmpCnt < tmpAllBoundaryLeaves.size()) &&
01158         (bdyCnt < allBoundaryLeaves.size()) ) {
01159       if ( tmpAllBoundaryLeaves[tmpCnt] < allBoundaryLeaves[bdyCnt] ) {
01160         tmp2Vec.push_back(tmpAllBoundaryLeaves[tmpCnt]);
01161         tmpCnt++;
01162       }else if( tmpAllBoundaryLeaves[tmpCnt] > allBoundaryLeaves[bdyCnt] ) {
01163         tmp2Vec.push_back(allBoundaryLeaves[bdyCnt]);
01164         bdyCnt++;
01165       }else {
01166         tmp2Vec.push_back(allBoundaryLeaves[bdyCnt]);
01167         bdyCnt++;
01168         tmpCnt++; //tmpCnt must also be incremented to preserve uniqueness.
01169       }
01170     }
01171 
01172     while (bdyCnt < allBoundaryLeaves.size()) {
01173       tmp2Vec.push_back(allBoundaryLeaves[bdyCnt]);
01174       bdyCnt++;
01175     }
01176 
01177     while (tmpCnt < tmpAllBoundaryLeaves.size()) {
01178       tmp2Vec.push_back(tmpAllBoundaryLeaves[tmpCnt]);
01179       tmpCnt++;
01180     }
01181 
01182     allBoundaryLeaves = tmp2Vec;
01183 
01184     tmp2Vec.clear();
01185     tmpAllBoundaryLeaves.clear();
01186 
01187     PROF_ADD_BDY_SIBLINGS_END
01188   }//end function
01189 
01190   void prepareAprioriCommMessagesInDAtype1(const std::vector<ot::TreeNode>& in,
01191       std::vector<ot::TreeNode>& allBoundaryLeaves, std::vector<ot::TreeNode>& blocks,
01192       const std::vector<ot::TreeNode>& allBlocks, int myRank, int npes, int* sendCnt,
01193       std::vector<std::vector<unsigned int> >& sendNodes) {
01194     PROF_DA_APRIORI_COMM_BEGIN
01195 
01196       std::vector<unsigned int> bdy2elem;
01197 
01198     unsigned int bdyCnt = 0;
01199     unsigned int allCnt = 0;
01200 
01201     while (bdyCnt < allBoundaryLeaves.size()) {
01202       if ( allBoundaryLeaves[bdyCnt] < in[allCnt]) {
01203         bdyCnt++;
01204       } else if ( allBoundaryLeaves[bdyCnt] > in[allCnt]) {
01205         allCnt++;
01206       } else {
01207         //Both are equal.               
01208         bdy2elem.push_back(allCnt);
01209         bdyCnt++;
01210       }
01211     }
01212 
01213     //This step is necessary because some elements of allBoundaryLeaves were not
01214     //copied from the "in" vector, instead we generated them directly. So these
01215     //octants will not have correct flags set. So we find the corresponding copy
01216     //in the "in" vector.  
01217     allBoundaryLeaves.clear();
01218     for(unsigned int i = 0; i < bdy2elem.size(); i++) {
01219       allBoundaryLeaves.push_back(in[bdy2elem[i]]);
01220     }
01221 
01222     // 3. Reduce the list of global blocks to a smaller list that only has the
01223     // blocks which neighbour ones own blocks.
01224 
01225     //First mark your own blocks as being singular or not.
01226     unsigned int allBdyCnt = 0;
01227     for (unsigned int i = 0; i < blocks.size(); i++) {
01228       while( (allBdyCnt < allBoundaryLeaves.size()) &&
01229           (allBoundaryLeaves[allBdyCnt] < blocks[i]) ) {
01230         allBdyCnt++;
01231       }
01232       //Note, even if a block is Singular but is
01233       //not a ghost candidates (i.e., it is completely internal).
01234       // It will be treated as being NOT singular.
01235       bool isSingular = false;
01236       if( (allBdyCnt < allBoundaryLeaves.size()) &&
01237           (allBoundaryLeaves[allBdyCnt] == blocks[i]) ) {
01238         isSingular = true;
01239       }
01240       if(isSingular) {
01241         blocks[i].setWeight(1);
01242       }else {
01243         blocks[i].setWeight(0);
01244       }
01245     }
01246 
01247     std::vector<ot::TreeNode> myNhBlocks;
01248     for (int j = 0; j < blocks.size(); j++) {
01249       unsigned int myMaxX;
01250       unsigned int myMaxY;
01251       unsigned int myMaxZ;
01252       unsigned int myMinX;
01253       unsigned int myMinY;
01254       unsigned int myMinZ;
01255       if(blocks[j].getWeight()) {
01256         //Since the subsequent selection is done based on the octant's parent's
01257         //neighbours, Singular blocks must be handled differently.
01258         myMaxX = blocks[j].getParent().maxX();
01259         myMaxY = blocks[j].getParent().maxY();
01260         myMaxZ = blocks[j].getParent().maxZ();
01261         myMinX = blocks[j].getParent().minX();
01262         myMinY = blocks[j].getParent().minY();
01263         myMinZ = blocks[j].getParent().minZ();
01264       }else {
01265         myMaxX = blocks[j].maxX();
01266         myMaxY = blocks[j].maxY();
01267         myMaxZ = blocks[j].maxZ();
01268         myMinX = blocks[j].minX();
01269         myMinY = blocks[j].minY();
01270         myMinZ = blocks[j].minZ();
01271       }
01272       double myLenX = (double)(myMaxX-myMinX);
01273       double myLenY = (double)(myMaxY-myMinY);
01274       double myLenZ = (double)(myMaxZ-myMinZ);
01275       double myXc  = ((double)(myMinX + myMaxX))/2.0;
01276       double myYc  = ((double)(myMinY + myMaxY))/2.0;
01277       double myZc  = ((double)(myMinZ + myMaxZ))/2.0;
01278       for (int k = 0; k < allBlocks.size(); k++) {
01279         if ( (allBlocks[k] >= blocks[0]) && 
01280             (allBlocks[k] <= blocks[blocks.size()-1]) ) {
01281           //ignore my own blocks
01282           continue;
01283         }
01284         unsigned int hisMinX = allBlocks[k].minX();
01285         unsigned int hisMaxX = allBlocks[k].maxX();
01286         unsigned int hisMinY = allBlocks[k].minY();
01287         unsigned int hisMaxY = allBlocks[k].maxY();
01288         unsigned int hisMinZ = allBlocks[k].minZ();
01289         unsigned int hisMaxZ = allBlocks[k].maxZ();
01290         double hisLenX = (double)(hisMaxX-hisMinX);
01291         double hisLenY = (double)(hisMaxY-hisMinY);
01292         double hisLenZ = (double)(hisMaxZ-hisMinZ);
01293         double hisXc  = ((double)(hisMinX + hisMaxX))/2.0;
01294         double hisYc  = ((double)(hisMinY + hisMaxY))/2.0;
01295         double hisZc  = ((double)(hisMinZ + hisMaxZ))/2.0;
01296         double deltaX = ( (hisXc > myXc) ? (hisXc - myXc) : (myXc - hisXc ) );
01297         double deltaY = ( (hisYc > myYc) ? (hisYc - myYc) : (myYc - hisYc ) );
01298         double deltaZ = ( (hisZc > myZc) ? (hisZc - myZc) : (myZc - hisZc ) );
01299 
01300         //Note: This test will pass if the octants intersect (say one is an
01301         //ancestor of another) or they simply touch externally.
01302 
01303         if ((deltaX <= ((hisLenX+myLenX)/2.0)) &&
01304             (deltaY <= ((hisLenY+myLenY)/2.0)) &&
01305             (deltaZ <= ((hisLenZ+myLenZ)/2.0))) {
01306           //We touch
01307           myNhBlocks.push_back(allBlocks[k]);
01308         }//end if
01309       }//end for k
01310     }//end for j
01311 
01312     //This also sorts myNhBlocks
01313     seq::makeVectorUnique<ot::TreeNode>(myNhBlocks,false);
01314 
01315     sendNodes.resize(npes);
01316     for (int i=0; i < npes; i++) {
01317       sendNodes[i].clear();
01318       sendCnt[i] = 0;
01319     } 
01320 
01321     for (int j=0; j<allBoundaryLeaves.size(); j++) {
01322       //It is important to make the selection using the parent. This tackles
01323       //the cases where some nodes of an octant are hanging and so even if the octant itself
01324       //does not touch a processor, its parent does and so the replacement for
01325       //the hanging node will be mapped to that processor. This also handles
01326       //the case where two or more siblings belong to different processors. By
01327       //making the test using the parent they will be sent to each other.
01328       //Moreover, some of the nodes of these children could be hanging and the
01329       //replacement could belong to a third processor. Even in this case, the
01330       //octants will be sent to the correct processors. 
01331 
01332       ot::TreeNode parNode = allBoundaryLeaves[j].getParent();
01333       unsigned int myMaxX = parNode.maxX();
01334       unsigned int myMaxY = parNode.maxY();
01335       unsigned int myMaxZ = parNode.maxZ();
01336       unsigned int myMinX = parNode.minX();
01337       unsigned int myMinY = parNode.minY();
01338       unsigned int myMinZ = parNode.minZ();
01339       double myLenX = (double)(myMaxX-myMinX);
01340       double myLenY = (double)(myMaxY-myMinY);
01341       double myLenZ = (double)(myMaxZ-myMinZ);
01342       double myXc  = ((double)(myMinX + myMaxX))/2.0;
01343       double myYc  = ((double)(myMinY + myMaxY))/2.0;
01344       double myZc  = ((double)(myMinZ + myMaxZ))/2.0;
01345       unsigned int lastP = npes;
01346       for (int k = 0; k < myNhBlocks.size(); k++) {
01347         if (myNhBlocks[k].getWeight() == lastP) {
01348           continue;
01349         }
01350         unsigned int hisMinX = myNhBlocks[k].minX();
01351         unsigned int hisMaxX = myNhBlocks[k].maxX();
01352         unsigned int hisMinY = myNhBlocks[k].minY();
01353         unsigned int hisMaxY = myNhBlocks[k].maxY();
01354         unsigned int hisMinZ = myNhBlocks[k].minZ();
01355         unsigned int hisMaxZ = myNhBlocks[k].maxZ();
01356         double hisLenX = (double)(hisMaxX-hisMinX);
01357         double hisLenY = (double)(hisMaxY-hisMinY);
01358         double hisLenZ = (double)(hisMaxZ-hisMinZ);
01359         double hisXc  = ((double)(hisMinX + hisMaxX))/2.0;
01360         double hisYc  = ((double)(hisMinY + hisMaxY))/2.0;
01361         double hisZc  = ((double)(hisMinZ + hisMaxZ))/2.0;
01362         double deltaX = ( (hisXc > myXc) ? (hisXc - myXc) : (myXc - hisXc ) );
01363         double deltaY = ( (hisYc > myYc) ? (hisYc - myYc) : (myYc - hisYc ) );
01364         double deltaZ = ( (hisZc > myZc) ? (hisZc - myZc) : (myZc - hisZc ) );
01365 
01366         //Note: This test will pass if the octants intersect (say one is an
01367         //ancestor of another) or they simply touch externally.
01368 
01369         if ((deltaX <= ((hisLenX+myLenX)/2.0)) &&
01370             (deltaY <= ((hisLenY+myLenY)/2.0)) &&
01371             (deltaZ <= ((hisLenZ+myLenZ)/2.0))) {
01372           //We touch
01373           sendNodes[myNhBlocks[k].getWeight()].push_back(bdy2elem[j]); 
01374           sendCnt[myNhBlocks[k].getWeight()]++;
01375           lastP = myNhBlocks[k].getWeight();
01376         }//end if
01377       }//end for k
01378     }//end for j
01379 
01380     PROF_DA_APRIORI_COMM_END
01381   }//end function
01382 
01383   void prepareAprioriCommMessagesInDAtype2(const std::vector<ot::TreeNode>& in,
01384       std::vector<ot::TreeNode>& allBoundaryLeaves, std::vector<ot::TreeNode>& blocks,
01385       const std::vector<ot::TreeNode>& minsOfBlocks, int myRank, int npes, int* sendCnt,
01386       std::vector<std::vector<unsigned int> >& sendNodes) {
01387     PROF_DA_APRIORI_COMM_BEGIN
01388 
01389       std::vector<unsigned int> bdy2elem;
01390 
01391     unsigned int bdyCnt = 0;
01392     unsigned int allCnt = 0;
01393     unsigned int maxDepth = in[0].getMaxDepth();
01394     unsigned int dim = in[0].getDim(); 
01395 
01396     while (bdyCnt < allBoundaryLeaves.size()) {
01397       if ( allBoundaryLeaves[bdyCnt] < in[allCnt]) {
01398         bdyCnt++;
01399       } else if ( allBoundaryLeaves[bdyCnt] > in[allCnt]) {
01400         allCnt++;
01401       } else {
01402         //Both are equal.               
01403         bdy2elem.push_back(allCnt);
01404         bdyCnt++;
01405       }
01406     }
01407 
01408     //This step is necessary because some elements of allBoundaryLeaves were not
01409     //copied from the "in" vector, instead we generated them directly. So these
01410     //octants will not have correct flags set. So we find the corresponding copy
01411     //in the "in" vector.  
01412     allBoundaryLeaves.clear();
01413     for(unsigned int i = 0; i < bdy2elem.size(); i++) {
01414       allBoundaryLeaves.push_back(in[bdy2elem[i]]);
01415     }
01416 
01417     sendNodes.resize(npes);
01418     for (int i = 0; i < npes; i++) {
01419       sendNodes[i].clear();
01420       sendCnt[i] = 0;
01421     } 
01422 
01423     for (int j = 0; j < allBoundaryLeaves.size(); j++) {
01424       //It is important to make the selection using the parent. This tackles
01425       //the cases where some nodes of an octant are hanging and so even if the octant itself
01426       //does not touch a processor, its parent does and so the replacement for
01427       //the hanging node will be mapped to that processor. This also handles
01428       //the case where two or more siblings belong to different processors. By
01429       //making the test using the parent they will be sent to each other.
01430       //Moreover, some of the nodes of these children could be hanging and the
01431       //replacement could belong to a third processor. Even in this case, the
01432       //octants will be sent to the correct processors. 
01433 
01434       //1. We must not miss any pre-ghost elements that point to one of our own
01435       //octants, i.e. any pre-ghost element that we integrate over. This is
01436       //because we need to build the nlist for these octants as well.
01437       //2. We might get some extra ghosts they will be marked as FOREIGN and
01438       //will be ignored.
01439       //3. We must try to get as many post-ghost octants as possible to avoid
01440       //misses later and hence reduce subsequent communication. We should get
01441       //all direct post-ghost octants.
01442       //4. Post-ghosts are read-only and so it is sufficient to test for
01443       //post-ghost using its anchor
01444       std::vector<ot::TreeNode> myVertices;
01445       std::vector<ot::TreeNode> parVertices;
01446       std::vector<ot::TreeNode> anchorMirrors;
01447 
01448       unsigned int myX = allBoundaryLeaves[j].getX();
01449       unsigned int myY = allBoundaryLeaves[j].getY();
01450       unsigned int myZ = allBoundaryLeaves[j].getZ();
01451 
01452       //keys to check if you are a pre-ghost
01453       //Positive boundaries will not be pre-ghosts
01454       if(!(allBoundaryLeaves[j].getFlag() & ot::TreeNode::BOUNDARY)) {
01455         unsigned int myLev = allBoundaryLeaves[j].getLevel();
01456         unsigned int mySz = (1u<<(maxDepth - myLev));
01457 
01458         //All vertices except my anchor. Since my anchor belongs to my processor
01459         myVertices.push_back(ot::TreeNode((myX + mySz), myY, myZ, maxDepth, dim, maxDepth));
01460         myVertices.push_back(ot::TreeNode(myX, (myY + mySz), myZ, maxDepth, dim, maxDepth));
01461         myVertices.push_back(ot::TreeNode((myX + mySz), (myY + mySz), myZ, maxDepth, dim, maxDepth));
01462         myVertices.push_back(ot::TreeNode(myX, myY, (myZ + mySz), maxDepth, dim, maxDepth));
01463         myVertices.push_back(ot::TreeNode((myX + mySz), myY, (myZ + mySz), maxDepth, dim, maxDepth));
01464         myVertices.push_back(ot::TreeNode(myX, (myY + mySz), (myZ + mySz), maxDepth, dim, maxDepth));
01465         myVertices.push_back(ot::TreeNode((myX + mySz), (myY + mySz), (myZ + mySz), maxDepth, dim, maxDepth));
01466 
01467         ot::TreeNode parNode = allBoundaryLeaves[j].getParent();
01468         unsigned int myCnum = allBoundaryLeaves[j].getChildNumber();
01469         unsigned int parX = parNode.getX();
01470         unsigned int parY = parNode.getY();
01471         unsigned int parZ = parNode.getZ();
01472         unsigned int parLev = parNode.getLevel();
01473         unsigned int parSz = (1u<<(maxDepth - parLev));
01474 
01475         //vertices numbers myCnum and (7 - myCnum) can't be hanging and so they
01476         //will not be mapped to the corresponding vertices of my parent
01477         if( (myCnum != 0) && (myCnum != 7) ) {
01478           parVertices.push_back(ot::TreeNode(parX, parY, parZ, maxDepth, dim, maxDepth));
01479         }
01480         if( (myCnum != 1) && (myCnum != 6) ) {
01481           parVertices.push_back(ot::TreeNode((parX + parSz), parY, parZ, maxDepth, dim, maxDepth));
01482         }
01483         if( (myCnum != 2) && (myCnum != 5) ) {
01484           parVertices.push_back(ot::TreeNode(parX, (parY + parSz), parZ, maxDepth, dim, maxDepth));
01485         }
01486         if( (myCnum != 3) && (myCnum != 4) ) {
01487           parVertices.push_back(ot::TreeNode((parX + parSz), (parY + parSz), parZ, maxDepth, dim, maxDepth));
01488         }
01489         if( (myCnum != 4) && (myCnum != 3) ) {
01490           parVertices.push_back(ot::TreeNode(parX, parY, (parZ + parSz), maxDepth, dim, maxDepth));
01491         }
01492         if( (myCnum != 5) && (myCnum != 2) ) {
01493           parVertices.push_back(ot::TreeNode((parX + parSz), parY, (parZ + parSz), maxDepth, dim, maxDepth));
01494         }
01495         if( (myCnum != 6) && (myCnum != 1) ) {
01496           parVertices.push_back(ot::TreeNode(parX, (parY + parSz), (parZ + parSz), maxDepth, dim, maxDepth));
01497         }
01498         if( (myCnum != 7) && (myCnum != 0) ) {
01499           parVertices.push_back(ot::TreeNode((parX + parSz), (parY + parSz),
01500                 (parZ + parSz), maxDepth, dim, maxDepth));
01501         }
01502 
01503       }//end if positive boundary
01504 
01505       //Keys to check if you are a post-ghost
01506       //If the anchor is hanging we need not send it. When some processor searches
01507       //for this node as its primary key and does not find it, it will be
01508       //understood that this is hanging  
01509       if(allBoundaryLeaves[j].getFlag() & ot::TreeNode::NODE) {
01510         //-x
01511         if( myX ) {
01512           anchorMirrors.push_back(ot::TreeNode((myX - 1), myY, myZ, 
01513                 maxDepth, dim, maxDepth));
01514         }
01515         //-y
01516         if( myY ) {
01517           anchorMirrors.push_back(ot::TreeNode(myX, (myY - 1), myZ, 
01518                 maxDepth, dim, maxDepth));
01519         }
01520         //-z
01521         if( myZ ) {
01522           anchorMirrors.push_back(ot::TreeNode(myX, myY, (myZ - 1), 
01523                 maxDepth, dim, maxDepth));
01524         }
01525         //-xy
01526         if( myX && myY ) {
01527           anchorMirrors.push_back(ot::TreeNode((myX - 1), (myY - 1), myZ, 
01528                 maxDepth, dim, maxDepth));
01529         }
01530         //-yz
01531         if( myY && myZ ) {
01532           anchorMirrors.push_back(ot::TreeNode(myX, (myY - 1), (myZ - 1),
01533                 maxDepth, dim, maxDepth));
01534         }
01535         //-zx
01536         if( myZ && myX ) {
01537           anchorMirrors.push_back(ot::TreeNode((myX - 1), myY, (myZ - 1),
01538                 maxDepth, dim, maxDepth));
01539         }
01540         //-xyz
01541         if( myX && myY && myZ ) {
01542           anchorMirrors.push_back(ot::TreeNode((myX - 1), (myY - 1), (myZ - 1),
01543                 maxDepth, dim, maxDepth));
01544         }
01545       }//end if hanging anchor
01546 
01547       std::vector<unsigned int> pIds;
01548       for(int k = 0; k < parVertices.size(); k++) {
01549         unsigned int idx;
01550         seq::maxLowerBound<ot::TreeNode>(minsOfBlocks, parVertices[k], idx, NULL, NULL);
01551         pIds.push_back(idx);
01552       }
01553 
01554       for(int k = 0; k < anchorMirrors.size(); k++) {
01555         unsigned int idx;
01556         seq::maxLowerBound<ot::TreeNode>(minsOfBlocks, anchorMirrors[k], idx, NULL, NULL);
01557         pIds.push_back(idx);
01558       }
01559 
01560       for(int k = 0; k < myVertices.size(); k++) {
01561         unsigned int idx;
01562         seq::maxLowerBound<ot::TreeNode>(minsOfBlocks, myVertices[k], idx, NULL, NULL);
01563         pIds.push_back(idx);
01564       }//end for k
01565 
01566       //Do not send the same octant to the same processor twice 
01567       seq::makeVectorUnique<unsigned int>(pIds, false);
01568 
01569       for(int k = 0; k < pIds.size(); k++) {
01570         //Send to processor pIds[k] 
01571         //Only send to other processors  
01572         if(pIds[k] != myRank) {
01573           sendNodes[pIds[k]].push_back(bdy2elem[j]); 
01574           sendCnt[pIds[k]]++;
01575         }
01576       }//end for k
01577     }//end for j
01578 
01579     PROF_DA_APRIORI_COMM_END
01580   }//end function
01581 
01582   void DA_Initialize(MPI_Comm comm) {
01583     PROF_DA_INIT_BEGIN 
01584 
01585 #ifdef __USE_MG_INIT_TYPE3__
01586       createShapeFnCoeffs_Type3(comm);
01587 #else
01588 #ifdef __USE_MG_INIT_TYPE2__
01589     createShapeFnCoeffs_Type2(comm);
01590 #else
01591     createShapeFnCoeffs_Type1(comm);
01592 #endif
01593 #endif
01594 
01595     PROF_DA_INIT_END 
01596   }
01597 
01598   void DA_Finalize() {
01599     PROF_DA_FINAL_BEGIN 
01600 
01601       for(unsigned int cNum = 0; cNum < 8; cNum++) {
01602         for(unsigned int eType = 0; eType < 18; eType++) {
01603           for(unsigned int i = 0; i < 8; i++) {
01604             delete [] (ShapeFnCoeffs[cNum][eType][i]);
01605             ShapeFnCoeffs[cNum][eType][i] = NULL;
01606           }
01607           delete [] (ShapeFnCoeffs[cNum][eType]);
01608           ShapeFnCoeffs[cNum][eType] = NULL;
01609         }
01610         delete [] (ShapeFnCoeffs[cNum]);
01611         ShapeFnCoeffs[cNum] = NULL;
01612       }
01613 
01614     delete [] ShapeFnCoeffs;
01615     ShapeFnCoeffs = NULL;
01616 
01617     PROF_DA_FINAL_END 
01618   }
01619 
01620   int createShapeFnCoeffs_Type3(MPI_Comm comm) {
01621     FILE* infile;
01622     int rank;
01623     MPI_Comm_rank(comm, &rank); 
01624     char fname[250];
01625     sprintf(fname,"ShapeFnCoeffs_%d.inp", rank);
01626     infile = fopen(fname,"r");
01627     if(!infile) {
01628       std::cout<<"The file "<<fname<<" is not good for reading."<<std::endl;
01629       assert(false);
01630     }
01631 
01632     typedef double* doublePtr;
01633     typedef doublePtr* double2Ptr;
01634     typedef double2Ptr* double3Ptr;
01635 
01636     ShapeFnCoeffs = new double3Ptr[8];
01637     for(unsigned int cNum = 0; cNum < 8; cNum++) {
01638       ShapeFnCoeffs[cNum] = new double2Ptr[18];
01639       for(unsigned int eType = 0; eType < 18; eType++) {
01640         ShapeFnCoeffs[cNum][eType] = new doublePtr[8];
01641         for(unsigned int i = 0; i < 8; i++) {
01642           ShapeFnCoeffs[cNum][eType][i] = new double[8];
01643           for(unsigned int j = 0; j < 8; j++) {
01644             fscanf(infile,"%lf",&(ShapeFnCoeffs[cNum][eType][i][j]));
01645           }
01646         }
01647       }
01648     }
01649 
01650     fclose(infile);
01651     return 1;
01652   }
01653 
01654   int createShapeFnCoeffs_Type2(MPI_Comm comm) {
01655     FILE* infile;
01656 
01657     int rank, npes;
01658     MPI_Comm_rank(comm, &rank);
01659     MPI_Comm_size(comm, &npes);
01660 
01661     const int THOUSAND = 1000;
01662     int numGroups = (npes/THOUSAND);
01663     if( (numGroups*THOUSAND) < npes) {
01664       numGroups++;
01665     }
01666 
01667     MPI_Comm newComm;
01668 
01669     bool* isEmptyList = new bool[npes];
01670     for(int i = 0; i < numGroups; i++) {
01671       for(int j = 0; (j < (i*THOUSAND)) && (j < npes); j++) {
01672         isEmptyList[j] = true;
01673       }
01674       for(int j = (i*THOUSAND); (j < ((i+1)*THOUSAND)) && (j < npes); j++) {
01675         isEmptyList[j] = false;
01676       }
01677       for(int j = ((i + 1)*THOUSAND); j < npes; j++) {
01678         isEmptyList[j] = true;
01679       }
01680       MPI_Comm tmpComm;
01681       par::splitComm2way(isEmptyList, &tmpComm, comm);
01682       if(!(isEmptyList[rank])) {
01683         newComm = tmpComm;
01684       }
01685     }//end for i
01686     delete [] isEmptyList;
01687 
01688     if((rank % THOUSAND) == 0) {
01689       char fname[250];
01690       sprintf(fname,"ShapeFnCoeffs_%d.inp", (rank/THOUSAND));
01691       infile = fopen(fname,"r");
01692       if(!infile) {
01693         std::cout<<"The file "<<fname<<" is not good for reading."<<std::endl;
01694         assert(false);
01695       }
01696     }
01697 
01698     typedef double* doublePtr;
01699     typedef doublePtr* double2Ptr;
01700     typedef double2Ptr* double3Ptr;
01701 
01702     ShapeFnCoeffs = new double3Ptr[8];
01703     for(unsigned int cNum = 0; cNum < 8; cNum++) {
01704       ShapeFnCoeffs[cNum] = new double2Ptr[18];
01705       for(unsigned int eType = 0; eType < 18; eType++) {
01706         ShapeFnCoeffs[cNum][eType] = new doublePtr[8];
01707         for(unsigned int i = 0; i < 8; i++) {
01708           ShapeFnCoeffs[cNum][eType][i] = new double[8];
01709           if((rank % THOUSAND) == 0){
01710             for(unsigned int j = 0; j < 8; j++) {
01711               fscanf(infile,"%lf",&(ShapeFnCoeffs[cNum][eType][i][j]));
01712             }
01713           }
01714         }
01715       }
01716     }
01717 
01718     if((rank % THOUSAND) == 0){
01719       fclose(infile);
01720     }
01721 
01722     double * tmpMat = new double[9216];
01723 
01724     if((rank % THOUSAND) == 0) {
01725       unsigned int ctr = 0;
01726       for(unsigned int cNum = 0; cNum < 8; cNum++) {
01727         for(unsigned int eType = 0; eType < 18; eType++) {
01728           for(unsigned int i = 0; i < 8; i++) {
01729             for(unsigned int j = 0; j < 8; j++) {
01730               tmpMat[ctr] = ShapeFnCoeffs[cNum][eType][i][j];
01731               ctr++;
01732             }
01733           }
01734         }
01735       }
01736     }
01737 
01738     par::Mpi_Bcast<double>(tmpMat,9216, 0, newComm);
01739 
01740     if((rank % THOUSAND) != 0) {
01741       unsigned int ctr = 0;
01742       for(unsigned int cNum = 0; cNum < 8; cNum++) {
01743         for(unsigned int eType = 0; eType < 18; eType++) {
01744           for(unsigned int i = 0; i < 8; i++) {
01745             for(unsigned int j = 0; j < 8; j++) {
01746               ShapeFnCoeffs[cNum][eType][i][j] = tmpMat[ctr];
01747               ctr++;
01748             }
01749           }
01750         }
01751       }
01752     }
01753 
01754     delete [] tmpMat;
01755 
01756     return 1;
01757   }//end of function
01758 
01759   int createShapeFnCoeffs_Type1(MPI_Comm comm) {
01760     FILE* infile;
01761     int rank;
01762     MPI_Comm_rank(comm, &rank);
01763     if(!rank) {
01764       char fname[250];
01765       sprintf(fname,"ShapeFnCoeffs.inp");
01766       infile = fopen(fname,"r");
01767       if(!infile) {
01768         std::cout<<"The file "<<fname<<" is not good for reading."<<std::endl;
01769         assert(false);
01770       }
01771     }
01772 
01773     typedef double* doublePtr;
01774     typedef doublePtr* double2Ptr;
01775     typedef double2Ptr* double3Ptr;
01776 
01777     ShapeFnCoeffs = new double3Ptr[8];
01778     for(unsigned int cNum = 0; cNum < 8; cNum++) {
01779       ShapeFnCoeffs[cNum] = new double2Ptr[18];
01780       for(unsigned int eType = 0; eType < 18; eType++) {
01781         ShapeFnCoeffs[cNum][eType] = new doublePtr[8];
01782         for(unsigned int i = 0; i < 8; i++) {
01783           ShapeFnCoeffs[cNum][eType][i] = new double[8];
01784           if(!rank){
01785             for(unsigned int j = 0; j < 8; j++) {
01786               fscanf(infile,"%lf",&(ShapeFnCoeffs[cNum][eType][i][j]));
01787             }
01788           }
01789         }
01790       }
01791     }
01792 
01793     if(!rank){
01794       fclose(infile);
01795     }
01796 
01797     double * tmpMat = new double[9216];
01798 
01799     if(!rank) {
01800       unsigned int ctr = 0;
01801       for(unsigned int cNum = 0; cNum < 8; cNum++) {
01802         for(unsigned int eType = 0; eType < 18; eType++) {
01803           for(unsigned int i = 0; i < 8; i++) {
01804             for(unsigned int j = 0; j < 8; j++) {
01805               tmpMat[ctr] = ShapeFnCoeffs[cNum][eType][i][j];
01806               ctr++;
01807             }
01808           }
01809         }
01810       }
01811     }
01812 
01813     par::Mpi_Bcast<double>(tmpMat,9216, 0, comm);
01814 
01815     if(rank) {
01816       unsigned int ctr = 0;
01817       for(unsigned int cNum = 0; cNum < 8; cNum++) {
01818         for(unsigned int eType = 0; eType < 18; eType++) {
01819           for(unsigned int i = 0; i < 8; i++) {
01820             for(unsigned int j = 0; j < 8; j++) {
01821               ShapeFnCoeffs[cNum][eType][i][j] = tmpMat[ctr];
01822               ctr++;
01823             }
01824           }
01825         }
01826       }
01827     }
01828 
01829     delete [] tmpMat;
01830 
01831     return 1;
01832   }//end of function
01833 
01834 }//end namespace
01835 
01836 

Generated on Tue Mar 24 16:14:05 2009 for DENDRO by  doxygen 1.3.9.1