00001
00002 #include "mpi.h"
00003 #include "petsc.h"
00004 #include "sys.h"
00005 #include <vector>
00006 #include "TreeNode.h"
00007 #include "parUtils.h"
00008 #include "oda.h"
00009 #include "handleStencils.h"
00010 #include "odaJac.h"
00011 #include "colors.h"
00012 #include <cstdlib>
00013 #include <cstring>
00014 #include "externVars.h"
00015 #include "dendro.h"
00016
00017
00018 #ifdef MPI_WTIME_IS_GLOBAL
00019 #undef MPI_WTIME_IS_GLOBAL
00020 #endif
00021
00022 #ifdef PETSC_USE_LOG
00023
00024 int Jac1DiagEvent;
00025 int Jac1MultEvent;
00026 int Jac1FinestDiagEvent;
00027 int Jac1FinestMultEvent;
00028 #endif
00029
00030 double**** LaplacianType2Stencil;
00031 double**** MassType2Stencil;
00032
00033 int main(int argc, char ** argv ) {
00034 int size, rank;
00035 bool incCorner = 1;
00036 unsigned int numPts;
00037 unsigned int solveU = 0;
00038 unsigned int writeB = 0;
00039 unsigned int numLoops = 100;
00040 char Kstr[20];
00041 char pFile[50],bFile[50],uFile[50];
00042 double gSize[3];
00043 unsigned int ptsLen;
00044 unsigned int maxNumPts= 1;
00045 unsigned int dim=3;
00046 unsigned int maxDepth=30;
00047 bool compressLut=true;
00048 double localTime, totalTime;
00049 double startTime, endTime;
00050 DendroIntL locSz, totalSz;
00051 std::vector<ot::TreeNode> linOct, balOct;
00052 std::vector<double> pts;
00053
00054 PetscInitialize(&argc,&argv,"options",NULL);
00055 ot::RegisterEvents();
00056 ot::DA_Initialize(MPI_COMM_WORLD);
00057
00058 #ifdef PETSC_USE_LOG
00059 PetscLogEventRegister(&Jac1DiagEvent,"ODAmatDiag",PETSC_VIEWER_COOKIE);
00060 PetscLogEventRegister(&Jac1MultEvent,"ODAmatMult",PETSC_VIEWER_COOKIE);
00061 PetscLogEventRegister(&Jac1FinestDiagEvent,"ODAmatDiagFinest",PETSC_VIEWER_COOKIE);
00062 PetscLogEventRegister(&Jac1FinestMultEvent,"ODAmatMultFinest",PETSC_VIEWER_COOKIE);
00063 int stages[4];
00064 PetscLogStageRegister(&stages[0],"P2O.");
00065 PetscLogStageRegister(&stages[1],"Bal");
00066 PetscLogStageRegister(&stages[2],"ODACreate");
00067 PetscLogStageRegister(&stages[3],"MatVec");
00068 #endif
00069
00070 MPI_Comm_size(MPI_COMM_WORLD,&size);
00071 MPI_Comm_rank(MPI_COMM_WORLD,&rank);
00072 if(argc < 3) {
00073 std::cerr << "Usage: " << argv[0] << "inpfile maxDepth[30] solveU[0]\
00074 writeB[0] dim[3] maxNumPtsPerOctant[1] incCorner[1] numLoops[100] compressLut[1] " << std::endl;
00075 return -1;
00076 }
00077 if(argc > 2) {
00078 maxDepth = atoi(argv[2]);
00079 }
00080 if(argc > 3) {
00081 solveU = atoi(argv[3]);
00082 }
00083 if(argc > 4) {
00084 writeB = atoi(argv[4]);
00085 }
00086 if(argc > 5) {
00087 dim = atoi(argv[5]);
00088 }
00089 if(argc > 6) {
00090 maxNumPts = atoi(argv[6]);
00091 }
00092 if(argc > 7) { incCorner = (bool)(atoi(argv[7]));}
00093 if(argc > 8) { numLoops = atoi(argv[8]); }
00094 if(argc > 9) { compressLut = (bool)(atoi(argv[9]));}
00095
00096 strcpy(bFile,argv[1]);
00097 ot::int2str(rank,Kstr);
00098 strcat(bFile,Kstr);
00099 strcat(bFile,"_\0");
00100 ot::int2str(size,Kstr);
00101 strcat(bFile,Kstr);
00102 strcpy(pFile,bFile);
00103 strcpy(uFile,bFile);
00104 strcat(bFile,"_Bal.ot\0");
00105 strcat(pFile,".pts\0");
00106 strcat(uFile,".sol\0");
00107
00108
00109 if(!rank){
00110 std::cout << " reading "<<pFile<<std::endl;
00111 }
00112 ot::readPtsFromFile(pFile, pts);
00113 if(!rank){
00114 std::cout << " finished reading "<<pFile<<std::endl;
00115 }
00116 ptsLen = pts.size();
00117 std::vector<ot::TreeNode> tmpNodes;
00118 for(int i=0;i<ptsLen;i+=3) {
00119 if( (pts[i] > 0.0) &&
00120 (pts[i+1] > 0.0)
00121 && (pts[i+2] > 0.0) &&
00122 ( ((unsigned int)(pts[i]*((double)(1u << maxDepth)))) < (1u << maxDepth)) &&
00123 ( ((unsigned int)(pts[i+1]*((double)(1u << maxDepth)))) < (1u << maxDepth)) &&
00124 ( ((unsigned int)(pts[i+2]*((double)(1u << maxDepth)))) < (1u << maxDepth)) ) {
00125 #ifdef __DEBUG__
00126 assert((i+2) < ptsLen);
00127 #endif
00128 tmpNodes.push_back( ot::TreeNode((unsigned int)(pts[i]*(double)(1u << maxDepth)),
00129 (unsigned int)(pts[i+1]*(double)(1u << maxDepth)),
00130 (unsigned int)(pts[i+2]*(double)(1u << maxDepth)),
00131 maxDepth,dim,maxDepth) );
00132 }
00133 }
00134 pts.clear();
00135 par::removeDuplicates<ot::TreeNode>(tmpNodes,false,MPI_COMM_WORLD);
00136 linOct = tmpNodes;
00137 tmpNodes.clear();
00138 par::partitionW<ot::TreeNode>(linOct, NULL,MPI_COMM_WORLD);
00139
00140 locSz = linOct.size();
00141 par::Mpi_Reduce<DendroIntL>(&locSz, &totalSz, 1, MPI_SUM, 0, MPI_COMM_WORLD);
00142 if(rank==0) {
00143 std::cout<<"# pts= " << totalSz<<std::endl;
00144 }
00145
00146 pts.resize(3*(linOct.size()));
00147 ptsLen = (3*(linOct.size()));
00148 for(int i=0;i<linOct.size();i++) {
00149 pts[3*i] = (((double)(linOct[i].getX())) + 0.5)/((double)(1u << maxDepth));
00150 pts[(3*i)+1] = (((double)(linOct[i].getY())) +0.5)/((double)(1u << maxDepth));
00151 pts[(3*i)+2] = (((double)(linOct[i].getZ())) +0.5)/((double)(1u << maxDepth));
00152 }
00153 linOct.clear();
00154 gSize[0] = 1.;
00155 gSize[1] = 1.;
00156 gSize[2] = 1.;
00157
00158 MPI_Barrier(MPI_COMM_WORLD);
00159 #ifdef PETSC_USE_LOG
00160 PetscLogStagePush(stages[0]);
00161 #endif
00162 startTime = MPI_Wtime();
00163 ot::points2Octree(pts, gSize, linOct, dim, maxDepth, maxNumPts, MPI_COMM_WORLD);
00164 endTime = MPI_Wtime();
00165 #ifdef PETSC_USE_LOG
00166 PetscLogStagePop();
00167 #endif
00168 localTime = endTime - startTime;
00169 par::Mpi_Reduce<double>(&localTime, &totalTime, 1, MPI_MAX, 0, MPI_COMM_WORLD);
00170 if(!rank){
00171 std::cout <<"P2n Time: "<<totalTime << std::endl;
00172 }
00173
00174 locSz = linOct.size();
00175 par::Mpi_Reduce<DendroIntL>(&locSz, &totalSz, 1, MPI_SUM, 0, MPI_COMM_WORLD);
00176 if(rank==0) {
00177 std::cout<<"# of Unbalanced Octants: " << totalSz<<std::endl;
00178 }
00179 pts.clear();
00180
00181
00182 MPI_Barrier(MPI_COMM_WORLD);
00183 #ifdef PETSC_USE_LOG
00184 PetscLogStagePush(stages[1]);
00185 #endif
00186 startTime = MPI_Wtime();
00187 ot::balanceOctree (linOct, balOct, dim, maxDepth, incCorner, MPI_COMM_WORLD, NULL, NULL);
00188 endTime = MPI_Wtime();
00189 #ifdef PETSC_USE_LOG
00190 PetscLogStagePop();
00191 #endif
00192 linOct.clear();
00193 if(writeB) {
00194 ot::writeNodesToFile(bFile,balOct);
00195 }
00196
00197 locSz = balOct.size();
00198 localTime = endTime - startTime;
00199 par::Mpi_Reduce<DendroIntL>(&locSz, &totalSz, 1, MPI_SUM, 0, MPI_COMM_WORLD);
00200 par::Mpi_Reduce<double>(&localTime, &totalTime, 1, MPI_MAX, 0, MPI_COMM_WORLD);
00201
00202 if(!rank) {
00203 std::cout << "# of Balanced Octants: "<< totalSz << std::endl;
00204 std::cout << "bal Time: "<<totalTime << std::endl;
00205 }
00206
00207
00208 MPI_Barrier(MPI_COMM_WORLD);
00209 #ifdef PETSC_USE_LOG
00210 PetscLogStagePush(stages[2]);
00211 #endif
00212 startTime = MPI_Wtime();
00213 assert(!(balOct.empty()));
00214 ot::DA da(balOct, MPI_COMM_WORLD, MPI_COMM_WORLD, compressLut);
00215 endTime = MPI_Wtime();
00216 #ifdef PETSC_USE_LOG
00217 PetscLogStagePop();
00218 #endif
00219 balOct.clear();
00220
00221 locSz = da.getNodeSize();
00222 localTime = endTime - startTime;
00223 par::Mpi_Reduce<DendroIntL>(&locSz, &totalSz, 1, MPI_SUM, 0, MPI_COMM_WORLD);
00224 par::Mpi_Reduce<double>(&localTime, &totalTime, 1, MPI_MAX, 0, MPI_COMM_WORLD);
00225
00226 if(!rank) {
00227 std::cout << "Total # Vertices: "<< totalSz << std::endl;
00228 std::cout << "Time to build ODA: "<<totalTime << std::endl;
00229 }
00230
00231 MPI_Barrier(MPI_COMM_WORLD);
00232 #ifdef PETSC_USE_LOG
00233 PetscLogStagePush(stages[3]);
00234 #endif
00235
00236 Mat J;
00237 Vec in, out, diag;
00238 PetscScalar zero = 0.0;
00239
00240
00241 da.createVector(in,false,false,1);
00242 da.createVector(out,false,false,1);
00243 da.createVector(diag,false,false,1);
00244
00245 createLmatType2(LaplacianType2Stencil);
00246 createMmatType2(MassType2Stencil);
00247 if(!rank) {
00248 std::cout << "Created stencils."<< std::endl;
00249 }
00250
00251 if(!rank) {
00252 std::cout<<rank << " Creating Jacobian" << std::endl;
00253 }
00254
00255 iC(CreateJacobian1(&da,&J));
00256
00257 if(!rank) {
00258 std::cout<<rank << " Computing Jacobian" << std::endl;
00259 }
00260
00261 iC(ComputeJacobian1(&da,J));
00262
00263 if(!rank) {
00264 std::cout<<rank << " Finished computing Jacobian" << std::endl;
00265 }
00266
00267 VecSet(in, zero);
00268
00269 for(unsigned int i=0;i<numLoops;i++) {
00270 iC(Jacobian1MatGetDiagonal(J, diag));
00271 iC(Jacobian1MatMult(J, in, out));
00272 }
00273
00274 VecDestroy(in);
00275 VecDestroy(out);
00276 VecDestroy(diag);
00277
00278 iC(Jacobian1MatDestroy(J));
00279
00280 destroyLmatType2(LaplacianType2Stencil);
00281 destroyMmatType2(MassType2Stencil);
00282
00283 if(!rank) {
00284 std::cout << "Destroyed stencils."<< std::endl;
00285 }
00286
00287 #ifdef PETSC_USE_LOG
00288 PetscLogStagePop();
00289 #endif
00290 if (!rank) {
00291 std::cout << GRN << "Finalizing PETSC" << NRM << std::endl;
00292 }
00293 ot::DA_Finalize();
00294 PetscFinalize();
00295 }
00296