00001
00002 #include "mpi.h"
00003 #include "petsc.h"
00004 #include "sys.h"
00005 #include "parUtils.h"
00006 #include "octUtils.h"
00007 #include "TreeNode.h"
00008 #include "omg.h"
00009 #include "handleStencils.h"
00010 #include "elasticityJac.h"
00011 #include "omgJac.h"
00012 #include <cstdlib>
00013 #include "externVars.h"
00014 #include "dendro.h"
00015
00016 #ifdef MPI_WTIME_IS_GLOBAL
00017 #undef MPI_WTIME_IS_GLOBAL
00018 #endif
00019
00020 #ifndef iC
00021 #define iC(fun) {CHKERRQ(fun);}
00022 #endif
00023
00024 #ifdef PETSC_USE_LOG
00025 int elasticityDiagEvent;
00026 int elasticityMultEvent;
00027 int elasticityFinestDiagEvent;
00028 int elasticityFinestMultEvent;
00029
00030 int vecMassDiagEvent;
00031 int vecMassMultEvent;
00032 int vecMassFinestDiagEvent;
00033 int vecMassFinestMultEvent;
00034 #endif
00035
00036 #ifdef PETSC_USE_LOG
00037
00038 int Jac1DiagEvent;
00039 int Jac1MultEvent;
00040 int Jac1FinestDiagEvent;
00041 int Jac1FinestMultEvent;
00042
00043 int Jac2DiagEvent;
00044 int Jac2MultEvent;
00045 int Jac2FinestDiagEvent;
00046 int Jac2FinestMultEvent;
00047
00048 int Jac3DiagEvent;
00049 int Jac3MultEvent;
00050 int Jac3FinestDiagEvent;
00051 int Jac3FinestMultEvent;
00052 #endif
00053
00054 double***** LaplacianType1Stencil;
00055 double**** LaplacianType2Stencil;
00056 double***** MassType1Stencil;
00057 double**** MassType2Stencil;
00058 double****** ShapeFnStencil;
00059
00060 double**** GradDivType2Stencil;
00061
00062 double gaussian(double mean, double std_deviation);
00063
00064 int main(int argc, char ** argv ) {
00065 int size, rank;
00066 double startTime, endTime;
00067 double localTime, globalTime;
00068 double gSize[3];
00069 unsigned int local_num_pts = 5000;
00070 unsigned int dim = 3;
00071 unsigned int maxDepth = 30;
00072 unsigned int maxNumPts = 1;
00073 bool writePts = false;
00074 bool incCorner = 1;
00075 bool compressLut = false;
00076 double mgLoadFac = 2.0;
00077 unsigned int numLoops = 100;
00078 std::vector<ot::TreeNode> linOct;
00079 std::vector<ot::TreeNode> balOct;
00080 std::vector<ot::TreeNode> tmpNodes;
00081 std::vector<double> pts;
00082 unsigned int ptsLen;
00083 DendroIntL localSz, totalSz;
00084
00085 PetscInitialize(&argc, &argv, "options", NULL);
00086 ot::RegisterEvents();
00087
00088 #ifdef PETSC_USE_LOG
00089 PetscLogEventRegister(&Jac1DiagEvent,"ODAmatDiag",PETSC_VIEWER_COOKIE);
00090 PetscLogEventRegister(&Jac1MultEvent,"ODAmatMult",PETSC_VIEWER_COOKIE);
00091 PetscLogEventRegister(&Jac1FinestDiagEvent,"ODAmatDiagFinest",PETSC_VIEWER_COOKIE);
00092 PetscLogEventRegister(&Jac1FinestMultEvent,"ODAmatMultFinest",PETSC_VIEWER_COOKIE);
00093
00094 PetscLogEventRegister(&Jac2DiagEvent,"OMGmatDiag-2",PETSC_VIEWER_COOKIE);
00095 PetscLogEventRegister(&Jac2MultEvent,"OMGmatMult-2",PETSC_VIEWER_COOKIE);
00096 PetscLogEventRegister(&Jac2FinestDiagEvent,"OMGmatDiagFinest-2",PETSC_VIEWER_COOKIE);
00097 PetscLogEventRegister(&Jac2FinestMultEvent,"OMGmatMultFinest-2",PETSC_VIEWER_COOKIE);
00098
00099 PetscLogEventRegister(&Jac3DiagEvent,"OMGmatDiag-3",PETSC_VIEWER_COOKIE);
00100 PetscLogEventRegister(&Jac3MultEvent,"OMGmatMult-3",PETSC_VIEWER_COOKIE);
00101 PetscLogEventRegister(&Jac3FinestDiagEvent,"OMGmatDiagFinest-3",PETSC_VIEWER_COOKIE);
00102 PetscLogEventRegister(&Jac3FinestMultEvent,"OMGmatMultFinest-3",PETSC_VIEWER_COOKIE);
00103 PetscLogEventRegister(&vecMassDiagEvent,"vMassDiag",PETSC_VIEWER_COOKIE);
00104 PetscLogEventRegister(&vecMassMultEvent,"vMassMult",PETSC_VIEWER_COOKIE);
00105 PetscLogEventRegister(&vecMassFinestDiagEvent,"vMassDiagFinest",PETSC_VIEWER_COOKIE);
00106 PetscLogEventRegister(&vecMassFinestMultEvent,"vMassMultFinest",PETSC_VIEWER_COOKIE);
00107
00108 PetscLogEventRegister(&elasticityDiagEvent,"elDiag",PETSC_VIEWER_COOKIE);
00109 PetscLogEventRegister(&elasticityMultEvent,"elMult",PETSC_VIEWER_COOKIE);
00110 PetscLogEventRegister(&elasticityFinestDiagEvent,"elDiagFinest",PETSC_VIEWER_COOKIE);
00111 PetscLogEventRegister(&elasticityFinestMultEvent,"elMultFinest",PETSC_VIEWER_COOKIE);
00112
00113 int stages[3];
00114 PetscLogStageRegister(&stages[0],"P2O.");
00115 PetscLogStageRegister(&stages[1],"Bal");
00116 PetscLogStageRegister(&stages[2],"Solve");
00117 #endif
00118
00119 MPI_Comm_size(MPI_COMM_WORLD, &size);
00120 MPI_Comm_rank(MPI_COMM_WORLD, &rank);
00121
00122
00123
00124
00125 if(argc > 1) {
00126 local_num_pts = atoi(argv[1]);
00127 }
00128 if(argc > 2) {
00129 dim = atoi(argv[2]);
00130 }
00131 if(argc > 3) {
00132 maxDepth = atoi(argv[3]);
00133 }
00134 if(argc > 4) {
00135 maxNumPts = atoi(argv[4]);
00136 }
00137 if(argc > 5) {
00138 incCorner = (bool)(atoi(argv[5]));
00139 }
00140 if(argc > 6) {
00141 compressLut = (bool)(atoi(argv[6]));
00142 }
00143 if(argc > 7) {
00144 mgLoadFac = atof(argv[7]);
00145 }
00146 if(argc > 8) {
00147 numLoops = atoi(argv[8]);
00148 }
00149
00150 MPI_Barrier(MPI_COMM_WORLD);
00151 if(!rank){
00152 std::cout << "Creating points on the fly... "<<std::endl;
00153 }
00154
00155 startTime = MPI_Wtime();
00156
00157 srand48(0x12345678 + 76543*rank);
00158
00159 pts.resize(3*local_num_pts);
00160 for(unsigned int i = 0; i < (3*local_num_pts); i++) {
00161 pts[i]= gaussian(0.5, 0.16);
00162 }
00163
00164 MPI_Barrier(MPI_COMM_WORLD);
00165 if(!rank){
00166 std::cout << " finished creating points "<<std::endl;
00167 }
00168 endTime = MPI_Wtime();
00169 localTime = endTime - startTime;
00170 if(!rank){
00171 std::cout <<"point generation time: "<<localTime << std::endl;
00172 }
00173
00174 ptsLen = pts.size();
00175
00176 if(writePts) {
00177 char ptsFileName[100];
00178 sprintf(ptsFileName, "tempPts%d_%d.pts", rank, size);
00179 ot::writePtsToFile(ptsFileName, pts);
00180 }
00181
00182 for(unsigned int i = 0; i < ptsLen; i+=3) {
00183 if( (pts[i] > 0.0) &&
00184 (pts[i+1] > 0.0)
00185 && (pts[i+2] > 0.0) &&
00186 ( ((unsigned int)(pts[i]*((double)(1u << maxDepth)))) < (1u << maxDepth)) &&
00187 ( ((unsigned int)(pts[i+1]*((double)(1u << maxDepth)))) < (1u << maxDepth)) &&
00188 ( ((unsigned int)(pts[i+2]*((double)(1u << maxDepth)))) < (1u << maxDepth)) ) {
00189 tmpNodes.push_back( ot::TreeNode((unsigned int)(pts[i]*(double)(1u << maxDepth)),
00190 (unsigned int)(pts[i+1]*(double)(1u << maxDepth)),
00191 (unsigned int)(pts[i+2]*(double)(1u << maxDepth)),
00192 maxDepth,dim,maxDepth) );
00193 }
00194 }
00195 pts.clear();
00196
00197 MPI_Barrier(MPI_COMM_WORLD);
00198
00199 if(!rank){
00200 std::cout <<"Removing bad points... "<<localTime << std::endl;
00201 }
00202
00203 par::removeDuplicates<ot::TreeNode>(tmpNodes, false, MPI_COMM_WORLD);
00204 linOct = tmpNodes;
00205 tmpNodes.clear();
00206
00207 MPI_Barrier(MPI_COMM_WORLD);
00208
00209 if(!rank){
00210 std::cout <<"Partitioning Input... "<<localTime << std::endl;
00211 }
00212
00213 par::partitionW<ot::TreeNode>(linOct, NULL, MPI_COMM_WORLD);
00214
00215
00216 localSz = linOct.size();
00217 par::Mpi_Reduce<DendroIntL>(&localSz, &totalSz, 1, MPI_SUM, 0, MPI_COMM_WORLD);
00218
00219 MPI_Barrier(MPI_COMM_WORLD);
00220
00221 if(!rank) {
00222 std::cout<<"# pts= " << totalSz<<std::endl;
00223 }
00224
00225 MPI_Barrier(MPI_COMM_WORLD);
00226
00227 pts.resize(3*(linOct.size()));
00228 ptsLen = (3*(linOct.size()));
00229 for(int i = 0; i < linOct.size(); i++) {
00230 pts[3*i] = (((double)(linOct[i].getX())) + 0.5)/((double)(1u << maxDepth));
00231 pts[(3*i)+1] = (((double)(linOct[i].getY())) +0.5)/((double)(1u << maxDepth));
00232 pts[(3*i)+2] = (((double)(linOct[i].getZ())) +0.5)/((double)(1u << maxDepth));
00233 }
00234 linOct.clear();
00235
00236 gSize[0] = 1.0;
00237 gSize[1] = 1.0;
00238 gSize[2] = 1.0;
00239
00240 MPI_Barrier(MPI_COMM_WORLD);
00241
00242 #ifdef PETSC_USE_LOG
00243 PetscLogStagePush(stages[0]);
00244 #endif
00245 startTime = MPI_Wtime();
00246 ot::points2Octree(pts, gSize, linOct, dim, maxDepth, maxNumPts, MPI_COMM_WORLD);
00247 endTime = MPI_Wtime();
00248 localTime = endTime - startTime;
00249 #ifdef PETSC_USE_LOG
00250 PetscLogStagePop();
00251 #endif
00252 par::Mpi_Reduce<double>(&localTime, &globalTime, 1, MPI_MAX, 0, MPI_COMM_WORLD);
00253 if(!rank){
00254 std::cout <<"P2n Time: "<<globalTime << "secs " << std::endl;
00255 }
00256 pts.clear();
00257
00258 localSz = linOct.size();
00259 par::Mpi_Reduce<DendroIntL>(&localSz, &totalSz, 1, MPI_SUM, 0, MPI_COMM_WORLD);
00260 if(rank==0) {
00261 std::cout<<"linOct.size = " << totalSz<<std::endl;
00262 }
00263
00264 MPI_Barrier(MPI_COMM_WORLD);
00265
00266 #ifdef PETSC_USE_LOG
00267 PetscLogStagePush(stages[1]);
00268 #endif
00269 startTime = MPI_Wtime();
00270 ot::balanceOctree (linOct, balOct, dim, maxDepth, incCorner, MPI_COMM_WORLD, NULL, NULL);
00271 endTime = MPI_Wtime();
00272 localTime = endTime - startTime;
00273 #ifdef PETSC_USE_LOG
00274 PetscLogStagePop();
00275 #endif
00276 par::Mpi_Reduce<double>(&localTime, &globalTime, 1, MPI_MAX, 0, MPI_COMM_WORLD);
00277 if(!rank){
00278 std::cout <<"Bal Time: "<<globalTime << "secs " << std::endl;
00279 }
00280 linOct.clear();
00281
00282 localSz = balOct.size();
00283 par::Mpi_Reduce<DendroIntL>(&localSz, &totalSz, 1, MPI_SUM, 0, MPI_COMM_WORLD);
00284 if(rank==0) {
00285 std::cout<<"balOct.size = " << totalSz<<std::endl;
00286 }
00287
00288 #ifdef PETSC_USE_LOG
00289 PetscLogStagePush(stages[2]);
00290 #endif
00291 ot::DAMG_Initialize(MPI_COMM_WORLD);
00292
00293 ot::DAMG *damg;
00294 int nlevels = 1;
00295 unsigned int dof = 3;
00296
00297 ot::DAMGCreateAndSetDA(PETSC_COMM_WORLD, nlevels, NULL, &damg,
00298 balOct, dof, mgLoadFac, compressLut, incCorner);
00299
00300 balOct.clear();
00301
00302 MPI_Barrier(MPI_COMM_WORLD);
00303 ot::PrintDAMG(damg);
00304 MPI_Barrier(MPI_COMM_WORLD);
00305
00306 createLmatType2(LaplacianType2Stencil);
00307 createMmatType2(MassType2Stencil);
00308 createGDmatType2(GradDivType2Stencil);
00309
00310 ot::DAMGCreateSuppressedDOFs(damg);
00311
00312 SetElasticityContexts(damg);
00313
00314 ot::getPrivateMatricesForKSP_Shell = getPrivateMatricesForKSP_Shell_Elas;
00315
00316 VecZeroEntries(DAMGGetx(damg));
00317
00318 DAMGSetKSP(damg, CreateElasticityMat, ComputeElasticityMat, ComputeRHS4);
00319
00320 for(unsigned int i = 0; i < numLoops; i++) {
00321 MatMult(DAMGGetJ(damg), DAMGGetx(damg), DAMGGetRHS(damg));
00322 }
00323
00324 destroyLmatType2(LaplacianType2Stencil);
00325 destroyMmatType2(MassType2Stencil);
00326 destroyGDmatType2(GradDivType2Stencil);
00327
00328 DestroyElasticityContexts(damg);
00329
00330 DAMGDestroy(damg);
00331
00332 ot::DAMG_Finalize();
00333
00334 #ifdef PETSC_USE_LOG
00335 PetscLogStagePop();
00336 #endif
00337
00338 PetscFinalize();
00339 }
00340
00341 double gaussian(double mean, double std_deviation) {
00342 static double t1 = 0, t2=0;
00343 double x1, x2, x3, r;
00344
00345 using namespace std;
00346
00347
00348 if(t1) {
00349 const double tmp = t1;
00350 t1 = 0;
00351 return mean + std_deviation * tmp;
00352 }
00353 if(t2) {
00354 const double tmp = t2;
00355 t2 = 0;
00356 return mean + std_deviation * tmp;
00357 }
00358
00359
00360 do {
00361 x1 = 2 * drand48() - 1;
00362 x2 = 2 * drand48() - 1;
00363 x3 = 2 * drand48() - 1;
00364 r = x1 * x1 + x2 * x2 + x3*x3;
00365 } while(r >= 1);
00366
00367
00368 r = sqrt(-2.0 * log(r) / r);
00369
00370
00371 t1 = (r * x2);
00372 t2 = (r * x3);
00373
00374 return mean + (std_deviation * r * x1);
00375 }
00376
00377