00001
00007 #include "dtypes.h"
00008 #include "petscpc.h"
00009 #include "petscmg.h"
00010 #include "petscmat.h"
00011 #include "private/pcimpl.h"
00012 #include "omg.h"
00013 #include "oda.h"
00014 #include "odaUtils.h"
00015 #include "parUtils.h"
00016 #include <iostream>
00017 #include "dendro.h"
00018
00019 #ifndef iC
00020 #define iC(fun) {CHKERRQ(fun);}
00021 #endif
00022
00023 #ifdef __DEBUG__
00024 #ifndef __DEBUG_MG__
00025 #define __DEBUG_MG__
00026 #endif
00027 #endif
00028
00029 namespace ot {
00030
00031 extern double ***** RmatType1Stencil;
00032 extern double **** RmatType2Stencil;
00033 extern unsigned short**** VtxMap1;
00034 extern unsigned short***** VtxMap2;
00035 extern unsigned short***** VtxMap3;
00036 extern unsigned short****** VtxMap4;
00037
00038 extern void (*getPrivateMatricesForKSP_Shell)(Mat mat,
00039 Mat *AmatPrivate, Mat *PmatPrivate, MatStructure* pFlag);
00040
00041
00042
00043 int DAMGCreateSuppressedDOFs(DAMG* damg) {
00044 int nlevels = damg[0]->nlevels;
00045 unsigned int dof = damg[0]->dof;
00046 for(int i = 0; i < nlevels; i++) {
00047 unsigned int sz = (dof*(damg[i]->da->getLocalBufferSize()));
00048 if(sz) {
00049 damg[i]->suppressedDOF = new unsigned char[sz];
00050 }
00051
00052 if(damg[i]->da_aux) {
00053 unsigned int sz2 = (dof*(damg[i]->da_aux->getLocalBufferSize()));
00054 if(sz2) {
00055 damg[i]->suppressedDOFaux = new unsigned char[sz2];
00056 }
00057 }
00058 }
00059 return 1;
00060 }
00061
00062 PetscErrorCode DAMG_Initialize(MPI_Comm comm) {
00063 PROF_MG_INIT_BEGIN
00064
00065 ot::DA_Initialize(comm);
00066
00067 #ifdef __USE_MG_INIT_TYPE3__
00068 ot::DAMG_InitPrivateType3(comm);
00069 #else
00070 #ifdef __USE_MG_INIT_TYPE2__
00071 ot::DAMG_InitPrivateType2(comm);
00072 #else
00073 ot::DAMG_InitPrivateType1(comm);
00074 #endif
00075 #endif
00076
00077 PROF_MG_INIT_END
00078 }
00079
00080 void DAMG_InitPrivateType3(MPI_Comm comm) {
00081
00082 int rank;
00083 MPI_Comm_rank(comm, &rank);
00084
00085 IreadRmatType1Stencil(RmatType1Stencil, rank);
00086 IreadRmatType2Stencil(RmatType2Stencil, rank);
00087 IreadVtxMaps(VtxMap1, VtxMap2, VtxMap3, VtxMap4, rank);
00088
00089 }
00090
00091 void DAMG_InitPrivateType2(MPI_Comm comm) {
00092
00093 int rank, npes;
00094 MPI_Comm_rank(comm, &rank);
00095 MPI_Comm_size(comm, &npes);
00096
00097 const int THOUSAND = 1000;
00098 int numGroups = (npes/THOUSAND);
00099 if( (numGroups*THOUSAND) < npes) {
00100 numGroups++;
00101 }
00102
00103 MPI_Comm newComm;
00104
00105 bool* isEmptyList = new bool[npes];
00106 for(int i = 0; i < numGroups; i++) {
00107 for(int j = 0; (j < (i*THOUSAND)) && (j < npes); j++) {
00108 isEmptyList[j] = true;
00109 }
00110 for(int j = (i*THOUSAND); (j < ((i+1)*THOUSAND)) && (j < npes); j++) {
00111 isEmptyList[j] = false;
00112 }
00113 for(int j = ((i + 1)*THOUSAND); j < npes; j++) {
00114 isEmptyList[j] = true;
00115 }
00116 MPI_Comm tmpComm;
00117 par::splitComm2way(isEmptyList, &tmpComm, comm);
00118 if(!(isEmptyList[rank])) {
00119 newComm = tmpComm;
00120 }
00121 }
00122 delete [] isEmptyList;
00123
00124 if( (rank % THOUSAND) == 0) {
00125 IreadRmatType1Stencil(RmatType1Stencil, (rank/THOUSAND));
00126 IreadRmatType2Stencil(RmatType2Stencil, (rank/THOUSAND));
00127 IreadVtxMaps(VtxMap1, VtxMap2, VtxMap3, VtxMap4, (rank/THOUSAND));
00128 } else {
00129
00130 typedef double**** double4Ptr;
00131 typedef double*** double3Ptr;
00132 typedef double** double2Ptr;
00133 typedef double* doublePtr;
00134
00135 RmatType1Stencil = new double4Ptr[8];
00136 for(int i = 0; i < 8; i++) {
00137 RmatType1Stencil[i] = new double3Ptr[8];
00138 for(int j = 0; j < 8; j++) {
00139 RmatType1Stencil[i][j] = new double2Ptr[18];
00140 for(int k = 0; k < 18; k++) {
00141 RmatType1Stencil[i][j][k] = new doublePtr[8];
00142 for(int l = 0; l < 8; l++) {
00143 RmatType1Stencil[i][j][k][l] = new double[8];
00144 }
00145 }
00146 }
00147 }
00148
00149 RmatType2Stencil = new double3Ptr[8];
00150 for(int j = 0; j < 8; j++) {
00151 RmatType2Stencil[j] = new double2Ptr[18];
00152 for(int k = 0; k < 18; k++) {
00153 RmatType2Stencil[j][k] = new doublePtr[8];
00154 for(int l = 0; l < 8; l++) {
00155 RmatType2Stencil[j][k][l] = new double[8];
00156 }
00157 }
00158 }
00159
00160 typedef unsigned short***** us5Ptr;
00161 typedef unsigned short**** us4Ptr;
00162 typedef unsigned short*** us3Ptr;
00163 typedef unsigned short** us2Ptr;
00164 typedef unsigned short* usPtr;
00165
00166 VtxMap1 = new us3Ptr[8];
00167 VtxMap2 = new us4Ptr[8];
00168 VtxMap3 = new us4Ptr[7];
00169 VtxMap4 = new us5Ptr[7];
00170 for(int i = 0; i < 8; i++) {
00171 VtxMap1[i] = new us2Ptr[8];
00172 VtxMap2[i] = new us3Ptr[8];
00173 for(int j = 0; j < 8; j++) {
00174 VtxMap1[i][j] = new usPtr[18];
00175 VtxMap2[i][j] = new us2Ptr[8];
00176 for(int k = 0; k < 18; k++) {
00177 VtxMap1[i][j][k] = new unsigned short[8];
00178 }
00179 for(int k = 0; k < 8; k++) {
00180 VtxMap2[i][j][k] = new usPtr[18];
00181 for(int l = 0; l < 18; l++) {
00182 VtxMap2[i][j][k][l] = new unsigned short[8];
00183 }
00184 }
00185 }
00186 }
00187
00188 for(int i = 0; i < 7; i++) {
00189 VtxMap3[i] = new us3Ptr[2];
00190 VtxMap4[i] = new us4Ptr[2];
00191 for(int j = 0; j < 2; j++) {
00192 VtxMap3[i][j] = new us2Ptr[8];
00193 VtxMap4[i][j] = new us3Ptr[8];
00194 for(int k = 0; k < 8; k++) {
00195 VtxMap3[i][j][k] = new usPtr[18];
00196 VtxMap4[i][j][k] = new us2Ptr[8];
00197 for(int l = 0; l < 18; l++) {
00198 VtxMap3[i][j][k][l] =new unsigned short[8];
00199 }
00200 for(int l = 0; l < 8; l++) {
00201 VtxMap4[i][j][k][l] = new usPtr[18];
00202 for(int m = 0; m < 18; m++) {
00203 VtxMap4[i][j][k][l][m] = new unsigned short[8];
00204 }
00205 }
00206 }
00207 }
00208 }
00209
00210 }
00211
00212
00213
00214
00215
00216 double * tmpRmats = new double [82944];
00217
00218 if((rank % THOUSAND) == 0) {
00219 unsigned int ctr = 0;
00220 for(int i = 0; i < 8; i++) {
00221 for(int j = 0; j < 8; j++) {
00222 for(int k = 0; k < 18; k++) {
00223 for(int l = 0; l < 8; l++) {
00224 for(int m = 0; m < 8; m++) {
00225 tmpRmats[ctr] = RmatType1Stencil[i][j][k][l][m];
00226 ctr++;
00227 }
00228 }
00229 }
00230 }
00231 }
00232 for(int j = 0; j < 8; j++) {
00233 for(int k = 0; k < 18; k++) {
00234 for(int l = 0; l < 8; l++) {
00235 for(int m = 0; m < 8; m++) {
00236 tmpRmats[ctr] = RmatType2Stencil[j][k][l][m];
00237 ctr++;
00238 }
00239 }
00240 }
00241 }
00242 }
00243
00244 par::Mpi_Bcast<double>(tmpRmats, 82944, 0, newComm);
00245
00246 if((rank % THOUSAND) != 0) {
00247 unsigned int ctr = 0;
00248 for(int i = 0; i < 8; i++) {
00249 for(int j = 0; j < 8; j++) {
00250 for(int k = 0; k < 18; k++) {
00251 for(int l = 0; l < 8; l++) {
00252 for(int m = 0; m < 8; m++) {
00253 RmatType1Stencil[i][j][k][l][m] = tmpRmats[ctr];
00254 ctr++;
00255 }
00256 }
00257 }
00258 }
00259 }
00260 for(int j = 0; j < 8; j++) {
00261 for(int k = 0; k < 18; k++) {
00262 for(int l = 0; l < 8; l++) {
00263 for(int m = 0; m < 8; m++) {
00264 RmatType2Stencil[j][k][l][m] = tmpRmats[ctr];
00265 ctr++;
00266 }
00267 }
00268 }
00269 }
00270 }
00271
00272 delete [] tmpRmats;
00273
00274
00275
00276
00277
00278
00279 unsigned short * tmpVtxMaps = new unsigned short[228096];
00280
00281 if((rank % THOUSAND) == 0) {
00282 unsigned int ctr = 0;
00283 for(int i = 0; i < 8; i++) {
00284 for(int j = 0; j < 8; j++) {
00285 for(int k = 0; k < 18; k++) {
00286 for(int l = 0; l < 8; l++) {
00287 tmpVtxMaps[ctr] = VtxMap1[i][j][k][l];
00288 ctr++;
00289 }
00290 }
00291 }
00292 }
00293 for(int i = 0; i < 8; i++) {
00294 for(int j = 0; j < 8; j++) {
00295 for(int k = 0; k < 8; k++) {
00296 for(int l = 0; l < 18; l++) {
00297 for(int m = 0; m < 8; m++) {
00298 tmpVtxMaps[ctr] = VtxMap2[i][j][k][l][m];
00299 ctr++;
00300 }
00301 }
00302 }
00303 }
00304 }
00305 for(int i = 0; i < 7; i++) {
00306 for(int j = 0; j < 2; j++) {
00307 for(int k = 0; k < 8; k++) {
00308 for(int l = 0; l < 18; l++) {
00309 for(int m = 0; m < 8; m++) {
00310 tmpVtxMaps[ctr] = VtxMap3[i][j][k][l][m];
00311 ctr++;
00312 }
00313 }
00314 }
00315 }
00316 }
00317 for(int i = 0; i < 7; i++) {
00318 for(int j = 0; j < 2; j++) {
00319 for(int k = 0; k < 8; k++) {
00320 for(int l = 0; l < 8; l++) {
00321 for(int m = 0; m < 18; m++) {
00322 for(int n = 0; n < 8; n++) {
00323 tmpVtxMaps[ctr] = VtxMap4[i][j][k][l][m][n];
00324 ctr++;
00325 }
00326 }
00327 }
00328 }
00329 }
00330 }
00331 }
00332
00333 par::Mpi_Bcast<unsigned short>(tmpVtxMaps, 228096, 0, newComm);
00334
00335 if((rank % THOUSAND) != 0) {
00336 unsigned int ctr = 0;
00337 for(int i = 0; i < 8; i++) {
00338 for(int j = 0; j < 8; j++) {
00339 for(int k = 0; k < 18; k++) {
00340 for(int l = 0; l < 8; l++) {
00341 VtxMap1[i][j][k][l] = tmpVtxMaps[ctr];
00342 ctr++;
00343 }
00344 }
00345 }
00346 }
00347 for(int i = 0; i < 8; i++) {
00348 for(int j = 0; j < 8; j++) {
00349 for(int k = 0; k < 8; k++) {
00350 for(int l = 0; l < 18; l++) {
00351 for(int m = 0; m < 8; m++) {
00352 VtxMap2[i][j][k][l][m] = tmpVtxMaps[ctr];
00353 ctr++;
00354 }
00355 }
00356 }
00357 }
00358 }
00359 for(int i = 0; i < 7; i++) {
00360 for(int j = 0; j < 2; j++) {
00361 for(int k = 0; k < 8; k++) {
00362 for(int l = 0; l < 18; l++) {
00363 for(int m = 0; m < 8; m++) {
00364 VtxMap3[i][j][k][l][m] = tmpVtxMaps[ctr];
00365 ctr++;
00366 }
00367 }
00368 }
00369 }
00370 }
00371 for(int i = 0; i < 7; i++) {
00372 for(int j = 0; j < 2; j++) {
00373 for(int k = 0; k < 8; k++) {
00374 for(int l = 0; l < 8; l++) {
00375 for(int m = 0; m < 18; m++) {
00376 for(int n = 0; n < 8; n++) {
00377 VtxMap4[i][j][k][l][m][n] = tmpVtxMaps[ctr];
00378 ctr++;
00379 }
00380 }
00381 }
00382 }
00383 }
00384 }
00385 }
00386
00387 delete [] tmpVtxMaps;
00388
00389 }
00390
00391 void DAMG_InitPrivateType1(MPI_Comm comm) {
00392
00393 int rank;
00394 MPI_Comm_rank(comm, &rank);
00395
00396 if(!rank) {
00397
00398 readRmatType1Stencil(RmatType1Stencil);
00399 readRmatType2Stencil(RmatType2Stencil);
00400 readVtxMaps(VtxMap1, VtxMap2, VtxMap3, VtxMap4);
00401 } else {
00402
00403 typedef double**** double4Ptr;
00404 typedef double*** double3Ptr;
00405 typedef double** double2Ptr;
00406 typedef double* doublePtr;
00407
00408 RmatType1Stencil = new double4Ptr[8];
00409 for(int i = 0; i < 8; i++) {
00410 RmatType1Stencil[i] = new double3Ptr[8];
00411 for(int j = 0; j < 8; j++) {
00412 RmatType1Stencil[i][j] = new double2Ptr[18];
00413 for(int k = 0; k < 18; k++) {
00414 RmatType1Stencil[i][j][k] = new doublePtr[8];
00415 for(int l = 0; l < 8; l++) {
00416 RmatType1Stencil[i][j][k][l] = new double[8];
00417 }
00418 }
00419 }
00420 }
00421
00422 RmatType2Stencil = new double3Ptr[8];
00423 for(int j = 0; j < 8; j++) {
00424 RmatType2Stencil[j] = new double2Ptr[18];
00425 for(int k = 0; k < 18; k++) {
00426 RmatType2Stencil[j][k] = new doublePtr[8];
00427 for(int l = 0; l < 8; l++) {
00428 RmatType2Stencil[j][k][l] = new double[8];
00429 }
00430 }
00431 }
00432
00433 typedef unsigned short***** us5Ptr;
00434 typedef unsigned short**** us4Ptr;
00435 typedef unsigned short*** us3Ptr;
00436 typedef unsigned short** us2Ptr;
00437 typedef unsigned short* usPtr;
00438
00439 VtxMap1 = new us3Ptr[8];
00440 VtxMap2 = new us4Ptr[8];
00441 VtxMap3 = new us4Ptr[7];
00442 VtxMap4 = new us5Ptr[7];
00443 for(int i = 0; i < 8; i++) {
00444 VtxMap1[i] = new us2Ptr[8];
00445 VtxMap2[i] = new us3Ptr[8];
00446 for(int j = 0; j < 8; j++) {
00447 VtxMap1[i][j] = new usPtr[18];
00448 VtxMap2[i][j] = new us2Ptr[8];
00449 for(int k = 0; k < 18; k++) {
00450 VtxMap1[i][j][k] = new unsigned short[8];
00451 }
00452 for(int k = 0; k < 8; k++) {
00453 VtxMap2[i][j][k] = new usPtr[18];
00454 for(int l = 0; l < 18; l++) {
00455 VtxMap2[i][j][k][l] = new unsigned short[8];
00456 }
00457 }
00458 }
00459 }
00460
00461 for(int i = 0; i < 7; i++) {
00462 VtxMap3[i] = new us3Ptr[2];
00463 VtxMap4[i] = new us4Ptr[2];
00464 for(int j = 0; j < 2; j++) {
00465 VtxMap3[i][j] = new us2Ptr[8];
00466 VtxMap4[i][j] = new us3Ptr[8];
00467 for(int k = 0; k < 8; k++) {
00468 VtxMap3[i][j][k] = new usPtr[18];
00469 VtxMap4[i][j][k] = new us2Ptr[8];
00470 for(int l = 0; l < 18; l++) {
00471 VtxMap3[i][j][k][l] =new unsigned short[8];
00472 }
00473 for(int l = 0; l < 8; l++) {
00474 VtxMap4[i][j][k][l] = new usPtr[18];
00475 for(int m = 0; m < 18; m++) {
00476 VtxMap4[i][j][k][l][m] = new unsigned short[8];
00477 }
00478 }
00479 }
00480 }
00481 }
00482
00483 }
00484
00485
00486
00487
00488
00489 double * tmpRmats = new double [82944];
00490
00491 if(!rank) {
00492 unsigned int ctr = 0;
00493 for(int i = 0; i < 8; i++) {
00494 for(int j = 0; j < 8; j++) {
00495 for(int k = 0; k < 18; k++) {
00496 for(int l = 0; l < 8; l++) {
00497 for(int m = 0; m < 8; m++) {
00498 tmpRmats[ctr] = RmatType1Stencil[i][j][k][l][m];
00499 ctr++;
00500 }
00501 }
00502 }
00503 }
00504 }
00505 for(int j = 0; j < 8; j++) {
00506 for(int k = 0; k < 18; k++) {
00507 for(int l = 0; l < 8; l++) {
00508 for(int m = 0; m < 8; m++) {
00509 tmpRmats[ctr] = RmatType2Stencil[j][k][l][m];
00510 ctr++;
00511 }
00512 }
00513 }
00514 }
00515 }
00516
00517 par::Mpi_Bcast<double>(tmpRmats, 82944, 0, comm);
00518
00519 if(rank) {
00520 unsigned int ctr = 0;
00521 for(int i = 0; i < 8; i++) {
00522 for(int j = 0; j < 8; j++) {
00523 for(int k = 0; k < 18; k++) {
00524 for(int l = 0; l < 8; l++) {
00525 for(int m = 0; m < 8; m++) {
00526 RmatType1Stencil[i][j][k][l][m] = tmpRmats[ctr];
00527 ctr++;
00528 }
00529 }
00530 }
00531 }
00532 }
00533 for(int j = 0; j < 8; j++) {
00534 for(int k = 0; k < 18; k++) {
00535 for(int l = 0; l < 8; l++) {
00536 for(int m = 0; m < 8; m++) {
00537 RmatType2Stencil[j][k][l][m] = tmpRmats[ctr];
00538 ctr++;
00539 }
00540 }
00541 }
00542 }
00543 }
00544
00545 delete [] tmpRmats;
00546
00547
00548
00549
00550
00551
00552 unsigned short * tmpVtxMaps = new unsigned short[228096];
00553
00554 if(!rank) {
00555 unsigned int ctr = 0;
00556 for(int i = 0; i < 8; i++) {
00557 for(int j = 0; j < 8; j++) {
00558 for(int k = 0; k < 18; k++) {
00559 for(int l = 0; l < 8; l++) {
00560 tmpVtxMaps[ctr] = VtxMap1[i][j][k][l];
00561 ctr++;
00562 }
00563 }
00564 }
00565 }
00566 for(int i = 0; i < 8; i++) {
00567 for(int j = 0; j < 8; j++) {
00568 for(int k = 0; k < 8; k++) {
00569 for(int l = 0; l < 18; l++) {
00570 for(int m = 0; m < 8; m++) {
00571 tmpVtxMaps[ctr] = VtxMap2[i][j][k][l][m];
00572 ctr++;
00573 }
00574 }
00575 }
00576 }
00577 }
00578 for(int i = 0; i < 7; i++) {
00579 for(int j = 0; j < 2; j++) {
00580 for(int k = 0; k < 8; k++) {
00581 for(int l = 0; l < 18; l++) {
00582 for(int m = 0; m < 8; m++) {
00583 tmpVtxMaps[ctr] = VtxMap3[i][j][k][l][m];
00584 ctr++;
00585 }
00586 }
00587 }
00588 }
00589 }
00590 for(int i = 0; i < 7; i++) {
00591 for(int j = 0; j < 2; j++) {
00592 for(int k = 0; k < 8; k++) {
00593 for(int l = 0; l < 8; l++) {
00594 for(int m = 0; m < 18; m++) {
00595 for(int n = 0; n < 8; n++) {
00596 tmpVtxMaps[ctr] = VtxMap4[i][j][k][l][m][n];
00597 ctr++;
00598 }
00599 }
00600 }
00601 }
00602 }
00603 }
00604 }
00605
00606 par::Mpi_Bcast<unsigned short>(tmpVtxMaps, 228096, 0, comm);
00607
00608 if(rank) {
00609 unsigned int ctr = 0;
00610 for(int i = 0; i < 8; i++) {
00611 for(int j = 0; j < 8; j++) {
00612 for(int k = 0; k < 18; k++) {
00613 for(int l = 0; l < 8; l++) {
00614 VtxMap1[i][j][k][l] = tmpVtxMaps[ctr];
00615 ctr++;
00616 }
00617 }
00618 }
00619 }
00620 for(int i = 0; i < 8; i++) {
00621 for(int j = 0; j < 8; j++) {
00622 for(int k = 0; k < 8; k++) {
00623 for(int l = 0; l < 18; l++) {
00624 for(int m = 0; m < 8; m++) {
00625 VtxMap2[i][j][k][l][m] = tmpVtxMaps[ctr];
00626 ctr++;
00627 }
00628 }
00629 }
00630 }
00631 }
00632 for(int i = 0; i < 7; i++) {
00633 for(int j = 0; j < 2; j++) {
00634 for(int k = 0; k < 8; k++) {
00635 for(int l = 0; l < 18; l++) {
00636 for(int m = 0; m < 8; m++) {
00637 VtxMap3[i][j][k][l][m] = tmpVtxMaps[ctr];
00638 ctr++;
00639 }
00640 }
00641 }
00642 }
00643 }
00644 for(int i = 0; i < 7; i++) {
00645 for(int j = 0; j < 2; j++) {
00646 for(int k = 0; k < 8; k++) {
00647 for(int l = 0; l < 8; l++) {
00648 for(int m = 0; m < 18; m++) {
00649 for(int n = 0; n < 8; n++) {
00650 VtxMap4[i][j][k][l][m][n] = tmpVtxMaps[ctr];
00651 ctr++;
00652 }
00653 }
00654 }
00655 }
00656 }
00657 }
00658 }
00659
00660 delete [] tmpVtxMaps;
00661
00662 }
00663
00664 PetscErrorCode DAMG_Finalize() {
00665 PROF_MG_FINAL_BEGIN
00666
00667 ot::DA_Finalize();
00668
00669 destroyRmatType1Stencil(RmatType1Stencil);
00670 destroyRmatType2Stencil(RmatType2Stencil);
00671 destroyVtxMaps(VtxMap1, VtxMap2, VtxMap3, VtxMap4);
00672
00673 PROF_MG_FINAL_END
00674 }
00675
00676 PetscErrorCode DAMGDestroy(DAMG* damg)
00677 {
00678 PetscErrorCode ierr;
00679 int i,nlevels = damg[0]->nlevels;
00680
00681 PetscFunctionBegin;
00682
00683 int rank;
00684 MPI_Comm_rank(damg[0]->comm,&rank);
00685
00686 if (!damg) SETERRQ(PETSC_ERR_ARG_NULL,"Passing null as DAMG");
00687
00688 for (i = 1; i < nlevels; i++) {
00689 if (damg[i]->R) {
00690 ierr = MatDestroy(damg[i]->R);
00691 CHKERRQ(ierr);
00692 }
00693 }
00694
00695 for (i = 0; i < nlevels; i++) {
00696 if (damg[i]->x) {
00697 ierr = VecDestroy(damg[i]->x);
00698 CHKERRQ(ierr);
00699 }
00700
00701 if (damg[i]->b) {
00702 ierr = VecDestroy(damg[i]->b);
00703 CHKERRQ(ierr);
00704 }
00705
00706 if (damg[i]->r) {
00707 ierr = VecDestroy(damg[i]->r);
00708 CHKERRQ(ierr);
00709 }
00710
00711 if(damg[i]->suppressedDOF) {
00712 delete [] (damg[i]->suppressedDOF);
00713 damg[i]->suppressedDOF = NULL;
00714 }
00715
00716 if(damg[i]->suppressedDOFaux) {
00717 delete [] (damg[i]->suppressedDOFaux);
00718 damg[i]->suppressedDOFaux = NULL;
00719 }
00720
00721 if (damg[i]->B && (damg[i]->B != damg[i]->J)) {
00722 ierr = MatDestroy(damg[i]->B);CHKERRQ(ierr);
00723 }
00724
00725 if (damg[i]->J) {
00726 ierr = MatDestroy(damg[i]->J);CHKERRQ(ierr);
00727 }
00728
00729 if (damg[i]->ksp) {
00730 ierr = KSPDestroy(damg[i]->ksp);CHKERRQ(ierr);
00731 }
00732
00733 if (damg[i]->da) {
00734 delete damg[i]->da;
00735 damg[i]->da = NULL;
00736 }
00737
00738 if (damg[i]->da_aux) {
00739 delete damg[i]->da_aux;
00740 damg[i]->da_aux = NULL;
00741 }
00742
00743 delete damg[i];
00744 damg[i] = NULL;
00745 }
00746
00747 delete [] damg;
00748 damg = NULL;
00749 PetscFunctionReturn(0);
00750 }
00751
00752
00753
00754
00755 PetscErrorCode DAMGCreateJMatrix(DAMG damg, PetscErrorCode (*crjac)(DAMG,Mat*)) {
00756 PetscErrorCode ierr;
00757 PetscFunctionBegin;
00758 if(crjac){
00759 ierr = (*crjac)(damg,&(damg->J)); CHKERRQ(ierr);
00760 }else{
00761 SETERRQ(PETSC_ERR_ARG_NULL,"ot::DA can not create a Matrix for you! Pass a function handle.");
00762 }
00763 PetscFunctionReturn(0);
00764 }
00765
00766
00767
00768
00769
00770 PetscErrorCode DAMGSetKSP(DAMG *damg, PetscErrorCode (*crjac)(DAMG,Mat*),
00771 PetscErrorCode (*compJac)(DAMG,Mat,Mat), PetscErrorCode (*rhs)(DAMG,Vec) )
00772 {
00773 PetscErrorCode ierr;
00774 int nlevels = damg[0]->nlevels;
00775
00776
00777 PROF_MG_SET_KSP_BEGIN
00778
00779 int rank;
00780 MPI_Comm_rank(damg[0]->comm, &rank);
00781
00782 if (!damg) SETERRQ(PETSC_ERR_ARG_NULL,"Passing null as DAMG");
00783 if (!crjac) SETERRQ(PETSC_ERR_ARG_NULL,"ot::DA can not create a Matrix for you! Pass a function handle.");
00784
00785 if (!damg[0]->ksp) {
00786
00787 for (int i = 0; i < nlevels; i++) {
00788 if (!damg[i]->B) {
00789 ierr = (*crjac)(damg[i],&(damg[i]->B));CHKERRQ(ierr);
00790 }
00791 if (!damg[i]->J) {
00792 #ifdef __DEBUG_MG__
00793 if(!rank) {
00794 std::cout<<"J and B are the same for lev: "<<i<<std::endl;
00795 fflush(stdout);
00796 }
00797 #endif
00798 damg[i]->J = damg[i]->B;
00799 }else {
00800 #ifdef __DEBUG_MG__
00801 if(!rank) {
00802 std::cout<<"J and B are different for lev: "<<i<<std::endl;
00803 fflush(stdout);
00804 }
00805 #endif
00806 assert(damg[i]->J != damg[i]->B);
00807 }
00808
00809 ierr = KSPCreate(damg[i]->comm,&(damg[i]->ksp));CHKERRQ(ierr);
00810
00811
00812 if(i < (nlevels-1)) {
00813 char prefix[256];
00814 sprintf(prefix,"damg_levels_%d_",(int)i);
00815 KSPSetOptionsPrefix(damg[i]->ksp,prefix);
00816 }
00817
00818 ierr = DAMGSetUpLevel(damg,damg[i]->ksp,i+1); CHKERRQ(ierr);
00819
00820 ierr = KSPSetFromOptions(damg[i]->ksp);CHKERRQ(ierr);
00821
00822 if( (damg[0]->da->getNpesActive()) < (damg[0]->da->getNpesAll()) ) {
00823
00824
00825 PC pc;
00826 const char* clearOptionPrefix;
00827 char optionName[256];
00828 if(i == 0) {
00829 KSPGetOptionsPrefix(damg[0]->ksp, &clearOptionPrefix);
00830
00831 sprintf(optionName, "-%sksp_type",clearOptionPrefix);
00832 ierr = PetscOptionsClearValue(optionName); CHKERRQ(ierr);
00833 ierr = KSPSetType(damg[0]->ksp, KSPRICHARDSON); CHKERRQ(ierr);
00834
00835 sprintf(optionName, "-%sksp_knoll",clearOptionPrefix);
00836 ierr = PetscOptionsClearValue(optionName); CHKERRQ(ierr);
00837 ierr = KSPSetInitialGuessKnoll(damg[0]->ksp, PETSC_FALSE);
00838 CHKERRQ(ierr);
00839
00840 sprintf(optionName, "-%sksp_richardson_scale",clearOptionPrefix);
00841 ierr = PetscOptionsClearValue(optionName); CHKERRQ(ierr);
00842 ierr = KSPRichardsonSetScale(damg[0]->ksp, 1.0);
00843 CHKERRQ(ierr);
00844
00845 sprintf(optionName, "-%sksp_right_pc",clearOptionPrefix);
00846 ierr = PetscOptionsClearValue(optionName); CHKERRQ(ierr);
00847 sprintf(optionName, "-%sksp_symmetric_pc",clearOptionPrefix);
00848 ierr = PetscOptionsClearValue(optionName); CHKERRQ(ierr);
00849 ierr = KSPSetPreconditionerSide(damg[0]->ksp, PC_LEFT);
00850 CHKERRQ(ierr);
00851
00852 sprintf(optionName, "-%sksp_norm_type",clearOptionPrefix);
00853 ierr = PetscOptionsClearValue(optionName); CHKERRQ(ierr);
00854 ierr = KSPSetNormType(damg[0]->ksp, KSP_NORM_NO);
00855 CHKERRQ(ierr);
00856
00857 sprintf(optionName, "-%sksp_rtol",clearOptionPrefix);
00858 ierr = PetscOptionsClearValue(optionName); CHKERRQ(ierr);
00859 sprintf(optionName, "-%sksp_atol",clearOptionPrefix);
00860 ierr = PetscOptionsClearValue(optionName); CHKERRQ(ierr);
00861 sprintf(optionName, "-%sksp_divtol",clearOptionPrefix);
00862 ierr = PetscOptionsClearValue(optionName); CHKERRQ(ierr);
00863 sprintf(optionName, "-%sksp_max_it",clearOptionPrefix);
00864 ierr = PetscOptionsClearValue(optionName); CHKERRQ(ierr);
00865 ierr = KSPSetTolerances(damg[0]->ksp, PETSC_DEFAULT, PETSC_DEFAULT,
00866 PETSC_DEFAULT, 1);
00867 CHKERRQ(ierr);
00868
00869 ierr = KSPSetConvergenceTest(damg[0]->ksp, KSPSkipConverged,
00870 PETSC_NULL);
00871 CHKERRQ(ierr);
00872
00873 ierr = KSPSetInitialGuessNonzero(damg[0]->ksp, PETSC_FALSE);
00874 CHKERRQ(ierr);
00875
00876 ierr = KSPGetPC(damg[0]->ksp, &pc); CHKERRQ(ierr);
00877
00878 PCGetOptionsPrefix(pc, &clearOptionPrefix);
00879 sprintf(optionName, "-%spc_type",clearOptionPrefix);
00880 ierr = PetscOptionsClearValue(optionName); CHKERRQ(ierr);
00881 ierr = PCSetType(pc, PCSHELL); CHKERRQ(ierr);
00882 ierr = PCShellSetName(pc, "PC_KSP_Shell"); CHKERRQ(ierr);
00883
00884 PC_KSP_Shell* pcShellContext = new PC_KSP_Shell;
00885 pcShellContext->sol_private = NULL;
00886 pcShellContext->rhs_private = NULL;
00887 pcShellContext->ksp_private = NULL;
00888 pcShellContext->pc = pc;
00889 pcShellContext->iAmActive = damg[0]->da->iAmActive();
00890 pcShellContext->commActive = damg[0]->da->getCommActive();
00891 ierr = PCShellSetContext(pc, pcShellContext); CHKERRQ(ierr);
00892 ierr = PCShellSetSetUp(pc, PC_KSP_Shell_SetUp); CHKERRQ(ierr);
00893 ierr = PCShellSetApply(pc, PC_KSP_Shell_Apply); CHKERRQ(ierr);
00894 ierr = PCShellSetDestroy(pc, PC_KSP_Shell_Destroy); CHKERRQ(ierr);
00895 } else {
00896 ierr = KSPGetPC(damg[i]->ksp, &pc); CHKERRQ(ierr);
00897 }
00898
00899 PetscTruth ismg;
00900 PetscTypeCompare((PetscObject)pc, PCMG, &ismg);
00901
00902 if(ismg) {
00903 PetscTruth useRTLMG;
00904 ierr = PetscOptionsHasName(PETSC_NULL,"-damg_useRTLMG",&useRTLMG);
00905 CHKERRQ(ierr);
00906 KSP lksp;
00907 if(useRTLMG) {
00908 while(ismg) {
00909 ierr = PCMGGetSmoother(pc,0,&lksp);CHKERRQ(ierr);
00910 ierr = KSPGetPC(lksp, &pc); CHKERRQ(ierr);
00911 PetscTypeCompare((PetscObject)pc, PCMG, &ismg);
00912 }
00913 } else {
00914 ierr = PCMGGetSmoother(pc,0,&lksp);CHKERRQ(ierr);
00915 }
00916
00917 KSPGetOptionsPrefix(lksp, &clearOptionPrefix);
00918
00919 sprintf(optionName, "-%sksp_type",clearOptionPrefix);
00920 ierr = PetscOptionsClearValue(optionName); CHKERRQ(ierr);
00921 ierr = KSPSetType(lksp, KSPRICHARDSON); CHKERRQ(ierr);
00922
00923 sprintf(optionName, "-%sksp_knoll",clearOptionPrefix);
00924 ierr = PetscOptionsClearValue(optionName); CHKERRQ(ierr);
00925 ierr = KSPSetInitialGuessKnoll(lksp, PETSC_FALSE);
00926 CHKERRQ(ierr);
00927
00928 sprintf(optionName, "-%sksp_richardson_scale",clearOptionPrefix);
00929 ierr = PetscOptionsClearValue(optionName); CHKERRQ(ierr);
00930 ierr = KSPRichardsonSetScale(lksp, 1.0);
00931 CHKERRQ(ierr);
00932
00933 sprintf(optionName, "-%sksp_right_pc",clearOptionPrefix);
00934 ierr = PetscOptionsClearValue(optionName); CHKERRQ(ierr);
00935 sprintf(optionName, "-%sksp_symmetric_pc",clearOptionPrefix);
00936 ierr = PetscOptionsClearValue(optionName); CHKERRQ(ierr);
00937 ierr = KSPSetPreconditionerSide(lksp, PC_LEFT);
00938 CHKERRQ(ierr);
00939
00940 sprintf(optionName, "-%sksp_norm_type",clearOptionPrefix);
00941 ierr = PetscOptionsClearValue(optionName); CHKERRQ(ierr);
00942 ierr = KSPSetNormType(lksp, KSP_NORM_NO);
00943 CHKERRQ(ierr);
00944
00945 sprintf(optionName, "-%sksp_rtol",clearOptionPrefix);
00946 ierr = PetscOptionsClearValue(optionName); CHKERRQ(ierr);
00947 sprintf(optionName, "-%sksp_atol",clearOptionPrefix);
00948 ierr = PetscOptionsClearValue(optionName); CHKERRQ(ierr);
00949 sprintf(optionName, "-%sksp_divtol",clearOptionPrefix);
00950 ierr = PetscOptionsClearValue(optionName); CHKERRQ(ierr);
00951 sprintf(optionName, "-%sksp_max_it",clearOptionPrefix);
00952 ierr = PetscOptionsClearValue(optionName); CHKERRQ(ierr);
00953 ierr = KSPSetTolerances(lksp, PETSC_DEFAULT, PETSC_DEFAULT,
00954 PETSC_DEFAULT, 1);
00955 CHKERRQ(ierr);
00956
00957 ierr = KSPSetConvergenceTest(lksp, KSPSkipConverged, PETSC_NULL);
00958 CHKERRQ(ierr);
00959
00960 ierr = KSPSetInitialGuessNonzero(lksp, PETSC_FALSE);
00961 CHKERRQ(ierr);
00962
00963 ierr = KSPGetPC(lksp, &pc); CHKERRQ(ierr);
00964
00965 PCGetOptionsPrefix(pc, &clearOptionPrefix);
00966 sprintf(optionName, "-%spc_type",clearOptionPrefix);
00967 ierr = PetscOptionsClearValue(optionName); CHKERRQ(ierr);
00968 ierr = PCSetType(pc, PCSHELL); CHKERRQ(ierr);
00969 ierr = PCShellSetName(pc, "PC_KSP_Shell"); CHKERRQ(ierr);
00970
00971 PC_KSP_Shell* pcShellContext = new PC_KSP_Shell;
00972 pcShellContext->sol_private = NULL;
00973 pcShellContext->rhs_private = NULL;
00974 pcShellContext->ksp_private = NULL;
00975 pcShellContext->pc = pc;
00976 pcShellContext->iAmActive = damg[0]->da->iAmActive();
00977 pcShellContext->commActive = damg[0]->da->getCommActive();
00978 ierr = PCShellSetContext(pc, pcShellContext); CHKERRQ(ierr);
00979 ierr = PCShellSetSetUp(pc, PC_KSP_Shell_SetUp); CHKERRQ(ierr);
00980 ierr = PCShellSetApply(pc, PC_KSP_Shell_Apply); CHKERRQ(ierr);
00981 ierr = PCShellSetDestroy(pc, PC_KSP_Shell_Destroy); CHKERRQ(ierr);
00982 }
00983 }
00984
00985 damg[i]->solve = DAMGSolveKSP;
00986 damg[i]->rhs = rhs;
00987 }
00988 }
00989
00990 #ifdef __DEBUG_MG__
00991 MPI_Barrier(damg[0]->comm);
00992 if(!rank) {
00993 std::cout<<"Finished setting up ksp for all levels."<<std::endl;
00994 fflush(stdout);
00995 }
00996 MPI_Barrier(damg[0]->comm);
00997 #endif
00998
00999
01000 for (int i = 0; i < nlevels; i++) {
01001 if(compJac) {
01002 ierr = (*compJac)(damg[i],damg[i]->J,damg[i]->B); CHKERRQ(ierr);
01003 }
01004 }
01005
01006
01007 for(int level = 0; level < nlevels; level++) {
01008 KSPSetOperators(damg[level]->ksp, damg[level]->J,
01009 damg[level]->B, SAME_NONZERO_PATTERN);
01010
01011 PC pc;
01012 KSPGetPC(damg[level]->ksp, &pc);
01013
01014 PetscTruth ismg;
01015 PetscTypeCompare((PetscObject)pc, PCMG, &ismg);
01016
01017 if(ismg) {
01018 for(int i = 0; i <= level; i++) {
01019 KSP lksp;
01020 PCMGGetSmoother(pc, i, &lksp);
01021 KSPSetOperators(lksp, damg[i]->J, damg[i]->B, SAME_NONZERO_PATTERN);
01022 }
01023 }
01024 }
01025
01026 #ifdef __DEBUG_MG__
01027 MPI_Barrier(damg[0]->comm);
01028 if(!rank) {
01029 std::cout<<"Finished building matrices for all levels."<<std::endl;
01030 fflush(stdout);
01031 }
01032 MPI_Barrier(damg[0]->comm);
01033 #endif
01034
01035 PROF_MG_SET_KSP_END
01036 }
01037
01038 PetscErrorCode DAMGSetNullSpace(DAMG* damg, PetscTruth has_cnst, PetscInt n,
01039 PetscErrorCode (*func)(DAMG,Vec[]))
01040 {
01041 PetscErrorCode ierr;
01042 int i, j, nlevels = damg[0]->nlevels;
01043 Vec *nulls = 0;
01044 MatNullSpace nullsp;
01045 KSP iksp;
01046 PC pc,ipc;
01047 PetscTruth ismg,isred;
01048
01049 PetscFunctionBegin;
01050 if (!damg) SETERRQ(PETSC_ERR_ARG_NULL,"Passing null as DAMG");
01051 if (!damg[0]->ksp) SETERRQ(PETSC_ERR_ORDER,"Must call AFTER DAMGSetKSP() or DAMGSetSNES()");
01052 if ((n && !func) || (!n && func)) SETERRQ(PETSC_ERR_ARG_INCOMP,"Both n and func() must be set together");
01053 if (n < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Cannot have negative number of vectors in null space n = %D",n)
01054
01055 for (i = 0; i < nlevels; i++) {
01056 if (n) {
01057 ierr = VecDuplicateVecs(damg[i]->b,n,&nulls);CHKERRQ(ierr);
01058 ierr = (*func)(damg[i],nulls);CHKERRQ(ierr);
01059 }
01060 ierr = MatNullSpaceCreate(damg[i]->comm,has_cnst,n,nulls,&nullsp);CHKERRQ(ierr);
01061 ierr = KSPSetNullSpace(damg[i]->ksp,nullsp);CHKERRQ(ierr);
01062 for (j = i; j < nlevels; j++) {
01063 ierr = KSPGetPC(damg[j]->ksp,&pc);CHKERRQ(ierr);
01064 ierr = PetscTypeCompare((PetscObject)pc,PCMG,&ismg);CHKERRQ(ierr);
01065 if (ismg) {
01066 ierr = PCMGGetSmoother(pc,i,&iksp);CHKERRQ(ierr);
01067 ierr = KSPSetNullSpace(iksp, nullsp);CHKERRQ(ierr);
01068 }
01069 }
01070 ierr = MatNullSpaceDestroy(nullsp);CHKERRQ(ierr);
01071 if (n) {
01072 ierr = PetscFree(nulls);CHKERRQ(ierr);
01073 }
01074 }
01075
01076
01077 for (i = 0; i < nlevels; i++) {
01078 ierr = KSPGetPC(damg[i]->ksp,&pc);CHKERRQ(ierr);
01079 ierr = PetscTypeCompare((PetscObject)pc,PCMG,&ismg);CHKERRQ(ierr);
01080 if (ismg) {
01081 ierr = PCMGGetSmoother(pc,0,&iksp);CHKERRQ(ierr);
01082 ierr = KSPGetPC(iksp,&ipc);CHKERRQ(ierr);
01083 ierr = PetscTypeCompare((PetscObject)ipc,PCREDUNDANT,&isred);CHKERRQ(ierr);
01084 if (isred) {
01085 ierr = PCRedundantGetPC(ipc,&ipc);CHKERRQ(ierr);
01086 }
01087 ierr = PCFactorSetShiftPd(ipc,PETSC_TRUE);CHKERRQ(ierr);
01088 }
01089 }
01090
01091 PetscFunctionReturn(0);
01092 }
01093
01094 PetscErrorCode DAMGSetInitialGuess(DAMG* damg, PetscErrorCode (*guess)(DAMG, Vec)) {
01095 int i, nlevels = damg[0]->nlevels;
01096 for(i = 0; i < nlevels; i++) {
01097 damg[i]->initialguess = guess;
01098 }
01099 return(0);
01100 }
01101
01102 PetscErrorCode DAMGInitialGuessCurrent(DAMG damg, Vec vec) {
01103 return (0);
01104 }
01105
01106 PetscErrorCode DAMGSolve(DAMG* damg)
01107 {
01108 PetscErrorCode ierr;
01109 int i,nlevels = damg[0]->nlevels;
01110 PetscTruth gridseq,vecmonitor;
01111
01112 PetscFunctionBegin;
01113 ierr = PetscOptionsHasName(0,"-damg_grid_sequence",&gridseq);CHKERRQ(ierr);
01114 ierr = PetscOptionsHasName(0,"-damg_vecmonitor",&vecmonitor);CHKERRQ(ierr);
01115 if (gridseq) {
01116 if(damg[0]->initialguess) {
01117 (*(damg[0]->initialguess))(damg[0],damg[0]->x);
01118 KSPSetInitialGuessNonzero(damg[0]->ksp, PETSC_TRUE);
01119 }
01120 for (i=0; i<nlevels-1; i++) {
01121 ierr = (*damg[i]->solve)(damg,i);CHKERRQ(ierr);
01122 if (vecmonitor) {
01123 ierr = VecView(damg[i]->x,PETSC_VIEWER_DRAW_(damg[i]->comm));CHKERRQ(ierr);
01124 }
01125 ierr = MatInterpolate(damg[i+1]->R,damg[i]->x,damg[i+1]->x);CHKERRQ(ierr);
01126 KSPSetInitialGuessNonzero(damg[i+1]->ksp, PETSC_TRUE);
01127 }
01128 }else {
01129 if(damg[nlevels-1]->initialguess) {
01130 (*(damg[nlevels-1]->initialguess))(damg[nlevels-1], damg[nlevels-1]->x);
01131 KSPSetInitialGuessNonzero(damg[nlevels-1]->ksp, PETSC_TRUE);
01132 }
01133 }
01134 ierr = (*DAMGGetFine(damg)->solve)(damg, nlevels-1);CHKERRQ(ierr);
01135 if (vecmonitor) {
01136 ierr = VecView(damg[nlevels-1]->x,PETSC_VIEWER_DRAW_(damg[nlevels-1]->comm));CHKERRQ(ierr);
01137 }
01138 PetscFunctionReturn(0);
01139 }
01140
01141 PetscErrorCode DAMGSolveKSP(DAMG *damg, int level)
01142 {
01143 PetscErrorCode ierr;
01144
01145 PetscFunctionBegin;
01146
01147 if (damg[level]->rhs) {
01148 ierr = (*damg[level]->rhs)(damg[level],damg[level]->b);CHKERRQ(ierr);
01149 }
01150
01151 ierr = KSPSolve(damg[level]->ksp, damg[level]->b, damg[level]->x);CHKERRQ(ierr);
01152
01153 PetscFunctionReturn(0);
01154 }
01155
01156 PetscErrorCode DAMGSetUpLevel(DAMG* damg, KSP ksp, int nlevels)
01157 {
01158 PetscErrorCode ierr;
01159 PC pc;
01160 PetscTruth ismg,monitor,ismf,isshell,ismffd;
01161 PetscTruth useRTLMG;
01162 KSP lksp;
01163 MPI_Comm *comms,comm;
01164 PetscViewer ascii;
01165
01166 PetscFunctionBegin;
01167 if (!damg) SETERRQ(PETSC_ERR_ARG_NULL,"Passing null as DAMG");
01168
01169 ierr = PetscOptionsHasName(PETSC_NULL,"-damg_ksp_monitor",&monitor);CHKERRQ(ierr);
01170 ierr = PetscOptionsHasName(PETSC_NULL,"-damg_useRTLMG",&useRTLMG);CHKERRQ(ierr);
01171
01172 if (monitor) {
01173 ierr = PetscObjectGetComm((PetscObject)ksp,&comm);CHKERRQ(ierr);
01174 ierr = PetscViewerASCIIOpen(comm,"stdout",&ascii);CHKERRQ(ierr);
01175 ierr = PetscViewerASCIISetTab(ascii,1+(damg[0]->nlevels)-nlevels);CHKERRQ(ierr);
01176 ierr = KSPMonitorSet(ksp, KSPMonitorDefault, ascii,
01177 (PetscErrorCode(*)(void*))PetscViewerDestroy);CHKERRQ(ierr);
01178 }
01179
01180
01181 ierr = KSPSetType(ksp, KSPFGMRES); CHKERRQ(ierr);
01182 ierr = KSPGetPC(ksp,&pc); CHKERRQ(ierr);
01183 ierr = PCSetType(pc,PCMG); CHKERRQ(ierr);
01184
01185 const char *mainKSPprefix;
01186 KSPGetOptionsPrefix(ksp, &mainKSPprefix);
01187
01188 if(useRTLMG) {
01189 char prefixName[256];
01190 sprintf(prefixName, "rtlmg_finest_");
01191 PCSetOptionsPrefix(pc, mainKSPprefix);
01192 PCAppendOptionsPrefix(pc, prefixName);
01193
01194 for(int i = (nlevels-1); i >= 1; i--) {
01195 ierr = PetscMalloc(2*sizeof(MPI_Comm),&comms);CHKERRQ(ierr);
01196 for (int j = 0; j < 2; j++) {
01197 comms[j] = damg[j]->comm;
01198 }
01199 PCMGSetLevels(pc,2,comms);
01200 PetscFree(comms);
01201 PCMGSetType(pc,PC_MG_FULL);
01202 PetscTypeCompare((PetscObject)pc,PCMG,&ismg);
01203 if (ismg) {
01204
01205 PCMGGetSmoother(pc,1,&lksp);
01206 KSPSetOperators(lksp,damg[i]->B,damg[i]->B,DIFFERENT_NONZERO_PATTERN);
01207 PCMGSetR(pc,1,damg[i]->r);
01208 PCMGSetResidual(pc,1,PCMGDefaultResidual,damg[i]->B);
01209
01210 PCMGGetSmoother(pc,0,&lksp);
01211 KSPSetOperators(lksp,damg[i-1]->J,damg[i-1]->J,DIFFERENT_NONZERO_PATTERN);
01212 PCMGSetX(pc,0,damg[i-1]->x);
01213 PCMGSetRhs(pc,0,damg[i-1]->b);
01214
01215 PCMGSetInterpolation(pc,1,damg[i]->R);
01216 PCMGSetRestriction(pc,1,damg[i]->R);
01217 if(i > 1) {
01218
01219 KSPSetType(lksp,KSPFGMRES);
01220 KSPGetPC(lksp,&pc);
01221 PCSetType(pc,PCMG);
01222 sprintf(prefixName, "rtlmg_levels_%d_",((int)(i-1)));
01223 PCSetOptionsPrefix(pc, mainKSPprefix);
01224 PCAppendOptionsPrefix(pc, prefixName);
01225 }
01226 }
01227 }
01228 } else {
01229
01230 ierr = PetscMalloc(nlevels*sizeof(MPI_Comm),&comms);CHKERRQ(ierr);
01231 for (int i = 0; i < nlevels; i++) {
01232 comms[i] = damg[i]->comm;
01233 }
01234 ierr = PCMGSetLevels(pc,nlevels,comms);CHKERRQ(ierr);
01235 ierr = PCMGSetType(pc,PC_MG_FULL);CHKERRQ(ierr);
01236
01237 ierr = PetscFree(comms);CHKERRQ(ierr);
01238
01239 ierr = PetscTypeCompare((PetscObject)pc,PCMG,&ismg);CHKERRQ(ierr);
01240 if (ismg) {
01241
01242 for (int i = 0; i < nlevels; i++) {
01243 ierr = PCMGGetSmoother(pc, i, &lksp);CHKERRQ(ierr);
01244
01245 ierr = KSPSetOperators(lksp, damg[i]->J, damg[i]->B,
01246 DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
01247
01248 if (i < nlevels-1) {
01249 ierr = PCMGSetX(pc,i,damg[i]->x);CHKERRQ(ierr);
01250 ierr = PCMGSetRhs(pc,i,damg[i]->b);CHKERRQ(ierr);
01251 }
01252 if (i > 0) {
01253 ierr = PCMGSetR(pc,i,damg[i]->r);CHKERRQ(ierr);
01254 ierr = PCMGSetResidual(pc,i,PCMGDefaultResidual,damg[i]->J);CHKERRQ(ierr);
01255 }
01256 if (monitor) {
01257 ierr = PetscObjectGetComm((PetscObject)lksp,&comm);CHKERRQ(ierr);
01258 ierr = PetscViewerASCIIOpen(comm,"stdout",&ascii);CHKERRQ(ierr);
01259 ierr = PetscViewerASCIISetTab(ascii,1+damg[0]->nlevels-i);CHKERRQ(ierr);
01260 ierr = KSPMonitorSet(lksp,KSPMonitorDefault,ascii,
01261 (PetscErrorCode(*)(void*))PetscViewerDestroy);CHKERRQ(ierr);
01262 }
01263
01264
01265
01266 ierr = PetscTypeCompare((PetscObject)damg[i]->B,MATSHELL,&isshell);CHKERRQ(ierr);
01267 ierr = PetscTypeCompare((PetscObject)damg[i]->B,MATDAAD,&ismf);CHKERRQ(ierr);
01268 ierr = PetscTypeCompare((PetscObject)damg[i]->B,MATMFFD,&ismffd);CHKERRQ(ierr);
01269 if (isshell || ismf || ismffd) {
01270 PC lpc;
01271 ierr = KSPGetPC(lksp,&lpc);CHKERRQ(ierr);
01272
01273 ierr = PCSetType(lpc,PCNONE);CHKERRQ(ierr);
01274 }
01275 }
01276
01277
01278 for (int i=1; i<nlevels; i++) {
01279 ierr = PCMGSetInterpolation(pc,i,damg[i]->R);CHKERRQ(ierr);
01280 ierr = PCMGSetRestriction(pc,i,damg[i]->R);CHKERRQ(ierr);
01281 }
01282 }
01283 }
01284
01285 PetscFunctionReturn(0);
01286 }
01287
01288
01289
01290
01291 PetscErrorCode DAMGCreateAndSetDA(MPI_Comm comm, int & nlevels,
01292 void* user, DAMG** damg, std::vector<ot::TreeNode>& finestOctree,
01293 unsigned int dof, double loadFac,
01294 bool compressLut, bool incCorner) {
01295
01296 PROF_MG_SET_DA_BEGIN
01297
01298 PROF_SET_DA_STAGE1_BEGIN
01299
01300 PetscErrorCode ierr;
01301 int rank, npes;
01302 MPI_Comm_rank(comm, &rank);
01303 MPI_Comm_size(comm, &npes);
01304
01305 #ifndef __SILENT_MODE__
01306 if(!rank) {
01307 std::cout<<"MG-Load Fac: "<<loadFac<<std::endl;
01308 fflush(stdout);
01309 }
01310 #endif
01311
01312 par::partitionW<ot::TreeNode>(finestOctree, NULL, comm);
01313
01314 assert(nlevels > 0);
01315 if(finestOctree.empty()) {
01316 std::cout<<"Processor "<<rank<<
01317 " called DAMGCreateAndSetDA with an empty finest octree."<<std::endl;
01318 fflush(stdout);
01319 assert(false);
01320 }
01321
01322 unsigned int dim = finestOctree[0].getDim();
01323 unsigned int maxDepth = finestOctree[0].getMaxDepth();
01324
01325 PROF_SET_DA_STAGE1_END
01326 #ifdef __PROF_WITH_BARRIER__
01327 MPI_Barrier(comm);
01328 #endif
01329 PROF_SET_DA_STAGE2_BEGIN
01330
01331 int idxOfCoarsestLev = -1;
01332
01333 std::vector<ot::TreeNode>* coarserOctrees = NULL;
01334
01335 bool* activeStatesInCoarseBal = NULL;
01336 MPI_Comm* activeCommsInCoarseBal = NULL;
01337 int* activeNpesInCoarseBal = NULL;
01338
01339 if(nlevels > 1) {
01340 coarserOctrees = new std::vector<ot::TreeNode> [nlevels-1];
01341 activeStatesInCoarseBal = new bool[nlevels - 1];
01342 activeCommsInCoarseBal = new MPI_Comm[nlevels - 1];
01343 activeNpesInCoarseBal = new int[nlevels - 1];
01344
01345
01346
01347
01348 for(int i = 0; i < (nlevels-1); i++) {
01349 activeStatesInCoarseBal[i] = false;
01350 }
01351
01352 MPI_Comm tmpComm1 = comm;
01353 MPI_Comm tmpComm2;
01354 bool iAmActiveForCoarsening = true;
01355 bool repeatLoop = true;
01356 while( (idxOfCoarsestLev < (nlevels-2)) && (repeatLoop) ) {
01357 std::vector<ot::TreeNode> tmpOctree;
01358
01359 if (idxOfCoarsestLev == -1) {
01360
01361
01362 ot::coarsenOctree(finestOctree, tmpOctree, dim, maxDepth,
01363 tmpComm1, true, &tmpComm2, &iAmActiveForCoarsening);
01364 } else {
01365
01366
01367
01368
01369 ot::coarsenOctree(coarserOctrees[idxOfCoarsestLev], tmpOctree,
01370 dim, maxDepth, tmpComm1, false, &tmpComm2, &iAmActiveForCoarsening);
01371 }
01372
01373 idxOfCoarsestLev++;
01374
01375
01376 if(iAmActiveForCoarsening) {
01377 ot::balanceOctree(tmpOctree, coarserOctrees[idxOfCoarsestLev],
01378 dim, maxDepth, incCorner, tmpComm2, &tmpComm1, &iAmActiveForCoarsening);
01379 }
01380 tmpOctree.clear();
01381
01382
01383
01384
01385 activeStatesInCoarseBal[idxOfCoarsestLev] = iAmActiveForCoarsening;
01386 activeCommsInCoarseBal[idxOfCoarsestLev] = tmpComm1;
01387
01388 if(iAmActiveForCoarsening) {
01389 MPI_Comm_size(tmpComm1, (activeNpesInCoarseBal + idxOfCoarsestLev));
01390 if( (activeNpesInCoarseBal[idxOfCoarsestLev] == 1) &&
01391 (coarserOctrees[idxOfCoarsestLev].size() < 500) ) {
01392 repeatLoop = false;
01393 }
01394 } else {
01395 assert(coarserOctrees[idxOfCoarsestLev].empty());
01396 break;
01397 }
01398
01399 }
01400 }
01401
01402 PROF_SET_DA_STAGE2_END
01403 #ifdef __PROF_WITH_BARRIER__
01404 MPI_Barrier(comm);
01405 #endif
01406 PROF_SET_DA_STAGE3_BEGIN
01407
01408
01409 par::Mpi_Bcast<int>(&idxOfCoarsestLev, 1, 0, comm);
01410
01411
01412 nlevels = (idxOfCoarsestLev + 2);
01413
01414
01415
01416
01417
01418
01419
01420 if(nlevels > 1) {
01421 par::Mpi_Bcast<int>(activeNpesInCoarseBal, (nlevels - 1), 0, comm);
01422 }
01423
01424 #ifdef __DEBUG_MG__
01425 char fname[256];
01426 for(int lev = 0; lev < (nlevels - 1); lev++) {
01427 sprintf(fname, "coarseOctBeforeBdy_%d_%d_%d.ot", lev, rank, npes);
01428 ot::writeNodesToFile(fname, coarserOctrees[lev]);
01429 }
01430 sprintf(fname, "finestOctBeforeBdy_%d_%d.ot", rank, npes);
01431 ot::writeNodesToFile(fname, finestOctree);
01432 #endif
01433
01434 #ifdef __USE_PVT_DA_IN_MG__
01435
01436 std::vector<ot::TreeNode> positiveBoundaries;
01437 ot::addBoundaryNodesType1(finestOctree, positiveBoundaries, dim, maxDepth);
01438
01439
01440 maxDepth = maxDepth + 1;
01441
01442
01443
01444
01445
01446 MPI_Comm bdyComm;
01447 par::splitComm2way((positiveBoundaries.empty()), &bdyComm, comm);
01448
01449 if(!(positiveBoundaries.empty())) {
01450
01451 std::vector<ot::TreeNode > tmpVecTN;
01452 par::sampleSort<ot::TreeNode>(positiveBoundaries, tmpVecTN, bdyComm);
01453 positiveBoundaries = tmpVecTN;
01454 tmpVecTN.clear();
01455 }
01456
01457 par::concatenate<ot::TreeNode>(finestOctree, positiveBoundaries, comm);
01458 positiveBoundaries.clear();
01459
01460
01461 for (int lev = 0; lev < (nlevels - 1); lev++) {
01462 if(activeStatesInCoarseBal[lev]) {
01463 ot::addBoundaryNodesType1(coarserOctrees[lev], positiveBoundaries, dim, maxDepth-1);
01464
01465
01466
01467
01468
01469 par::splitComm2way((positiveBoundaries.empty()), &bdyComm, activeCommsInCoarseBal[lev]);
01470
01471 if(!(positiveBoundaries.empty())) {
01472
01473 std::vector<ot::TreeNode > tmpVecTN;
01474 par::sampleSort<ot::TreeNode>(positiveBoundaries, tmpVecTN, bdyComm);
01475 positiveBoundaries = tmpVecTN;
01476 tmpVecTN.clear();
01477 }
01478
01479 par::concatenate<ot::TreeNode>(coarserOctrees[lev], positiveBoundaries, activeCommsInCoarseBal[lev]);
01480 positiveBoundaries.clear();
01481 }
01482 }
01483
01484 #endif
01485
01486 #ifdef __DEBUG_MG__
01487 for(int lev = 0; lev < (nlevels - 1); lev++) {
01488 sprintf(fname, "coarseOctAfterBdy_%d_%d_%d.ot", lev, rank, npes);
01489 ot::writeNodesToFile(fname, coarserOctrees[lev]);
01490 }
01491 sprintf(fname, "finestOctAfterBdy_%d_%d.ot", rank, npes);
01492 ot::writeNodesToFile(fname, finestOctree);
01493 #endif
01494
01495
01496
01497 DendroIntL* localOctreeSizeForThisLevel = new DendroIntL[nlevels];
01498 DendroIntL* globalOctreeSizeForThisLevel = new DendroIntL[nlevels];
01499 localOctreeSizeForThisLevel[0] = finestOctree.size();
01500 for(int lev = 0; lev < (nlevels - 1); lev++) {
01501 localOctreeSizeForThisLevel[lev + 1] = coarserOctrees[lev].size();
01502 }
01503 par::Mpi_Allreduce<DendroIntL>(localOctreeSizeForThisLevel,
01504 globalOctreeSizeForThisLevel, nlevels, MPI_SUM, comm);
01505 delete [] localOctreeSizeForThisLevel;
01506
01507 #ifdef __DEBUG_MG__
01508 MPI_Barrier(comm);
01509 for(int i = 0; i < (nlevels - 1); i++) {
01510 if(rank < activeNpesInCoarseBal[i]) {
01511 assert(activeStatesInCoarseBal[i]);
01512 } else {
01513 assert(!(activeStatesInCoarseBal[i]));
01514 }
01515 }
01516 MPI_Barrier(comm);
01517 #endif
01518
01519
01520 int *maxProcsForThisLevel = new int [nlevels];
01521
01522 for(int i = 0; i < nlevels; i++) {
01523 const DendroIntL THOUSAND = 1000;
01524 if(globalOctreeSizeForThisLevel[i] < (THOUSAND*npes)) {
01525 int maxProcsToUse = (globalOctreeSizeForThisLevel[i]/THOUSAND);
01526 if(maxProcsToUse == 0) {
01527 maxProcsToUse = 1;
01528 }
01529 maxProcsForThisLevel[i] = maxProcsToUse;
01530 } else {
01531
01532 maxProcsForThisLevel[i] = npes;
01533 }
01534 }
01535
01536
01537
01538
01539
01540
01541
01542
01543
01544
01545
01546
01547
01548 if(nlevels > 1) {
01549 if(activeNpesInCoarseBal[nlevels - 2] < maxProcsForThisLevel[nlevels - 1]) {
01550 DendroIntL currentGrainSize =
01551 (globalOctreeSizeForThisLevel[nlevels - 1]/activeNpesInCoarseBal[nlevels - 2]);
01552
01553 DendroIntL expectedGrainSize =
01554 (globalOctreeSizeForThisLevel[nlevels - 1]/maxProcsForThisLevel[nlevels - 1]);
01555
01556 if(static_cast<double>(currentGrainSize) <
01557 (1.5*static_cast<double>(expectedGrainSize))) {
01558 maxProcsForThisLevel[nlevels - 1] = activeNpesInCoarseBal[nlevels - 2];
01559 }
01560 }
01561 }
01562
01563
01564
01565
01566
01567
01568
01569
01570
01571
01572
01573
01574
01575
01576
01577
01578
01579 for(int i = (nlevels-2); i >= 0; i--) {
01580 if(maxProcsForThisLevel[i] > maxProcsForThisLevel[i+1]) {
01581 DendroIntL avgGrainSize = (globalOctreeSizeForThisLevel[i]/maxProcsForThisLevel[i]);
01582
01583 DendroIntL avgGrainSizeUsing1LevCoarserNpes =
01584 (globalOctreeSizeForThisLevel[i]/maxProcsForThisLevel[i+1]);
01585
01586 if(static_cast<double>(avgGrainSizeUsing1LevCoarserNpes) <
01587 (1.5*static_cast<double>(avgGrainSize))) {
01588 maxProcsForThisLevel[i] = maxProcsForThisLevel[i+1];
01589 }
01590 } else {
01591 assert(maxProcsForThisLevel[i] == maxProcsForThisLevel[i+1]);
01592 }
01593 }
01594
01595 #ifndef __SILENT_MODE__
01596 if(!rank) {
01597 std::cout<<" Using "<<nlevels<<" Multigrid Levels."<<std::endl;
01598 fflush(stdout);
01599 for(int i = 0; i < nlevels; i++) {
01600 std::cout<<" maxNpes for lev "<< i <<": "<< maxProcsForThisLevel[i] <<std::endl;
01601 fflush(stdout);
01602 }
01603 }
01604 #endif
01605
01606 PROF_SET_DA_STAGE3_END
01607 #ifdef __PROF_WITH_BARRIER__
01608 MPI_Barrier(comm);
01609 #endif
01610 PROF_SET_DA_STAGE4_BEGIN
01611
01612
01613
01614 MPI_Comm* activeComms = new MPI_Comm[nlevels];
01615
01616 if(maxProcsForThisLevel[0] == npes) {
01617 activeComms[0] = comm;
01618 } else {
01619 #ifndef __SILENT_MODE__
01620 if(!rank) {
01621 std::cout<<"splitting Comm in SetDA (using comm) "<<npes<<" -> "<<maxProcsForThisLevel[0]<<std::endl;
01622 }
01623 #endif
01624 par::splitCommUsingSplittingRank(maxProcsForThisLevel[0], activeComms, comm);
01625 }
01626 for(int lev = 1; lev < nlevels; lev++) {
01627 if(maxProcsForThisLevel[lev] == maxProcsForThisLevel[lev - 1]) {
01628
01629 activeComms[lev] = activeComms[lev - 1];
01630 } else {
01631 assert(maxProcsForThisLevel[lev] < maxProcsForThisLevel[lev - 1]);
01632 if(maxProcsForThisLevel[lev] == activeNpesInCoarseBal[lev - 1]) {
01633
01634 activeComms[lev] = activeCommsInCoarseBal[lev - 1];
01635 } else if(maxProcsForThisLevel[lev] < activeNpesInCoarseBal[lev - 1]) {
01636
01637 if(maxProcsForThisLevel[lev - 1] < activeNpesInCoarseBal[lev - 1]) {
01638 #ifndef __SILENT_MODE__
01639 if(!rank) {
01640 std::cout<<"splitting Comm in SetDA (using fine activeComm) "<<
01641 maxProcsForThisLevel[lev - 1]<<" -> "<<maxProcsForThisLevel[lev]<<std::endl;
01642 }
01643 #endif
01644 if(rank < maxProcsForThisLevel[lev - 1]) {
01645 par::splitCommUsingSplittingRank(maxProcsForThisLevel[lev],
01646 (activeComms + lev), activeComms[lev - 1]);
01647 }
01648 } else {
01649 #ifndef __SILENT_MODE__
01650 if(!rank) {
01651 std::cout<<"splitting Comm in SetDA (using coarseBalComm) "<<
01652 activeNpesInCoarseBal[lev - 1]<<" -> "<<maxProcsForThisLevel[lev]<<std::endl;
01653 }
01654 #endif
01655 if(rank < activeNpesInCoarseBal[lev - 1]) {
01656 par::splitCommUsingSplittingRank(maxProcsForThisLevel[lev],
01657 (activeComms + lev), activeCommsInCoarseBal[lev - 1]);
01658 }
01659 }
01660 } else {
01661 #ifndef __SILENT_MODE__
01662 if(!rank) {
01663 std::cout<<"splitting Comm in SetDA (using fine activeComm) "<<
01664 maxProcsForThisLevel[lev - 1]<<" -> "<<maxProcsForThisLevel[lev]<<std::endl;
01665 }
01666 #endif
01667 if(rank < maxProcsForThisLevel[lev - 1]) {
01668 par::splitCommUsingSplittingRank(maxProcsForThisLevel[lev],
01669 (activeComms + lev), activeComms[lev - 1]);
01670 }
01671 }
01672 }
01673 }
01674
01675 PROF_SET_DA_STAGE4_END
01676 #ifdef __PROF_WITH_BARRIER__
01677 MPI_Barrier(comm);
01678 #endif
01679 PROF_SET_DA_STAGE5_BEGIN
01680
01681
01682
01683
01684 if(maxProcsForThisLevel[0] < npes) {
01685 std::vector<ot::TreeNode> tmpOctree;
01686 if(rank < maxProcsForThisLevel[0]) {
01687 DendroIntL avgGlobalOctSize = (globalOctreeSizeForThisLevel[0]/maxProcsForThisLevel[0]);
01688 int extraOcts = (globalOctreeSizeForThisLevel[0] % maxProcsForThisLevel[0]);
01689 if(rank < extraOcts) {
01690 par::scatterValues<ot::TreeNode>(finestOctree, tmpOctree,
01691 (avgGlobalOctSize+1), comm);
01692 }else {
01693 par::scatterValues<ot::TreeNode>(finestOctree, tmpOctree,
01694 avgGlobalOctSize, comm);
01695 }
01696 }else {
01697 par::scatterValues<ot::TreeNode>(finestOctree, tmpOctree,
01698 0, comm);
01699 }
01700 finestOctree = tmpOctree;
01701 tmpOctree.clear();
01702 }
01703
01704
01705
01706
01707
01708
01709
01710
01711
01712
01713
01714
01715
01716
01717
01718
01719 for(int i = 1; i < nlevels; i++) {
01720 if(activeNpesInCoarseBal[i-1] < maxProcsForThisLevel[i]) {
01721
01722
01723
01724
01725
01726
01727 std::vector<ot::TreeNode> tmpOctree;
01728 if(rank < maxProcsForThisLevel[i]) {
01729 DendroIntL avgGlobalOctSize = (globalOctreeSizeForThisLevel[i]/maxProcsForThisLevel[i]);
01730 int extraOcts = (globalOctreeSizeForThisLevel[i] % maxProcsForThisLevel[i]);
01731 if(rank < extraOcts) {
01732 par::scatterValues<ot::TreeNode>(coarserOctrees[i-1], tmpOctree,
01733 (avgGlobalOctSize+1), activeComms[i]);
01734 }else {
01735 par::scatterValues<ot::TreeNode>(coarserOctrees[i-1], tmpOctree,
01736 avgGlobalOctSize, activeComms[i]);
01737 }
01738 }
01739 coarserOctrees[i-1] = tmpOctree;
01740 tmpOctree.clear();
01741 } else if(activeNpesInCoarseBal[i-1] > maxProcsForThisLevel[i]) {
01742
01743
01744
01745
01746
01747 if(activeStatesInCoarseBal[i-1]) {
01748 std::vector<ot::TreeNode> tmpOctree;
01749 if(rank < maxProcsForThisLevel[i]) {
01750 DendroIntL avgGlobalOctSize = (globalOctreeSizeForThisLevel[i]/maxProcsForThisLevel[i]);
01751 int extraOcts = (globalOctreeSizeForThisLevel[i] % maxProcsForThisLevel[i]);
01752 if(rank < extraOcts) {
01753 par::scatterValues<ot::TreeNode>(coarserOctrees[i-1], tmpOctree,
01754 (avgGlobalOctSize+1), activeCommsInCoarseBal[i-1]);
01755 }else {
01756 par::scatterValues<ot::TreeNode>(coarserOctrees[i-1], tmpOctree,
01757 avgGlobalOctSize, activeCommsInCoarseBal[i-1]);
01758 }
01759 }else {
01760 par::scatterValues<ot::TreeNode>(coarserOctrees[i-1], tmpOctree,
01761 0, activeCommsInCoarseBal[i-1]);
01762 }
01763 coarserOctrees[i-1] = tmpOctree;
01764 tmpOctree.clear();
01765 }
01766 }
01767 }
01768
01769 #ifdef __USE_PVT_DA_IN_MG__
01770
01771
01772
01773 ot::markHangingNodesAtAllLevels(finestOctree, nlevels, coarserOctrees,
01774 activeComms, dim, maxDepth);
01775 #endif
01776
01777 if(activeStatesInCoarseBal) {
01778 delete [] activeStatesInCoarseBal;
01779 activeStatesInCoarseBal = NULL;
01780 }
01781
01782 if(activeCommsInCoarseBal) {
01783 delete [] activeCommsInCoarseBal;
01784 activeCommsInCoarseBal = NULL;
01785 }
01786
01787 if(activeNpesInCoarseBal) {
01788 delete [] activeNpesInCoarseBal;
01789 activeNpesInCoarseBal = NULL;
01790 }
01791
01792
01793 DAMG *tmpDAMG = new DAMG[nlevels];
01794 for (int i = 0; i < nlevels; i++) {
01795 tmpDAMG[i] = new _p_DAMG;
01796 tmpDAMG[i]->nlevels = nlevels - i;
01797 tmpDAMG[i]->totalLevels = nlevels;
01798 tmpDAMG[i]->comm = comm;
01799 tmpDAMG[i]->user = user;
01800 tmpDAMG[i]->initialguess = NULL;
01801 tmpDAMG[i]->suppressedDOF = NULL;
01802 tmpDAMG[i]->suppressedDOFaux = NULL;
01803 tmpDAMG[i]->dof = dof;
01804 tmpDAMG[i]->da = NULL;
01805 tmpDAMG[i]->da_aux = NULL;
01806 tmpDAMG[i]->x = NULL;
01807 tmpDAMG[i]->b = NULL;
01808 tmpDAMG[i]->r = NULL;
01809 tmpDAMG[i]->J = NULL;
01810 tmpDAMG[i]->B = NULL;
01811 tmpDAMG[i]->R = NULL;
01812 tmpDAMG[i]->solve = NULL;
01813
01814 tmpDAMG[i]->ksp = NULL;
01815 tmpDAMG[i]->rhs = NULL;
01816 }
01817 *damg = tmpDAMG;
01818
01819 PROF_SET_DA_STAGE5_END
01820 #ifdef __PROF_WITH_BARRIER__
01821 MPI_Barrier(comm);
01822 #endif
01823 PROF_SET_DA_STAGE6_BEGIN
01824
01825 if(nlevels == 1) {
01826
01827
01828 #ifndef __USE_PVT_DA_IN_MG__
01829 tmpDAMG[0]->da = new DA(finestOctree, comm, activeComms[0], compressLut, NULL, NULL);
01830 #else
01831 tmpDAMG[0]->da = new DA(1, finestOctree, comm, activeComms[0], compressLut, NULL, NULL);
01832 #endif
01833
01834 if(!rank) {
01835 int activeNpes = tmpDAMG[0]->da->getNpesActive();
01836 if(activeNpes < npes) {
01837 std::cout<<"WARNING: Some processors will be"<<
01838 " idle even on the finest level."<<
01839 " You might want to use fewer processors."<<std::endl;
01840 fflush(stdout);
01841 }
01842 }
01843
01844 finestOctree.clear();
01845
01846
01847 delete [] maxProcsForThisLevel;
01848 maxProcsForThisLevel = NULL;
01849
01850 delete [] globalOctreeSizeForThisLevel;
01851 globalOctreeSizeForThisLevel = NULL;
01852
01853 delete [] activeComms;
01854 activeComms = NULL;
01855
01856
01857
01858
01859 if(coarserOctrees != NULL) {
01860 delete [] coarserOctrees;
01861 coarserOctrees = NULL;
01862 }
01863
01864 PROF_SET_DA_STAGE6_END
01865
01866 ierr = DAMGSetUp(tmpDAMG);CHKERRQ(ierr);
01867
01868 PROF_MG_SET_DA_END
01869 }
01870
01871
01872 std::vector<ot::TreeNode>* blocksPtr = NULL;
01873 ot::DA* newDa = NULL;
01874 while(idxOfCoarsestLev >= 0) {
01875 #ifdef __DEBUG_MG__
01876 MPI_Barrier(comm);
01877 if(!rank) {
01878 std::cout<<"Meshing 1st DA for lev: "<<idxOfCoarsestLev<<std::endl;
01879 }
01880 MPI_Barrier(comm);
01881 #endif
01882
01883 if(newDa == NULL) {
01884 #ifndef __USE_PVT_DA_IN_MG__
01885 newDa = new DA(coarserOctrees[idxOfCoarsestLev], comm,
01886 activeComms[idxOfCoarsestLev + 1], compressLut,
01887 blocksPtr, NULL);
01888 #else
01889 newDa = new DA(1, coarserOctrees[idxOfCoarsestLev], comm,
01890 activeComms[idxOfCoarsestLev + 1], compressLut,
01891 blocksPtr, NULL);
01892 #endif
01893 }
01894
01895
01896
01897 coarserOctrees[idxOfCoarsestLev].clear();
01898 tmpDAMG[nlevels-(2+idxOfCoarsestLev)]->da = newDa;
01899 newDa = NULL;
01900
01901 bool procsNotUsedWell;
01902 if(!rank) {
01903 int activeNpes = tmpDAMG[nlevels-(2+idxOfCoarsestLev)]->da->getNpesActive();
01904 if( (activeNpes < maxProcsForThisLevel[idxOfCoarsestLev+1]) ||
01905 (maxProcsForThisLevel[idxOfCoarsestLev+1] <
01906 maxProcsForThisLevel[idxOfCoarsestLev]) ) {
01907 procsNotUsedWell = true;
01908 } else {
01909 procsNotUsedWell = false;
01910 }
01911 }
01912
01913 par::Mpi_Bcast<bool>(&procsNotUsedWell, 1, 0, comm);
01914
01915
01916
01917
01918
01919
01920 double minMaxLoad[2];
01921
01922 if(tmpDAMG[nlevels-(2+idxOfCoarsestLev)]->da->iAmActive()) {
01923 ot::DA* currDa = tmpDAMG[nlevels-(2+idxOfCoarsestLev)]->da;
01924
01925 unsigned int localPreGhostSize = currDa->getIdxElementBegin();
01926
01927 unsigned int localIndependentSize = currDa->getIndependentSize();
01928
01929 unsigned int localOwnSize = currDa->getElementSize();
01930
01931 unsigned int localDependentSize = (localOwnSize - localIndependentSize);
01932
01933 #ifdef __NO_GHOST_LOOP__
01934 double computationLoad = localOwnSize;
01935 #else
01936 double computationLoad = (localOwnSize + localPreGhostSize);
01937 #endif
01938 double communicationLoad = localDependentSize;
01939
01940 double localLoad = ((0.75*computationLoad) + (0.25*communicationLoad));
01941
01942 par::Mpi_Reduce<double>(&localLoad, &(minMaxLoad[0]), 1,
01943 MPI_MIN, 0, currDa->getCommActive());
01944
01945 par::Mpi_Reduce<double>(&localLoad, &(minMaxLoad[1]), 1,
01946 MPI_MAX, 0, currDa->getCommActive());
01947 }
01948
01949 par::Mpi_Bcast<double>(minMaxLoad, 2, 0, comm);
01950
01951
01952
01953 bool needAuxGrid = (procsNotUsedWell ||
01954 (minMaxLoad[1] > (minMaxLoad[0]*loadFac)));
01955
01956
01957
01958 if(blocksPtr == NULL) {
01959 blocksPtr = new std::vector<ot::TreeNode>;
01960 (*blocksPtr) = tmpDAMG[nlevels-(2+idxOfCoarsestLev)]->da->getBlocks();
01961 }
01962
01963 if(needAuxGrid) {
01964
01965
01966
01967
01968
01969 std::vector<ot::TreeNode> fineOctreeCopy;
01970 if(idxOfCoarsestLev > 0) {
01971 fineOctreeCopy = coarserOctrees[idxOfCoarsestLev-1];
01972 }else {
01973 fineOctreeCopy = finestOctree;
01974 }
01975
01976
01977
01978 bool iAmActive = tmpDAMG[nlevels-(2+idxOfCoarsestLev)]->da->iAmActive();
01979 MPI_Comm newComm = tmpDAMG[nlevels-(2+idxOfCoarsestLev)]->da->getCommActive();
01980
01981 DendroIntL mySize = fineOctreeCopy.size();
01982 DendroIntL totalSize = globalOctreeSizeForThisLevel[idxOfCoarsestLev];
01983
01984 std::vector<ot::TreeNode> fineOctAfterPart;
01985
01986 if(rank < maxProcsForThisLevel[idxOfCoarsestLev]) {
01987 if(iAmActive) {
01988 int newRank;
01989 int newNpes;
01990 MPI_Comm_rank(newComm, &newRank);
01991 MPI_Comm_size(newComm, &newNpes);
01992 DendroIntL avgSize = (totalSize / newNpes);
01993 int leftOvers = (totalSize % newNpes);
01994 if(newRank < leftOvers) {
01995 par::scatterValues<ot::TreeNode>(fineOctreeCopy, fineOctAfterPart,
01996 (avgSize+1), activeComms[idxOfCoarsestLev]);
01997 }else {
01998 par::scatterValues<ot::TreeNode>(fineOctreeCopy, fineOctAfterPart,
01999 avgSize, activeComms[idxOfCoarsestLev]);
02000 }
02001 }else {
02002 par::scatterValues<ot::TreeNode>(fineOctreeCopy, fineOctAfterPart,
02003 0, activeComms[idxOfCoarsestLev]);
02004 }
02005 }
02006
02007 fineOctreeCopy.clear();
02008
02009 #ifdef __DEBUG_MG__
02010 MPI_Barrier(comm);
02011 if(!rank) {
02012 std::cout<<"Meshing 2nd DA for lev: "<<idxOfCoarsestLev<<std::endl;
02013 }
02014 MPI_Barrier(comm);
02015 #endif
02016
02017
02018 #ifndef __USE_PVT_DA_IN_MG__
02019 newDa = new DA(fineOctAfterPart, comm, newComm, compressLut, blocksPtr, NULL);
02020 #else
02021 newDa = new DA(1, fineOctAfterPart, comm, newComm, compressLut, blocksPtr, NULL);
02022 #endif
02023
02024 fineOctAfterPart.clear();
02025
02026
02027
02028
02029 if(!procsNotUsedWell) {
02030 if(newDa->iAmActive()) {
02031 ot::DA* currDa = newDa;
02032
02033 unsigned int localPreGhostSize = currDa->getIdxElementBegin();
02034
02035 unsigned int localOwnSize = currDa->getElementSize();
02036
02037 unsigned int localIndependentSize = currDa->getIndependentSize();
02038
02039 unsigned int localDependentSize = (localOwnSize - localIndependentSize);
02040
02041 #ifdef __NO_GHOST_LOOP__
02042 double computationLoad = localOwnSize;
02043 #else
02044 double computationLoad = (localOwnSize + localPreGhostSize);
02045 #endif
02046 double communicationLoad = localDependentSize;
02047
02048 double localLoad = ((0.75*computationLoad) + (0.25*communicationLoad));
02049
02050 par::Mpi_Reduce<double>(&localLoad, &minMaxLoad[0], 1,
02051 MPI_MIN, 0, currDa->getCommActive());
02052
02053 par::Mpi_Reduce<double>(&localLoad, &minMaxLoad[1], 1,
02054 MPI_MAX, 0, currDa->getCommActive());
02055 }
02056
02057 par::Mpi_Bcast<double>(minMaxLoad, 2, 0, comm);
02058
02059 needAuxGrid = (minMaxLoad[1] > (minMaxLoad[0]*loadFac));
02060 } else {
02061 needAuxGrid = true;
02062 }
02063
02064 if(needAuxGrid) {
02065 tmpDAMG[nlevels-(1+idxOfCoarsestLev)]->da_aux = newDa;
02066 newDa = NULL;
02067
02068
02069 blocksPtr->clear();
02070 delete blocksPtr;
02071 blocksPtr = NULL;
02072 }
02073 }
02074
02075 idxOfCoarsestLev--;
02076 }
02077
02078
02079 delete [] coarserOctrees;
02080 coarserOctrees = NULL;
02081
02082 delete [] globalOctreeSizeForThisLevel;
02083 globalOctreeSizeForThisLevel = NULL;
02084
02085 delete [] maxProcsForThisLevel;
02086 maxProcsForThisLevel = NULL;
02087
02088 #ifdef __DEBUG_MG__
02089 MPI_Barrier(comm);
02090 if(!rank) {
02091 std::cout<<"Meshing finest octree: "<<std::endl;
02092 }
02093 MPI_Barrier(comm);
02094 #endif
02095
02096
02097 if(newDa == NULL) {
02098 #ifndef __USE_PVT_DA_IN_MG__
02099 newDa = new DA(finestOctree, comm, activeComms[0], compressLut, blocksPtr, NULL);
02100 #else
02101 newDa = new DA(1, finestOctree, comm, activeComms[0], compressLut, blocksPtr, NULL);
02102 #endif
02103 }
02104
02105 #ifdef __DEBUG_MG__
02106 MPI_Barrier(comm);
02107 if(!rank) {
02108 std::cout<<"Finished meshing finest octree: "<<std::endl;
02109 }
02110 MPI_Barrier(comm);
02111 #endif
02112
02113 tmpDAMG[nlevels-1]->da = newDa;
02114 newDa = NULL;
02115
02116 finestOctree.clear();
02117
02118 delete [] activeComms;
02119 activeComms = NULL;
02120
02121 if(!rank) {
02122 int activeNpes = tmpDAMG[nlevels-1]->da->getNpesActive();
02123 if(activeNpes < npes) {
02124 std::cout<<"WARNING: Some processors will be idle"
02125 <<" even on the finest level."
02126 <<" You might want to use fewer processors."<<std::endl;
02127 fflush(stdout);
02128 }
02129 }
02130
02131 if(blocksPtr != NULL) {
02132 blocksPtr->clear();
02133 delete blocksPtr;
02134 blocksPtr = NULL;
02135 }
02136
02137 PROF_SET_DA_STAGE6_END
02138
02139 ierr = DAMGSetUp(tmpDAMG); CHKERRQ(ierr);
02140
02141 PROF_MG_SET_DA_END
02142
02143 }
02144
02145 PetscErrorCode DAMGSetUp(DAMG* damg)
02146 {
02147 PROF_MG_SETUP_BEGIN
02148
02149 int i,nlevels = damg[0]->nlevels;
02150
02151
02152 #ifdef __DEBUG_MG__
02153 int rank;
02154 MPI_Comm_rank(damg[0]->comm,&rank);
02155 if(!rank) {
02156 std::cout<<"In DAMGSetUp."<<std::endl;
02157 fflush(stdout);
02158 }
02159 #endif
02160
02161
02162 damg[0]->da->createVector(damg[0]->x, false, false, damg[0]->dof);
02163 VecDuplicate(damg[0]->x,&damg[0]->b);
02164 VecDuplicate(damg[0]->x,&damg[0]->r);
02165
02166 #ifdef __DEBUG_MG__
02167 assert(damg[0]->da_aux == NULL);
02168 #endif
02169
02170 for (i = 1; i < nlevels; i++) {
02171 damg[i]->da->createVector(damg[i]->x, false, false, damg[i]->dof);
02172 VecDuplicate(damg[i]->x,&damg[i]->b);
02173 VecDuplicate(damg[i]->x,&damg[i]->r);
02174
02175
02176
02177
02178 if(damg[i]->da_aux == NULL) {
02179
02180 #ifdef __DEBUG_MG__
02181 if(!rank) {
02182 std::cout<<"Lev: "<<i<<" R-type: 1"<<std::endl;
02183 fflush(stdout);
02184 }
02185 #endif
02186 createInterpolationType1(damg[i-1],damg[i],&damg[i]->R);
02187 }else {
02188
02189 #ifdef __DEBUG_MG__
02190 if(!rank) {
02191 std::cout<<"Lev: "<<i<<" R-type: 2"<<std::endl;
02192 fflush(stdout);
02193 }
02194 #endif
02195 createInterpolationType2(damg[i-1],damg[i],&damg[i]->R);
02196 }
02197 }
02198
02199 PROF_MG_SETUP_END
02200 }
02201
02202
02203
02204
02205
02206 PetscErrorCode createInterpolationType1(DAMG damgc, DAMG damgf, Mat *R)
02207 {
02208 PROF_MG_CREATE_RP1_BEGIN
02209
02210 ot::DA* dac = damgc->da;
02211 ot::DA* daf = damgf->da;
02212 unsigned int dof = damgc->dof;
02213 MPI_Comm comm = damgc->comm;
02214 unsigned int mc,mf;
02215
02216 mc=dac->getNodeSize();
02217 mf=daf->getNodeSize();
02218 TransferOpData* data = new TransferOpData;
02219
02220 unsigned int localCoarseIndSize = dac->getIndependentSize();
02221
02222 if(dac->iAmActive()) {
02223 par::Mpi_Allreduce<unsigned int>(&localCoarseIndSize, &(data->minIndependentSize), 1,
02224 MPI_MIN, dac->getCommActive());
02225 } else {
02226 data->minIndependentSize = 0;
02227 }
02228
02229 data->dac = dac;
02230 data->daf = daf;
02231 data->suppressedDOFc = damgc->suppressedDOF;
02232 data->suppressedDOFf = damgf->suppressedDOF;
02233 data->comm = comm;
02234 data->dof = dof;
02235 data->fineTouchedFlags = new std::vector<ot::FineTouchedStatus>;
02236
02237 data->tmp = NULL;
02238 data->addRtmp = NULL;
02239 data->addPtmp = NULL;
02240
02241 data->sendSzP = NULL;
02242 data->sendOffP = NULL;
02243 data->recvSzP = NULL;
02244 data->recvOffP = NULL;
02245 data->sendSzR = NULL;
02246 data->sendOffR = NULL;
02247 data->recvSzR = NULL;
02248 data->recvOffR = NULL;
02249
02250 iC(MatCreateShell(comm, mc*dof ,mf*dof,PETSC_DETERMINE,PETSC_DETERMINE, (void*)(data), R));
02251 iC(MatShellSetOperation(*R,MATOP_MULT_TRANSPOSE,(void(*)(void))prolongMatVecType1));
02252 iC(MatShellSetOperation(*R,MATOP_MULT,(void(*)(void))restrictMatVecType1));
02253 iC(MatShellSetOperation(*R,MATOP_MULT_ADD,(void(*)(void))addRestrictMatVec));
02254 iC(MatShellSetOperation(*R,MATOP_MULT_TRANSPOSE_ADD,(void(*)(void))addProlongMatVec));
02255 iC(MatShellSetOperation(*R ,MATOP_DESTROY, (void(*)(void)) rpMatDestroy));
02256
02257
02258 data->daf->createVector<ot::FineTouchedStatus >(*(data->fineTouchedFlags), false, false, 1);
02259 dummyRestrictMatVecType1(data);
02260
02261 PROF_MG_CREATE_RP1_END
02262 }
02263
02264 PetscErrorCode createInterpolationType2(DAMG damgc, DAMG damgf, Mat *R)
02265 {
02266 PROF_MG_CREATE_RP2_BEGIN
02267
02268 ot::DA* dac = damgc->da;
02269 ot::DA* daf = damgf->da;
02270 ot::DA* daf_aux = damgf->da_aux;
02271 unsigned int dof = damgc->dof;
02272 MPI_Comm comm = damgc->comm;
02273 unsigned int mc,mf;
02274
02275 mc=dac->getNodeSize();
02276 mf=daf->getNodeSize();
02277 TransferOpData* data = new TransferOpData;
02278
02279 unsigned int localCoarseIndSize = dac->getIndependentSize();
02280
02281 if(dac->iAmActive()) {
02282 par::Mpi_Allreduce<unsigned int>(&localCoarseIndSize, &(data->minIndependentSize), 1,
02283 MPI_MIN, dac->getCommActive());
02284 } else {
02285 data->minIndependentSize = 0;
02286 }
02287
02288 data->dac = dac;
02289 data->daf = daf_aux;
02290 data->suppressedDOFc = damgc->suppressedDOF;
02291 data->suppressedDOFf = damgf->suppressedDOFaux;
02292 data->comm = comm;
02293 data->dof = dof;
02294 data->fineTouchedFlags = new std::vector<ot::FineTouchedStatus>;
02295 daf_aux->createVector(data->tmp, false, false, dof);
02296 data->addRtmp = NULL;
02297 data->addPtmp = NULL;
02298
02299 data->sendSzP = NULL;
02300 data->sendOffP = NULL;
02301 data->recvSzP = NULL;
02302 data->recvOffP = NULL;
02303 data->sendSzR = NULL;
02304 data->sendOffR = NULL;
02305 data->recvSzR = NULL;
02306 data->recvOffR = NULL;
02307
02308 iC(MatCreateShell(comm, mc*dof ,mf*dof,PETSC_DETERMINE,PETSC_DETERMINE, (void*)(data), R));
02309 iC(MatShellSetOperation(*R,MATOP_MULT_TRANSPOSE,(void(*)(void))prolongMatVecType2));
02310 iC(MatShellSetOperation(*R,MATOP_MULT,(void(*)(void))restrictMatVecType2));
02311 iC(MatShellSetOperation(*R,MATOP_MULT_TRANSPOSE_ADD,(void(*)(void))addProlongMatVec));
02312 iC(MatShellSetOperation(*R,MATOP_MULT_ADD,(void(*)(void))addRestrictMatVec));
02313 iC(MatShellSetOperation(*R ,MATOP_DESTROY, (void(*)(void)) rpMatDestroy));
02314
02315
02316
02317
02318
02319 data->daf->createVector<ot::FineTouchedStatus >(*(data->fineTouchedFlags), false, false, 1);
02320 dummyRestrictMatVecType1(data);
02321
02322 PROF_MG_CREATE_RP2_END
02323 }
02324
02325 PetscErrorCode rpMatDestroy(Mat R) {
02326 TransferOpData *data;
02327 PetscFunctionBegin;
02328 iC(MatShellGetContext( R, (void **)&data));
02329 if(data) {
02330 if(data->fineTouchedFlags) {
02331 data->fineTouchedFlags->clear();
02332 delete data->fineTouchedFlags;
02333 data->fineTouchedFlags = NULL;
02334 }
02335 if(data->tmp) {
02336 iC(VecDestroy(data->tmp));
02337 data->tmp = NULL;
02338 }
02339 if(data->addRtmp) {
02340 VecDestroy(data->addRtmp);
02341 data->addRtmp = NULL;
02342 }
02343 if(data->addPtmp) {
02344 VecDestroy(data->addPtmp);
02345 data->addPtmp = NULL;
02346 }
02347 if(data->sendSzP) {
02348 delete [] data->sendSzP;
02349 data->sendSzP = NULL;
02350 }
02351 if(data->sendOffP) {
02352 delete [] data->sendOffP;
02353 data->sendOffP = NULL;
02354 }
02355 if(data->recvSzP) {
02356 delete [] data->recvSzP;
02357 data->recvSzP = NULL;
02358 }
02359 if(data->recvOffP) {
02360 delete [] data->recvOffP;
02361 data->recvOffP = NULL;
02362 }
02363 if(data->sendSzR) {
02364 delete [] data->sendSzR;
02365 data->sendSzR = NULL;
02366 }
02367 if(data->sendOffR) {
02368 delete [] data->sendOffR;
02369 data->sendOffR = NULL;
02370 }
02371 if(data->recvSzR) {
02372 delete [] data->recvSzR;
02373 data->recvSzR = NULL;
02374 }
02375 if(data->recvOffR) {
02376 delete [] data->recvOffR;
02377 data->recvOffR = NULL;
02378 }
02379 delete data;
02380 data = NULL;
02381 }
02382
02383 PetscFunctionReturn(0);
02384 }
02385
02386 void PrintDAMG(DAMG* damg) {
02387 int nlevels = damg[0]->nlevels;
02388 int rank;
02389 MPI_Comm_rank(damg[0]->comm,&rank);
02390 for (int i = 0; i < nlevels; i++) {
02391 MPI_Comm activeComm = damg[i]->da->getCommActive();
02392 MPI_Comm activeAuxComm;
02393
02394 if(damg[i]->da_aux) {
02395 activeAuxComm = damg[i]->da_aux->getCommActive();
02396 }
02397
02398 DendroIntL globalBlocksSize;
02399 DendroIntL globalBlocksSizeAux;
02400 DendroIntL localBlocksSize = damg[i]->da->getNumBlocks();
02401 DendroIntL localBlocksSizeAux;
02402
02403 if(damg[i]->da_aux) {
02404 localBlocksSizeAux = damg[i]->da_aux->getNumBlocks();
02405 }
02406
02407 DendroIntL elemSizeLocal = damg[i]->da->getElementSize();
02408 DendroIntL ghostedElemSizeLocal = damg[i]->da->getGhostedElementSize();
02409 DendroIntL elemSizeLocalAux;
02410 DendroIntL ghostedElemSizeLocalAux;
02411
02412 if(damg[i]->da_aux) {
02413 elemSizeLocalAux = damg[i]->da_aux->getElementSize();
02414 ghostedElemSizeLocalAux = damg[i]->da_aux->getGhostedElementSize();
02415 }
02416
02417 DendroIntL elemSizeGlobal;
02418 DendroIntL minElemSize;
02419 DendroIntL maxElemSize;
02420 DendroIntL minGhostedElemSize;
02421 DendroIntL maxGhostedElemSize;
02422
02423 DendroIntL elemSizeGlobalAux;
02424 DendroIntL minElemSizeAux;
02425 DendroIntL maxElemSizeAux;
02426 DendroIntL minGhostedElemSizeAux;
02427 DendroIntL maxGhostedElemSizeAux;
02428
02429 if(damg[i]->da->iAmActive()) {
02430 par::Mpi_Reduce<DendroIntL>(&localBlocksSize, &globalBlocksSize, 1,
02431 MPI_SUM, 0, activeComm);
02432
02433 par::Mpi_Reduce<DendroIntL>(&elemSizeLocal, &elemSizeGlobal, 1,
02434 MPI_SUM, 0, activeComm);
02435
02436 par::Mpi_Reduce<DendroIntL>(&elemSizeLocal, &minElemSize, 1,
02437 MPI_MIN, 0, activeComm);
02438
02439 par::Mpi_Reduce<DendroIntL>(&elemSizeLocal, &maxElemSize, 1,
02440 MPI_MAX, 0, activeComm);
02441
02442 par::Mpi_Reduce<DendroIntL>(&ghostedElemSizeLocal, &minGhostedElemSize, 1,
02443 MPI_MIN, 0, activeComm);
02444
02445 par::Mpi_Reduce<DendroIntL>(&ghostedElemSizeLocal, &maxGhostedElemSize, 1,
02446 MPI_MAX, 0, activeComm);
02447 }
02448
02449 if(damg[i]->da_aux) {
02450 if(damg[i]->da_aux->iAmActive()) {
02451 par::Mpi_Reduce<DendroIntL>(&localBlocksSizeAux, &globalBlocksSizeAux, 1,
02452 MPI_SUM, 0, activeAuxComm);
02453
02454 par::Mpi_Reduce<DendroIntL>(&elemSizeLocalAux, &elemSizeGlobalAux, 1,
02455 MPI_SUM, 0, activeAuxComm);
02456
02457 par::Mpi_Reduce<DendroIntL>(&elemSizeLocalAux, &minElemSizeAux, 1,
02458 MPI_MIN, 0, activeAuxComm);
02459
02460 par::Mpi_Reduce<DendroIntL>(&elemSizeLocalAux, &maxElemSizeAux, 1,
02461 MPI_MAX, 0, activeAuxComm);
02462
02463 par::Mpi_Reduce<DendroIntL>(&ghostedElemSizeLocalAux, &minGhostedElemSizeAux, 1,
02464 MPI_MIN, 0, activeAuxComm);
02465
02466 par::Mpi_Reduce<DendroIntL>(&ghostedElemSizeLocalAux, &maxGhostedElemSizeAux, 1,
02467 MPI_MAX, 0, activeAuxComm);
02468 }
02469 }
02470
02471 DendroIntL nodeSizeLocal = damg[i]->da->getNodeSize();
02472 DendroIntL nodeSizeLocalAux;
02473
02474 if(damg[i]->da_aux) {
02475 nodeSizeLocalAux = damg[i]->da_aux->getNodeSize();
02476 }
02477
02478 DendroIntL nodeSizeGlobal;
02479 DendroIntL minNodeSize;
02480 DendroIntL maxNodeSize;
02481
02482 if(damg[i]->da->iAmActive()) {
02483 par::Mpi_Reduce<DendroIntL>(&nodeSizeLocal, &nodeSizeGlobal, 1,
02484 MPI_SUM, 0, activeComm);
02485
02486 par::Mpi_Reduce<DendroIntL>(&nodeSizeLocal, &minNodeSize, 1,
02487 MPI_MIN, 0, activeComm);
02488
02489 par::Mpi_Reduce<DendroIntL>(&nodeSizeLocal, &maxNodeSize, 1,
02490 MPI_MAX, 0, activeComm);
02491 }
02492
02493 DendroIntL nodeSizeGlobalAux;
02494 DendroIntL minNodeSizeAux;
02495 DendroIntL maxNodeSizeAux;
02496
02497 if(damg[i]->da_aux) {
02498 if(damg[i]->da_aux->iAmActive()) {
02499 par::Mpi_Reduce<DendroIntL>(&nodeSizeLocalAux, &nodeSizeGlobalAux, 1,
02500 MPI_SUM, 0, activeAuxComm);
02501
02502 par::Mpi_Reduce<DendroIntL>(&nodeSizeLocalAux, &minNodeSizeAux, 1,
02503 MPI_MIN, 0, activeAuxComm);
02504
02505 par::Mpi_Reduce<DendroIntL>(&nodeSizeLocalAux, &maxNodeSizeAux, 1,
02506 MPI_MAX, 0, activeAuxComm);
02507 }
02508 }
02509
02510 unsigned int globalMinLev = getGlobalMinLevel(damg[i]->da);
02511 unsigned int globalMaxLev = getGlobalMaxLevel(damg[i]->da);
02512
02513 unsigned int globalMinLevAux;
02514 if(damg[i]->da_aux) {
02515 globalMinLevAux = getGlobalMinLevel(damg[i]->da_aux);
02516 }
02517 unsigned int globalMaxLevAux;
02518 if(damg[i]->da_aux) {
02519 globalMaxLevAux = getGlobalMaxLevel(damg[i]->da_aux);
02520 }
02521
02522 int activeNpes = damg[i]->da->getNpesActive();
02523 int activeNpesAux;
02524 if(damg[i]->da_aux) {
02525 activeNpesAux = damg[i]->da_aux->getNpesActive();
02526 }
02527 if(!rank) {
02528 std::cout<<"At Level "<<i<<" Elem size: "<<elemSizeGlobal
02529 <<", GlobalBlocksSize: "<<globalBlocksSize
02530 <<", node size: "<<nodeSizeGlobal
02531 <<", minElemSize: "<<minElemSize
02532 <<", maxElemSize: "<<maxElemSize
02533 <<", minGhostedElemSize: "<<minGhostedElemSize
02534 <<", maxGhostedElemSize: "<<maxGhostedElemSize
02535 <<", minNodeSize: "<<minNodeSize
02536 <<", maxNodeSize: "<<maxNodeSize
02537 <<", min Lev: "<<globalMinLev
02538 <<", max Lev: "<<globalMaxLev
02539 <<", Active Npes: "<<activeNpes<<std::endl;
02540 fflush(stdout);
02541 if(damg[i]->da_aux) {
02542 std::cout<<"At AUX Level "<<i<<" Elem size: "<<elemSizeGlobalAux
02543 <<", GlobalBlocksSize: "<<globalBlocksSizeAux
02544 <<", node size: "<<nodeSizeGlobalAux
02545 <<", minElemSize: "<<minElemSizeAux
02546 <<", maxElemSize: "<<maxElemSizeAux
02547 <<", minGhostedElemSize: "<<minGhostedElemSizeAux
02548 <<", maxGhostedElemSize: "<<maxGhostedElemSizeAux
02549 <<", minNodeSize: "<<minNodeSizeAux
02550 <<", maxNodeSize: "<<maxNodeSizeAux
02551 <<", min Lev: "<<globalMinLevAux
02552 <<", max Lev: "<<globalMaxLevAux
02553 <<", Active Npes: "<<activeNpesAux<<std::endl;
02554 fflush(stdout);
02555 }
02556 }
02557 }
02558 }
02559
02560
02561
02562 PetscErrorCode PC_KSP_Shell_SetUp(void* ctx) {
02563
02564 PROF_PC_KSP_SHELL_SETUP_BEGIN
02565
02566 PC_KSP_Shell* data = static_cast<PC_KSP_Shell*>(ctx);
02567
02568 MPI_Comm commActive = data->commActive;
02569 bool iAmActive = data->iAmActive;
02570
02571
02572 PC pc = data->pc;
02573
02574 Mat Amat;
02575 PCGetOperators(pc, &Amat, NULL, NULL);
02576 PetscTruth isshell;
02577 PetscTypeCompare((PetscObject)Amat, MATSHELL, &isshell);
02578
02579 if(!isshell) {
02580 SETERRQ(PETSC_ERR_SUP, " Expected a MATSHELL.");
02581 }
02582
02583
02584 if(iAmActive) {
02585 Mat Amat_private;
02586 Mat Pmat_private;
02587 MatStructure pFlag;
02588
02589 if(getPrivateMatricesForKSP_Shell) {
02590 (*getPrivateMatricesForKSP_Shell)(Amat, &Amat_private,
02591 &Pmat_private, &pFlag);
02592 } else {
02593 SETERRQ(PETSC_ERR_USER,
02594 " Expected function to be set:\
02595 getPrivateMatricesForKSP_Shell");
02596 }
02597
02598
02599 assert(Amat_private != NULL);
02600
02601 PetscInt globalRowSize;
02602 PetscInt globalColSize;
02603 PetscInt localRowSize;
02604 PetscInt localColSize;
02605
02606 PetscInt globalRowSizePrivate;
02607 PetscInt globalColSizePrivate;
02608 PetscInt localRowSizePrivate;
02609 PetscInt localColSizePrivate;
02610
02611 MatGetSize(Amat, &globalRowSize, &globalColSize);
02612 MatGetSize(Amat_private, &globalRowSizePrivate, &globalColSizePrivate);
02613
02614 MatGetLocalSize(Amat, &localRowSize, &localColSize);
02615 MatGetLocalSize(Amat_private, &localRowSizePrivate, &localColSizePrivate);
02616
02617 assert(globalRowSize == globalRowSizePrivate);
02618 assert(globalColSize == globalColSizePrivate);
02619
02620 assert(localRowSize == localRowSizePrivate);
02621 assert(localColSize == localColSizePrivate);
02622
02623 MPI_Comm privateComm;
02624 PetscObjectGetComm((PetscObject)Amat_private, &privateComm);
02625
02626 int commCompareResult;
02627
02628
02629
02630
02631 MPI_Comm_compare(privateComm, commActive, &commCompareResult);
02632
02633 assert( (commCompareResult == MPI_CONGRUENT) || (commCompareResult == MPI_IDENT));
02634
02635 if(pc->setupcalled == 0) {
02636 assert(data->ksp_private == NULL);
02637 assert(data->rhs_private == NULL);
02638 assert(data->sol_private == NULL);
02639 } else {
02640 assert(data->ksp_private != NULL);
02641 assert(data->rhs_private != NULL);
02642 assert(data->sol_private != NULL);
02643 }
02644
02645 if(pc->setupcalled == 0) {
02646 KSPCreate(commActive, &(data->ksp_private));
02647
02648 const char *prefix;
02649 PCGetOptionsPrefix(pc, &prefix);
02650
02651
02652 KSPSetOptionsPrefix(data->ksp_private, prefix);
02653 KSPAppendOptionsPrefix(data->ksp_private, "private_");
02654
02655
02656 KSPSetType(data->ksp_private, KSPPREONLY);
02657
02658 PC privatePC;
02659 KSPGetPC(data->ksp_private, &privatePC);
02660 PCSetType(privatePC, PCLU);
02661
02662
02663
02664 KSPSetFromOptions(data->ksp_private);
02665
02666 MatGetVecs(Amat_private, &(data->sol_private), &(data->rhs_private));
02667 }
02668
02669 KSPSetOperators(data->ksp_private, Amat_private, Pmat_private, pFlag);
02670
02671 } else {
02672 data->sol_private = NULL;
02673 data->rhs_private = NULL;
02674 data->ksp_private = NULL;
02675 }
02676
02677 PROF_PC_KSP_SHELL_SETUP_END
02678
02679 }
02680
02681 PetscErrorCode PC_KSP_Shell_Destroy(void* ctx) {
02682
02683 PROF_PC_KSP_SHELL_DESTROY_BEGIN
02684
02685 PC_KSP_Shell* data = static_cast<PC_KSP_Shell*>(ctx);
02686
02687 if(data) {
02688 if(data->ksp_private) {
02689 KSPDestroy(data->ksp_private);
02690 data->ksp_private = NULL;
02691 }
02692
02693 if(data->rhs_private) {
02694 VecDestroy(data->rhs_private);
02695 data->rhs_private = NULL;
02696 }
02697
02698 if(data->sol_private) {
02699 VecDestroy(data->sol_private);
02700 data->sol_private = NULL;
02701 }
02702
02703 delete data;
02704 data = NULL;
02705 }
02706
02707 PROF_PC_KSP_SHELL_DESTROY_END
02708 }
02709
02710 PetscErrorCode PC_KSP_Shell_Apply(void* ctx, Vec rhs, Vec sol) {
02711
02712 PROF_PC_KSP_SHELL_APPLY_BEGIN
02713
02714 PC_KSP_Shell* data = static_cast<PC_KSP_Shell*>(ctx);
02715
02716 if(data->iAmActive) {
02717 PetscScalar* rhsArray;
02718 PetscScalar* solArray;
02719
02720
02721
02722 VecGetArray(rhs, &rhsArray);
02723 VecGetArray(sol, &solArray);
02724
02725 VecPlaceArray(data->rhs_private, rhsArray);
02726 VecPlaceArray(data->sol_private, solArray);
02727
02728 KSPSolve(data->ksp_private, data->rhs_private, data->sol_private);
02729
02730 VecResetArray(data->rhs_private);
02731 VecResetArray(data->sol_private);
02732
02733 VecRestoreArray(rhs, &rhsArray);
02734 VecRestoreArray(sol, &solArray);
02735 }
02736
02737 PROF_PC_KSP_SHELL_APPLY_END
02738 }
02739
02740 int scatterValues(Vec in, Vec out, PetscInt inSz, PetscInt outSz,
02741 int *& sendSz, int *& sendOff, int *& recvSz, int *& recvOff, MPI_Comm comm ) {
02742
02743 PROF_MG_SCATTER_BEGIN
02744
02745 bool isNew = (sendSz == NULL);
02746
02747 if(isNew) {
02748 int rank, npes;
02749
02750 MPI_Comm_size(comm, &npes);
02751 MPI_Comm_rank(comm, &rank);
02752
02753 MPI_Request request;
02754 MPI_Status status;
02755
02756 PetscInt inSize, outSize;
02757
02758 VecGetLocalSize(in, &inSize);
02759 VecGetLocalSize(out, &outSize);
02760
02761 assert(inSize == inSz);
02762 assert(outSize == outSz);
02763
02764 PetscInt off1 = 0, off2 = 0;
02765 PetscInt* scnIn = NULL;
02766 if(inSz) {
02767 scnIn = new PetscInt [inSz];
02768 }
02769
02770
02771 PetscInt zero=0;
02772 if(inSz) {
02773 scnIn[0] = 1;
02774 for (PetscInt i = 1; i < inSz; i++) {
02775 scnIn[i] = scnIn[i-1] + 1;
02776 }
02777
02778 par::Mpi_Scan<PetscInt>(scnIn+inSz-1, &off1, 1, MPI_SUM, comm );
02779 } else{
02780 par::Mpi_Scan<PetscInt>(&zero, &off1, 1, MPI_SUM, comm );
02781 }
02782
02783
02784 if (rank < (npes-1)){
02785 par::Mpi_Issend<PetscInt>( &off1, 1, rank+1, 0, comm, &request );
02786 }
02787 if (rank){
02788 par::Mpi_Recv<PetscInt>( &off2, 1, rank-1, 0, comm, &status );
02789 } else{
02790 off2 = 0;
02791 }
02792
02793
02794 for (PetscInt i = 0; i < inSz; i++) {
02795 scnIn[i] = scnIn[i] + off2;
02796 }
02797
02798
02799 PetscInt* outCnts;
02800 outCnts = new PetscInt[npes];
02801
02802 if(rank < (npes-1)) {
02803 MPI_Status statusWait;
02804 MPI_Wait(&request, &statusWait);
02805 }
02806
02807 if( outSz ) {
02808 par::Mpi_Scan<PetscInt>( &outSz, &off1, 1, MPI_SUM, comm );
02809 }else {
02810 par::Mpi_Scan<PetscInt>( &zero, &off1, 1, MPI_SUM, comm );
02811 }
02812
02813 par::Mpi_Allgather<PetscInt>( &off1, outCnts, 1, comm);
02814
02815 #ifdef __DEBUG_MG__
02816 assert(sendSz == NULL);
02817 assert(recvSz == NULL);
02818 assert(sendOff == NULL);
02819 assert(recvOff == NULL);
02820 #endif
02821
02822 sendSz = new int [npes];
02823 recvSz = new int [npes];
02824 sendOff = new int [npes];
02825 recvOff = new int [npes];
02826
02827
02828
02829 for (int i = 0; i < npes; i++) {
02830 sendSz[i] = 0;
02831 }
02832
02833
02834
02835 PetscInt inCnt = 0;
02836 int pCnt = 0;
02837 while( (inCnt < inSz) && (pCnt < npes) ) {
02838 if( scnIn[inCnt] <= outCnts[pCnt] ) {
02839 sendSz[pCnt]++;
02840 inCnt++;
02841 }else {
02842 pCnt++;
02843 }
02844 }
02845
02846
02847
02848 par::Mpi_Alltoall<int>(sendSz, recvSz, 1, comm);
02849
02850 PetscInt nn=0;
02851 for (int i = 0; i < npes; i++) {
02852 nn += recvSz[i];
02853 }
02854
02855
02856 sendOff[0] = 0;
02857 recvOff[0] = 0;
02858 for (int i=1; i<npes; i++) {
02859 sendOff[i] = sendOff[i-1] + sendSz[i-1];
02860 recvOff[i] = recvOff[i-1] + recvSz[i-1];
02861 }
02862
02863 assert(nn == outSz);
02864
02865 if(scnIn) {
02866 delete [] scnIn;
02867 }
02868 delete [] outCnts;
02869 }
02870
02871
02872 PetscScalar * inArr;
02873 PetscScalar * outArr;
02874
02875 VecGetArray(in,&inArr);
02876 VecGetArray(out,&outArr);
02877
02878 par::Mpi_Alltoallv_sparse<PetscScalar>(inArr, sendSz, sendOff,
02879 outArr, recvSz, recvOff, comm);
02880
02881 VecRestoreArray(in,&inArr);
02882 VecRestoreArray(out,&outArr);
02883
02884 PROF_MG_SCATTER_END
02885 }
02886
02887 }
02888
02889
02890