
omg.C

#include "dtypes.h"
#include "petscpc.h"
#include "petscmg.h"
#include "petscmat.h"
#include "private/pcimpl.h"
#include "omg.h"
#include "oda.h"
#include "odaUtils.h"
#include "parUtils.h"
#include <iostream>
#include "dendro.h"

#ifndef iC
#define iC(fun) {CHKERRQ(fun);}
#endif

#ifdef __DEBUG__
#ifndef __DEBUG_MG__
#define __DEBUG_MG__
#endif
#endif

namespace ot {

  extern double ***** RmatType1Stencil;
  extern double **** RmatType2Stencil;
  extern unsigned short**** VtxMap1;
  extern unsigned short***** VtxMap2;
  extern unsigned short***** VtxMap3;
  extern unsigned short****** VtxMap4;

  extern void (*getPrivateMatricesForKSP_Shell)(Mat mat,
      Mat *AmatPrivate, Mat *PmatPrivate, MatStructure* pFlag);

  //Public Functions

  int DAMGCreateSuppressedDOFs(DAMG* damg) {
    int nlevels = damg[0]->nlevels; //number of multigrid levels
    unsigned int dof = damg[0]->dof;
    for(int i = 0; i < nlevels; i++) {
      unsigned int sz = (dof*(damg[i]->da->getLocalBufferSize()));
      if(sz) {
        damg[i]->suppressedDOF = new unsigned char[sz];
      }

      if(damg[i]->da_aux) {
        unsigned int sz2 = (dof*(damg[i]->da_aux->getLocalBufferSize()));
        if(sz2) {
          damg[i]->suppressedDOFaux = new unsigned char[sz2];
        }
      }
    }
    return 1;
  }
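
  //A minimal usage sketch (added comment, not part of the original file):
  //the suppressed-DOF masks are allocated above but never initialized, so a
  //caller would typically clear them before marking any Dirichlet DOFs. The
  //memset loop below is an assumption about typical usage.
  //
  //  DAMGCreateSuppressedDOFs(damg);
  //  for(int i = 0; i < damg[0]->nlevels; i++) {
  //    if(damg[i]->suppressedDOF) {
  //      unsigned int sz = (damg[i]->dof)*(damg[i]->da->getLocalBufferSize());
  //      memset(damg[i]->suppressedDOF, 0, sz*sizeof(unsigned char));
  //    }
  //  }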

  PetscErrorCode DAMG_Initialize(MPI_Comm comm) {
    PROF_MG_INIT_BEGIN

    ot::DA_Initialize(comm);

#ifdef __USE_MG_INIT_TYPE3__
    ot::DAMG_InitPrivateType3(comm);
#else
#ifdef __USE_MG_INIT_TYPE2__
    ot::DAMG_InitPrivateType2(comm);
#else
    ot::DAMG_InitPrivateType1(comm);
#endif
#endif

    PROF_MG_INIT_END
  }
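
  //A minimal lifecycle sketch (added comment, not part of the original file):
  //DAMG_Initialize must be called after PetscInitialize and before any other
  //DAMG function; DAMG_Finalize releases the stencils read here.
  //
  //  PetscInitialize(&argc, &argv, PETSC_NULL, PETSC_NULL);
  //  ot::DAMG_Initialize(MPI_COMM_WORLD);
  //  //... build the hierarchy, set up the KSP, solve ...
  //  ot::DAMG_Finalize();
  //  PetscFinalize();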

  void DAMG_InitPrivateType3(MPI_Comm comm) {

    int rank;
    MPI_Comm_rank(comm, &rank);

    IreadRmatType1Stencil(RmatType1Stencil, rank);
    IreadRmatType2Stencil(RmatType2Stencil, rank);
    IreadVtxMaps(VtxMap1, VtxMap2, VtxMap3, VtxMap4, rank);

  }

  void DAMG_InitPrivateType2(MPI_Comm comm) {

    int rank, npes;
    MPI_Comm_rank(comm, &rank);
    MPI_Comm_size(comm, &npes);

    const int THOUSAND = 1000;
    int numGroups = (npes/THOUSAND);
    if( (numGroups*THOUSAND) < npes) {
      numGroups++;
    }
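    //i.e. numGroups = ceil(npes/1000): one I/O group per block of up to 1000
    //processors. For example, npes = 2500 gives numGroups = 3.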

    MPI_Comm newComm;

    bool* isEmptyList = new bool[npes];
    for(int i = 0; i < numGroups; i++) {
      for(int j = 0; (j < (i*THOUSAND)) && (j < npes); j++) {
        isEmptyList[j] = true;
      }
      for(int j = (i*THOUSAND); (j < ((i+1)*THOUSAND)) && (j < npes); j++) {
        isEmptyList[j] = false;
      }
      for(int j = ((i + 1)*THOUSAND); j < npes; j++) {
        isEmptyList[j] = true;
      }
      MPI_Comm tmpComm;
      par::splitComm2way(isEmptyList, &tmpComm, comm);
      if(!(isEmptyList[rank])) {
        newComm = tmpComm;
      }
    }//end for i
    delete [] isEmptyList;
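    //Each rank is marked active in exactly one pass of the loop above, so
    //every rank leaves the loop with newComm set to its own group's
    //communicator.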

    if( (rank % THOUSAND) == 0) {
      IreadRmatType1Stencil(RmatType1Stencil, (rank/THOUSAND));
      IreadRmatType2Stencil(RmatType2Stencil, (rank/THOUSAND));
      IreadVtxMaps(VtxMap1, VtxMap2, VtxMap3, VtxMap4, (rank/THOUSAND));
    } else {
      //Other processors simply allocate the required amount of memory
      typedef double**** double4Ptr;
      typedef double*** double3Ptr;
      typedef double** double2Ptr;
      typedef double* doublePtr;

      RmatType1Stencil = new double4Ptr[8];
      for(int i = 0; i < 8; i++) {
        RmatType1Stencil[i] = new double3Ptr[8];
        for(int j = 0; j < 8; j++) {
          RmatType1Stencil[i][j] = new double2Ptr[18];
          for(int k = 0; k < 18; k++) {
            RmatType1Stencil[i][j][k] = new doublePtr[8];
            for(int l = 0; l < 8; l++) {
              RmatType1Stencil[i][j][k][l] = new double[8];
            }//end for l
          }//end for k
        }//end for j
      }//end for i

      RmatType2Stencil = new double3Ptr[8];
      for(int j = 0; j < 8; j++) {
        RmatType2Stencil[j] = new double2Ptr[18];
        for(int k = 0; k < 18; k++) {
          RmatType2Stencil[j][k] = new doublePtr[8];
          for(int l = 0; l < 8; l++) {
            RmatType2Stencil[j][k][l] = new double[8];
          }//end for l
        }//end for k
      }//end for j

      typedef unsigned short***** us5Ptr;
      typedef unsigned short**** us4Ptr;
      typedef unsigned short*** us3Ptr;
      typedef unsigned short** us2Ptr;
      typedef unsigned short* usPtr;

      VtxMap1 = new us3Ptr[8];
      VtxMap2 = new us4Ptr[8];
      VtxMap3 = new us4Ptr[7];
      VtxMap4 = new us5Ptr[7];
      for(int i = 0; i < 8; i++) {
        VtxMap1[i] = new us2Ptr[8];
        VtxMap2[i] = new us3Ptr[8];
        for(int j = 0; j < 8; j++) {
          VtxMap1[i][j] = new usPtr[18];
          VtxMap2[i][j] = new us2Ptr[8];
          for(int k = 0; k < 18; k++) {
            VtxMap1[i][j][k] = new unsigned short[8];
          }
          for(int k = 0; k < 8; k++) {
            VtxMap2[i][j][k] = new usPtr[18];
            for(int l = 0; l < 18; l++) {
              VtxMap2[i][j][k][l] = new unsigned short[8];
            }//end for l
          }//end for k
        }//end for j
      }//end for i

      for(int i = 0; i < 7; i++) {
        VtxMap3[i] = new us3Ptr[2];
        VtxMap4[i] = new us4Ptr[2];
        for(int j = 0; j < 2; j++) {
          VtxMap3[i][j] = new us2Ptr[8];
          VtxMap4[i][j] = new us3Ptr[8];
          for(int k = 0; k < 8; k++) {
            VtxMap3[i][j][k] = new usPtr[18];
            VtxMap4[i][j][k] = new us2Ptr[8];
            for(int l = 0; l < 18; l++) {
              VtxMap3[i][j][k][l] = new unsigned short[8];
            }
            for(int l = 0; l < 8; l++) {
              VtxMap4[i][j][k][l] = new usPtr[18];
              for(int m = 0; m < 18; m++) {
                VtxMap4[i][j][k][l][m] = new unsigned short[8];
              }
            }//end for l
          }//end for k
        }//end for j
      }//end for i

    }//end if processor reads
    //Processor 0 in each comm broadcasts to the other processors in that comm
    //doubles ...
    //RmatType1[8][8][18][8][8]: 73728
    //RmatType2[8][18][8][8]: 9216
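    //Arithmetic check: 8*8*18*8*8 = 73728 and 8*18*8*8 = 9216, so the packed
    //buffer below needs 73728 + 9216 = 82944 doubles.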
    double* tmpRmats = new double[82944];

    if((rank % THOUSAND) == 0) {
      unsigned int ctr = 0;
      for(int i = 0; i < 8; i++) {
        for(int j = 0; j < 8; j++) {
          for(int k = 0; k < 18; k++) {
            for(int l = 0; l < 8; l++) {
              for(int m = 0; m < 8; m++) {
                tmpRmats[ctr] = RmatType1Stencil[i][j][k][l][m];
                ctr++;
              }//end for m
            }//end for l
          }//end for k
        }//end for j
      }//end for i
      for(int j = 0; j < 8; j++) {
        for(int k = 0; k < 18; k++) {
          for(int l = 0; l < 8; l++) {
            for(int m = 0; m < 8; m++) {
              tmpRmats[ctr] = RmatType2Stencil[j][k][l][m];
              ctr++;
            }//end for m
          }//end for l
        }//end for k
      }//end for j
    }

    par::Mpi_Bcast<double>(tmpRmats, 82944, 0, newComm);

    if((rank % THOUSAND) != 0) {
      unsigned int ctr = 0;
      for(int i = 0; i < 8; i++) {
        for(int j = 0; j < 8; j++) {
          for(int k = 0; k < 18; k++) {
            for(int l = 0; l < 8; l++) {
              for(int m = 0; m < 8; m++) {
                RmatType1Stencil[i][j][k][l][m] = tmpRmats[ctr];
                ctr++;
              }//end for m
            }//end for l
          }//end for k
        }//end for j
      }//end for i
      for(int j = 0; j < 8; j++) {
        for(int k = 0; k < 18; k++) {
          for(int l = 0; l < 8; l++) {
            for(int m = 0; m < 8; m++) {
              RmatType2Stencil[j][k][l][m] = tmpRmats[ctr];
              ctr++;
            }//end for m
          }//end for l
        }//end for k
      }//end for j
    }

    delete [] tmpRmats;

    //unsigned shorts...
    //map1[8][8][18][8]: 9216
    //map2[8][8][8][18][8]: 73728
    //map3[7][2][8][18][8]: 16128
    //map4[7][2][8][8][18][8]: 129024
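    //Arithmetic check: 8*8*18*8 = 9216, 8*8*8*18*8 = 73728, 7*2*8*18*8 = 16128
    //and 7*2*8*8*18*8 = 129024, so the packed buffer below needs
    //9216 + 73728 + 16128 + 129024 = 228096 unsigned shorts.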
    unsigned short* tmpVtxMaps = new unsigned short[228096];

    if((rank % THOUSAND) == 0) {
      unsigned int ctr = 0;
      for(int i = 0; i < 8; i++) {
        for(int j = 0; j < 8; j++) {
          for(int k = 0; k < 18; k++) {
            for(int l = 0; l < 8; l++) {
              tmpVtxMaps[ctr] = VtxMap1[i][j][k][l];
              ctr++;
            }//end for l
          }//end for k
        }//end for j
      }//end for i
      for(int i = 0; i < 8; i++) {
        for(int j = 0; j < 8; j++) {
          for(int k = 0; k < 8; k++) {
            for(int l = 0; l < 18; l++) {
              for(int m = 0; m < 8; m++) {
                tmpVtxMaps[ctr] = VtxMap2[i][j][k][l][m];
                ctr++;
              }//end for m
            }//end for l
          }//end for k
        }//end for j
      }//end for i
      for(int i = 0; i < 7; i++) {
        for(int j = 0; j < 2; j++) {
          for(int k = 0; k < 8; k++) {
            for(int l = 0; l < 18; l++) {
              for(int m = 0; m < 8; m++) {
                tmpVtxMaps[ctr] = VtxMap3[i][j][k][l][m];
                ctr++;
              }//end for m
            }//end for l
          }//end for k
        }//end for j
      }//end for i
      for(int i = 0; i < 7; i++) {
        for(int j = 0; j < 2; j++) {
          for(int k = 0; k < 8; k++) {
            for(int l = 0; l < 8; l++) {
              for(int m = 0; m < 18; m++) {
                for(int n = 0; n < 8; n++) {
                  tmpVtxMaps[ctr] = VtxMap4[i][j][k][l][m][n];
                  ctr++;
                }//end for n
              }//end for m
            }//end for l
          }//end for k
        }//end for j
      }//end for i
    }

    par::Mpi_Bcast<unsigned short>(tmpVtxMaps, 228096, 0, newComm);

    if((rank % THOUSAND) != 0) {
      unsigned int ctr = 0;
      for(int i = 0; i < 8; i++) {
        for(int j = 0; j < 8; j++) {
          for(int k = 0; k < 18; k++) {
            for(int l = 0; l < 8; l++) {
              VtxMap1[i][j][k][l] = tmpVtxMaps[ctr];
              ctr++;
            }//end for l
          }//end for k
        }//end for j
      }//end for i
      for(int i = 0; i < 8; i++) {
        for(int j = 0; j < 8; j++) {
          for(int k = 0; k < 8; k++) {
            for(int l = 0; l < 18; l++) {
              for(int m = 0; m < 8; m++) {
                VtxMap2[i][j][k][l][m] = tmpVtxMaps[ctr];
                ctr++;
              }//end for m
            }//end for l
          }//end for k
        }//end for j
      }//end for i
      for(int i = 0; i < 7; i++) {
        for(int j = 0; j < 2; j++) {
          for(int k = 0; k < 8; k++) {
            for(int l = 0; l < 18; l++) {
              for(int m = 0; m < 8; m++) {
                VtxMap3[i][j][k][l][m] = tmpVtxMaps[ctr];
                ctr++;
              }//end for m
            }//end for l
          }//end for k
        }//end for j
      }//end for i
      for(int i = 0; i < 7; i++) {
        for(int j = 0; j < 2; j++) {
          for(int k = 0; k < 8; k++) {
            for(int l = 0; l < 8; l++) {
              for(int m = 0; m < 18; m++) {
                for(int n = 0; n < 8; n++) {
                  VtxMap4[i][j][k][l][m][n] = tmpVtxMaps[ctr];
                  ctr++;
                }//end for n
              }//end for m
            }//end for l
          }//end for k
        }//end for j
      }//end for i
    }

    delete [] tmpVtxMaps;

  }

  void DAMG_InitPrivateType1(MPI_Comm comm) {

    int rank;
    MPI_Comm_rank(comm, &rank);

    if(!rank) {
      //Processor 0 reads the stencils
      readRmatType1Stencil(RmatType1Stencil);
      readRmatType2Stencil(RmatType2Stencil);
      readVtxMaps(VtxMap1, VtxMap2, VtxMap3, VtxMap4);
    } else {
      //Other processors simply allocate the required amount of memory
      typedef double**** double4Ptr;
      typedef double*** double3Ptr;
      typedef double** double2Ptr;
      typedef double* doublePtr;

      RmatType1Stencil = new double4Ptr[8];
      for(int i = 0; i < 8; i++) {
        RmatType1Stencil[i] = new double3Ptr[8];
        for(int j = 0; j < 8; j++) {
          RmatType1Stencil[i][j] = new double2Ptr[18];
          for(int k = 0; k < 18; k++) {
            RmatType1Stencil[i][j][k] = new doublePtr[8];
            for(int l = 0; l < 8; l++) {
              RmatType1Stencil[i][j][k][l] = new double[8];
            }
          }
        }
      }

      RmatType2Stencil = new double3Ptr[8];
      for(int j = 0; j < 8; j++) {
        RmatType2Stencil[j] = new double2Ptr[18];
        for(int k = 0; k < 18; k++) {
          RmatType2Stencil[j][k] = new doublePtr[8];
          for(int l = 0; l < 8; l++) {
            RmatType2Stencil[j][k][l] = new double[8];
          }
        }
      }

      typedef unsigned short***** us5Ptr;
      typedef unsigned short**** us4Ptr;
      typedef unsigned short*** us3Ptr;
      typedef unsigned short** us2Ptr;
      typedef unsigned short* usPtr;

      VtxMap1 = new us3Ptr[8];
      VtxMap2 = new us4Ptr[8];
      VtxMap3 = new us4Ptr[7];
      VtxMap4 = new us5Ptr[7];
      for(int i = 0; i < 8; i++) {
        VtxMap1[i] = new us2Ptr[8];
        VtxMap2[i] = new us3Ptr[8];
        for(int j = 0; j < 8; j++) {
          VtxMap1[i][j] = new usPtr[18];
          VtxMap2[i][j] = new us2Ptr[8];
          for(int k = 0; k < 18; k++) {
            VtxMap1[i][j][k] = new unsigned short[8];
          }
          for(int k = 0; k < 8; k++) {
            VtxMap2[i][j][k] = new usPtr[18];
            for(int l = 0; l < 18; l++) {
              VtxMap2[i][j][k][l] = new unsigned short[8];
            }
          }
        }
      }//end for i

      for(int i = 0; i < 7; i++) {
        VtxMap3[i] = new us3Ptr[2];
        VtxMap4[i] = new us4Ptr[2];
        for(int j = 0; j < 2; j++) {
          VtxMap3[i][j] = new us2Ptr[8];
          VtxMap4[i][j] = new us3Ptr[8];
          for(int k = 0; k < 8; k++) {
            VtxMap3[i][j][k] = new usPtr[18];
            VtxMap4[i][j][k] = new us2Ptr[8];
            for(int l = 0; l < 18; l++) {
              VtxMap3[i][j][k][l] = new unsigned short[8];
            }
            for(int l = 0; l < 8; l++) {
              VtxMap4[i][j][k][l] = new usPtr[18];
              for(int m = 0; m < 18; m++) {
                VtxMap4[i][j][k][l][m] = new unsigned short[8];
              }
            }
          }
        }
      }//end for i

    }//end if p0

    //Processor 0 broadcasts to the other processors
    //doubles ...
    //RmatType1[8][8][18][8][8]: 73728
    //RmatType2[8][18][8][8]: 9216
    double* tmpRmats = new double[82944];

    if(!rank) {
      unsigned int ctr = 0;
      for(int i = 0; i < 8; i++) {
        for(int j = 0; j < 8; j++) {
          for(int k = 0; k < 18; k++) {
            for(int l = 0; l < 8; l++) {
              for(int m = 0; m < 8; m++) {
                tmpRmats[ctr] = RmatType1Stencil[i][j][k][l][m];
                ctr++;
              }//end for m
            }//end for l
          }//end for k
        }//end for j
      }//end for i
      for(int j = 0; j < 8; j++) {
        for(int k = 0; k < 18; k++) {
          for(int l = 0; l < 8; l++) {
            for(int m = 0; m < 8; m++) {
              tmpRmats[ctr] = RmatType2Stencil[j][k][l][m];
              ctr++;
            }//end for m
          }//end for l
        }//end for k
      }//end for j
    }

    par::Mpi_Bcast<double>(tmpRmats, 82944, 0, comm);

    if(rank) {
      unsigned int ctr = 0;
      for(int i = 0; i < 8; i++) {
        for(int j = 0; j < 8; j++) {
          for(int k = 0; k < 18; k++) {
            for(int l = 0; l < 8; l++) {
              for(int m = 0; m < 8; m++) {
                RmatType1Stencil[i][j][k][l][m] = tmpRmats[ctr];
                ctr++;
              }//end for m
            }//end for l
          }//end for k
        }//end for j
      }//end for i
      for(int j = 0; j < 8; j++) {
        for(int k = 0; k < 18; k++) {
          for(int l = 0; l < 8; l++) {
            for(int m = 0; m < 8; m++) {
              RmatType2Stencil[j][k][l][m] = tmpRmats[ctr];
              ctr++;
            }//end for m
          }//end for l
        }//end for k
      }//end for j
    }

    delete [] tmpRmats;

    //unsigned shorts...
    //map1[8][8][18][8]: 9216
    //map2[8][8][8][18][8]: 73728
    //map3[7][2][8][18][8]: 16128
    //map4[7][2][8][8][18][8]: 129024
    unsigned short* tmpVtxMaps = new unsigned short[228096];

    if(!rank) {
      unsigned int ctr = 0;
      for(int i = 0; i < 8; i++) {
        for(int j = 0; j < 8; j++) {
          for(int k = 0; k < 18; k++) {
            for(int l = 0; l < 8; l++) {
              tmpVtxMaps[ctr] = VtxMap1[i][j][k][l];
              ctr++;
            }//end for l
          }//end for k
        }//end for j
      }//end for i
      for(int i = 0; i < 8; i++) {
        for(int j = 0; j < 8; j++) {
          for(int k = 0; k < 8; k++) {
            for(int l = 0; l < 18; l++) {
              for(int m = 0; m < 8; m++) {
                tmpVtxMaps[ctr] = VtxMap2[i][j][k][l][m];
                ctr++;
              }//end for m
            }//end for l
          }//end for k
        }//end for j
      }//end for i
      for(int i = 0; i < 7; i++) {
        for(int j = 0; j < 2; j++) {
          for(int k = 0; k < 8; k++) {
            for(int l = 0; l < 18; l++) {
              for(int m = 0; m < 8; m++) {
                tmpVtxMaps[ctr] = VtxMap3[i][j][k][l][m];
                ctr++;
              }//end for m
            }//end for l
          }//end for k
        }//end for j
      }//end for i
      for(int i = 0; i < 7; i++) {
        for(int j = 0; j < 2; j++) {
          for(int k = 0; k < 8; k++) {
            for(int l = 0; l < 8; l++) {
              for(int m = 0; m < 18; m++) {
                for(int n = 0; n < 8; n++) {
                  tmpVtxMaps[ctr] = VtxMap4[i][j][k][l][m][n];
                  ctr++;
                }//end for n
              }//end for m
            }//end for l
          }//end for k
        }//end for j
      }//end for i
    }

    par::Mpi_Bcast<unsigned short>(tmpVtxMaps, 228096, 0, comm);

    if(rank) {
      unsigned int ctr = 0;
      for(int i = 0; i < 8; i++) {
        for(int j = 0; j < 8; j++) {
          for(int k = 0; k < 18; k++) {
            for(int l = 0; l < 8; l++) {
              VtxMap1[i][j][k][l] = tmpVtxMaps[ctr];
              ctr++;
            }//end for l
          }//end for k
        }//end for j
      }//end for i
      for(int i = 0; i < 8; i++) {
        for(int j = 0; j < 8; j++) {
          for(int k = 0; k < 8; k++) {
            for(int l = 0; l < 18; l++) {
              for(int m = 0; m < 8; m++) {
                VtxMap2[i][j][k][l][m] = tmpVtxMaps[ctr];
                ctr++;
              }//end for m
            }//end for l
          }//end for k
        }//end for j
      }//end for i
      for(int i = 0; i < 7; i++) {
        for(int j = 0; j < 2; j++) {
          for(int k = 0; k < 8; k++) {
            for(int l = 0; l < 18; l++) {
              for(int m = 0; m < 8; m++) {
                VtxMap3[i][j][k][l][m] = tmpVtxMaps[ctr];
                ctr++;
              }//end for m
            }//end for l
          }//end for k
        }//end for j
      }//end for i
      for(int i = 0; i < 7; i++) {
        for(int j = 0; j < 2; j++) {
          for(int k = 0; k < 8; k++) {
            for(int l = 0; l < 8; l++) {
              for(int m = 0; m < 18; m++) {
                for(int n = 0; n < 8; n++) {
                  VtxMap4[i][j][k][l][m][n] = tmpVtxMaps[ctr];
                  ctr++;
                }//end for n
              }//end for m
            }//end for l
          }//end for k
        }//end for j
      }//end for i
    }

    delete [] tmpVtxMaps;

  }

  PetscErrorCode DAMG_Finalize() {
    PROF_MG_FINAL_BEGIN

    ot::DA_Finalize();

    destroyRmatType1Stencil(RmatType1Stencil);
    destroyRmatType2Stencil(RmatType2Stencil);
    destroyVtxMaps(VtxMap1, VtxMap2, VtxMap3, VtxMap4);

    PROF_MG_FINAL_END
  }

  PetscErrorCode DAMGDestroy(DAMG* damg)
  {
    PetscErrorCode ierr;

    PetscFunctionBegin;

    //Check for null before dereferencing damg.
    if (!damg) SETERRQ(PETSC_ERR_ARG_NULL,"Passing null as DAMG");

    int i, nlevels = damg[0]->nlevels;

    for (i = 1; i < nlevels; i++) {
      if (damg[i]->R) {
        ierr = MatDestroy(damg[i]->R);
        CHKERRQ(ierr);
      }
    }

    for (i = 0; i < nlevels; i++) {
      if (damg[i]->x) {
        ierr = VecDestroy(damg[i]->x);
        CHKERRQ(ierr);
      }

      if (damg[i]->b) {
        ierr = VecDestroy(damg[i]->b);
        CHKERRQ(ierr);
      }

      if (damg[i]->r) {
        ierr = VecDestroy(damg[i]->r);
        CHKERRQ(ierr);
      }

      if(damg[i]->suppressedDOF) {
        delete [] (damg[i]->suppressedDOF);
        damg[i]->suppressedDOF = NULL;
      }

      if(damg[i]->suppressedDOFaux) {
        delete [] (damg[i]->suppressedDOFaux);
        damg[i]->suppressedDOFaux = NULL;
      }

      if (damg[i]->B && (damg[i]->B != damg[i]->J)) {
        ierr = MatDestroy(damg[i]->B);CHKERRQ(ierr);
      }

      if (damg[i]->J) {
        ierr = MatDestroy(damg[i]->J);CHKERRQ(ierr);
      }

      if (damg[i]->ksp) {
        ierr = KSPDestroy(damg[i]->ksp);CHKERRQ(ierr);
      }

      if (damg[i]->da) {
        delete damg[i]->da;
        damg[i]->da = NULL;
      }

      if (damg[i]->da_aux) {
        delete damg[i]->da_aux;
        damg[i]->da_aux = NULL;
      }

      delete damg[i];
      damg[i] = NULL;
    }//end for all levels

    delete [] damg;
    PetscFunctionReturn(0);
  }

  //RS: This is the function used to create the J matrix.
  //It must be called before DAMGSetKSP or DAMGSetSNES if you want J and B
  //to be different. If it is not called, J and B will be the same by default.
  PetscErrorCode DAMGCreateJMatrix(DAMG damg, PetscErrorCode (*crjac)(DAMG,Mat*)) {
    PetscErrorCode ierr;
    PetscFunctionBegin;
    if(crjac) {
      ierr = (*crjac)(damg,&(damg->J)); CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_ARG_NULL,"ot::DA can not create a Matrix for you! Pass a function handle.");
    }
    PetscFunctionReturn(0);
  }
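
  //A minimal usage sketch (added comment, not part of the original file);
  //CreateJacobian is a hypothetical user callback with the crjac signature:
  //
  //  for(int i = 0; i < nlevels; i++) {
  //    ot::DAMGCreateJMatrix(damg[i], CreateJacobian); //sets damg[i]->J
  //  }
  //  //Since J is now set, DAMGSetKSP will create B separately via its own
  //  //crjac argument, and J and B remain distinct.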

  //crjac is the function that creates the B matrix for each level.
  //compJac is an optional function that recomputes the entries of the matrix.
  //It is useful if you want to change the entries without re-building the
  //matrix; it is not needed for a single stand-alone solve.
  PetscErrorCode DAMGSetKSP(DAMG *damg, PetscErrorCode (*crjac)(DAMG,Mat*),
      PetscErrorCode (*compJac)(DAMG,Mat,Mat), PetscErrorCode (*rhs)(DAMG,Vec) )
  {
    PetscErrorCode ierr;

    //Galerkin coarsening is NOT an option.
    PROF_MG_SET_KSP_BEGIN

    //Check for null before dereferencing damg.
    if (!damg) SETERRQ(PETSC_ERR_ARG_NULL,"Passing null as DAMG");
    if (!crjac) SETERRQ(PETSC_ERR_ARG_NULL,"ot::DA can not create a Matrix for you! Pass a function handle.");

    int nlevels = damg[0]->nlevels;

    int rank;
    MPI_Comm_rank(damg[0]->comm, &rank);

    if (!damg[0]->ksp) {
      /* create solvers for each level if they don't already exist */
      for (int i = 0; i < nlevels; i++) {
        if (!damg[i]->B) {
          ierr = (*crjac)(damg[i],&(damg[i]->B));CHKERRQ(ierr);
        }
        if (!damg[i]->J) {
#ifdef __DEBUG_MG__
          if(!rank) {
            std::cout<<"J and B are the same for lev: "<<i<<std::endl;
            fflush(stdout);
          }
#endif
          damg[i]->J = damg[i]->B;
        } else {
#ifdef __DEBUG_MG__
          if(!rank) {
            std::cout<<"J and B are different for lev: "<<i<<std::endl;
            fflush(stdout);
          }
#endif
          assert(damg[i]->J != damg[i]->B);
        }

        ierr = KSPCreate(damg[i]->comm,&(damg[i]->ksp));CHKERRQ(ierr);

        //No prefix for the finest level
        if(i < (nlevels-1)) {
          char prefix[256];
          sprintf(prefix,"damg_levels_%d_",(int)i);
          KSPSetOptionsPrefix(damg[i]->ksp,prefix);
        }

        ierr = DAMGSetUpLevel(damg,damg[i]->ksp,i+1); CHKERRQ(ierr);

        ierr = KSPSetFromOptions(damg[i]->ksp);CHKERRQ(ierr);

        if( (damg[0]->da->getNpesActive()) < (damg[0]->da->getNpesAll()) ) {
          //If all processors are NOT active on the coarsest grid,
          //reset the solver on the coarsest grid to be KSP_Shell
          PC pc;
          const char* clearOptionPrefix;
          char optionName[256];
          if(i == 0) {
            KSPGetOptionsPrefix(damg[0]->ksp, &clearOptionPrefix);

            sprintf(optionName, "-%sksp_type",clearOptionPrefix);
            ierr = PetscOptionsClearValue(optionName); CHKERRQ(ierr);
            ierr = KSPSetType(damg[0]->ksp, KSPRICHARDSON); CHKERRQ(ierr);

            sprintf(optionName, "-%sksp_knoll",clearOptionPrefix);
            ierr = PetscOptionsClearValue(optionName); CHKERRQ(ierr);
            ierr = KSPSetInitialGuessKnoll(damg[0]->ksp, PETSC_FALSE);
            CHKERRQ(ierr);

            sprintf(optionName, "-%sksp_richardson_scale",clearOptionPrefix);
            ierr = PetscOptionsClearValue(optionName); CHKERRQ(ierr);
            ierr = KSPRichardsonSetScale(damg[0]->ksp, 1.0);
            CHKERRQ(ierr);

            sprintf(optionName, "-%sksp_right_pc",clearOptionPrefix);
            ierr = PetscOptionsClearValue(optionName); CHKERRQ(ierr);
            sprintf(optionName, "-%sksp_symmetric_pc",clearOptionPrefix);
            ierr = PetscOptionsClearValue(optionName); CHKERRQ(ierr);
            ierr = KSPSetPreconditionerSide(damg[0]->ksp, PC_LEFT);
            CHKERRQ(ierr);

            sprintf(optionName, "-%sksp_norm_type",clearOptionPrefix);
            ierr = PetscOptionsClearValue(optionName); CHKERRQ(ierr);
            ierr = KSPSetNormType(damg[0]->ksp, KSP_NORM_NO);
            CHKERRQ(ierr);

            sprintf(optionName, "-%sksp_rtol",clearOptionPrefix);
            ierr = PetscOptionsClearValue(optionName); CHKERRQ(ierr);
            sprintf(optionName, "-%sksp_atol",clearOptionPrefix);
            ierr = PetscOptionsClearValue(optionName); CHKERRQ(ierr);
            sprintf(optionName, "-%sksp_divtol",clearOptionPrefix);
            ierr = PetscOptionsClearValue(optionName); CHKERRQ(ierr);
            sprintf(optionName, "-%sksp_max_it",clearOptionPrefix);
            ierr = PetscOptionsClearValue(optionName); CHKERRQ(ierr);
            ierr = KSPSetTolerances(damg[0]->ksp, PETSC_DEFAULT, PETSC_DEFAULT,
                PETSC_DEFAULT, 1);
            CHKERRQ(ierr);

            ierr = KSPSetConvergenceTest(damg[0]->ksp, KSPSkipConverged,
                PETSC_NULL);
            CHKERRQ(ierr);

            ierr = KSPSetInitialGuessNonzero(damg[0]->ksp, PETSC_FALSE);
            CHKERRQ(ierr);

            ierr = KSPGetPC(damg[0]->ksp, &pc); CHKERRQ(ierr);

            PCGetOptionsPrefix(pc, &clearOptionPrefix);
            sprintf(optionName, "-%spc_type",clearOptionPrefix);
            ierr = PetscOptionsClearValue(optionName); CHKERRQ(ierr);
            ierr = PCSetType(pc, PCSHELL); CHKERRQ(ierr);
            ierr = PCShellSetName(pc, "PC_KSP_Shell"); CHKERRQ(ierr);

            PC_KSP_Shell* pcShellContext = new PC_KSP_Shell;
            pcShellContext->sol_private = NULL;
            pcShellContext->rhs_private = NULL;
            pcShellContext->ksp_private = NULL;
            pcShellContext->pc = pc;
            pcShellContext->iAmActive = damg[0]->da->iAmActive();
            pcShellContext->commActive = damg[0]->da->getCommActive();
            ierr = PCShellSetContext(pc, pcShellContext); CHKERRQ(ierr);
            ierr = PCShellSetSetUp(pc, PC_KSP_Shell_SetUp); CHKERRQ(ierr);
            ierr = PCShellSetApply(pc, PC_KSP_Shell_Apply); CHKERRQ(ierr);
            ierr = PCShellSetDestroy(pc, PC_KSP_Shell_Destroy); CHKERRQ(ierr);
          } else {
            ierr = KSPGetPC(damg[i]->ksp, &pc); CHKERRQ(ierr);
          }

          PetscTruth ismg;
          PetscTypeCompare((PetscObject)pc, PCMG, &ismg);

          if(ismg) {
            PetscTruth useRTLMG;
            ierr = PetscOptionsHasName(PETSC_NULL,"-damg_useRTLMG",&useRTLMG);
            CHKERRQ(ierr);
            KSP lksp;
            if(useRTLMG) {
              while(ismg) {
                ierr = PCMGGetSmoother(pc,0,&lksp);CHKERRQ(ierr);
                ierr = KSPGetPC(lksp, &pc); CHKERRQ(ierr);
                PetscTypeCompare((PetscObject)pc, PCMG, &ismg);
              }
            } else {
              ierr = PCMGGetSmoother(pc,0,&lksp);CHKERRQ(ierr);
            }

            KSPGetOptionsPrefix(lksp, &clearOptionPrefix);

            sprintf(optionName, "-%sksp_type",clearOptionPrefix);
            ierr = PetscOptionsClearValue(optionName); CHKERRQ(ierr);
            ierr = KSPSetType(lksp, KSPRICHARDSON); CHKERRQ(ierr);

            sprintf(optionName, "-%sksp_knoll",clearOptionPrefix);
            ierr = PetscOptionsClearValue(optionName); CHKERRQ(ierr);
            ierr = KSPSetInitialGuessKnoll(lksp, PETSC_FALSE);
            CHKERRQ(ierr);

            sprintf(optionName, "-%sksp_richardson_scale",clearOptionPrefix);
            ierr = PetscOptionsClearValue(optionName); CHKERRQ(ierr);
            ierr = KSPRichardsonSetScale(lksp, 1.0);
            CHKERRQ(ierr);

            sprintf(optionName, "-%sksp_right_pc",clearOptionPrefix);
            ierr = PetscOptionsClearValue(optionName); CHKERRQ(ierr);
            sprintf(optionName, "-%sksp_symmetric_pc",clearOptionPrefix);
            ierr = PetscOptionsClearValue(optionName); CHKERRQ(ierr);
            ierr = KSPSetPreconditionerSide(lksp, PC_LEFT);
            CHKERRQ(ierr);

            sprintf(optionName, "-%sksp_norm_type",clearOptionPrefix);
            ierr = PetscOptionsClearValue(optionName); CHKERRQ(ierr);
            ierr = KSPSetNormType(lksp, KSP_NORM_NO);
            CHKERRQ(ierr);

            sprintf(optionName, "-%sksp_rtol",clearOptionPrefix);
            ierr = PetscOptionsClearValue(optionName); CHKERRQ(ierr);
            sprintf(optionName, "-%sksp_atol",clearOptionPrefix);
            ierr = PetscOptionsClearValue(optionName); CHKERRQ(ierr);
            sprintf(optionName, "-%sksp_divtol",clearOptionPrefix);
            ierr = PetscOptionsClearValue(optionName); CHKERRQ(ierr);
            sprintf(optionName, "-%sksp_max_it",clearOptionPrefix);
            ierr = PetscOptionsClearValue(optionName); CHKERRQ(ierr);
            ierr = KSPSetTolerances(lksp, PETSC_DEFAULT, PETSC_DEFAULT,
                PETSC_DEFAULT, 1);
            CHKERRQ(ierr);

            ierr = KSPSetConvergenceTest(lksp, KSPSkipConverged, PETSC_NULL);
            CHKERRQ(ierr);

            ierr = KSPSetInitialGuessNonzero(lksp, PETSC_FALSE);
            CHKERRQ(ierr);

            ierr = KSPGetPC(lksp, &pc); CHKERRQ(ierr);

            PCGetOptionsPrefix(pc, &clearOptionPrefix);
            sprintf(optionName, "-%spc_type",clearOptionPrefix);
            ierr = PetscOptionsClearValue(optionName); CHKERRQ(ierr);
            ierr = PCSetType(pc, PCSHELL); CHKERRQ(ierr);
            ierr = PCShellSetName(pc, "PC_KSP_Shell"); CHKERRQ(ierr);

            PC_KSP_Shell* pcShellContext = new PC_KSP_Shell;
            pcShellContext->sol_private = NULL;
            pcShellContext->rhs_private = NULL;
            pcShellContext->ksp_private = NULL;
            pcShellContext->pc = pc;
            pcShellContext->iAmActive = damg[0]->da->iAmActive();
            pcShellContext->commActive = damg[0]->da->getCommActive();
            ierr = PCShellSetContext(pc, pcShellContext); CHKERRQ(ierr);
            ierr = PCShellSetSetUp(pc, PC_KSP_Shell_SetUp); CHKERRQ(ierr);
            ierr = PCShellSetApply(pc, PC_KSP_Shell_Apply); CHKERRQ(ierr);
            ierr = PCShellSetDestroy(pc, PC_KSP_Shell_Destroy); CHKERRQ(ierr);
          }//end if PC==MG
        }//end if all procs active on coarsest level

        damg[i]->solve = DAMGSolveKSP;
        damg[i]->rhs   = rhs;
      }//end for i
    }//end if ksp

#ifdef __DEBUG_MG__
    MPI_Barrier(damg[0]->comm);
    if(!rank) {
      std::cout<<"Finished setting up ksp for all levels."<<std::endl;
      fflush(stdout);
    }
    MPI_Barrier(damg[0]->comm);
#endif

    //Evaluate the matrix at each level
    for (int i = 0; i < nlevels; i++) {
      if(compJac) {
        ierr = (*compJac)(damg[i],damg[i]->J,damg[i]->B); CHKERRQ(ierr);
      }
    }

    //Set operators at each level.
    for(int level = 0; level < nlevels; level++) {
      KSPSetOperators(damg[level]->ksp, damg[level]->J,
          damg[level]->B, SAME_NONZERO_PATTERN);

      PC pc;
      KSPGetPC(damg[level]->ksp, &pc);

      PetscTruth ismg;
      PetscTypeCompare((PetscObject)pc, PCMG, &ismg);

      if(ismg) {
        for(int i = 0; i <= level; i++) {
          KSP lksp;
          PCMGGetSmoother(pc, i, &lksp);
          KSPSetOperators(lksp, damg[i]->J, damg[i]->B, SAME_NONZERO_PATTERN);
        }//end for i
      }
    }//end for level

#ifdef __DEBUG_MG__
    MPI_Barrier(damg[0]->comm);
    if(!rank) {
      std::cout<<"Finished building matrices for all levels."<<std::endl;
      fflush(stdout);
    }
    MPI_Barrier(damg[0]->comm);
#endif

    PROF_MG_SET_KSP_END
  }
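
  //A minimal end-to-end sketch (added comment, not part of the original
  //file); CreateMatrix, ComputeMatrix and ComputeRHS are hypothetical user
  //callbacks with the signatures expected by DAMGSetKSP:
  //
  //  int nlevels = 5;
  //  DAMG* damg = NULL;
  //  ot::DAMGCreateAndSetDA(MPI_COMM_WORLD, nlevels, NULL, &damg,
  //      finestOctree, 1, 1.5, false, false);
  //  ot::DAMGSetKSP(damg, CreateMatrix, ComputeMatrix, ComputeRHS);
  //  ot::DAMGSolve(damg);
  //  ot::DAMGDestroy(damg);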

  PetscErrorCode DAMGSetNullSpace(DAMG* damg, PetscTruth has_cnst, PetscInt n,
      PetscErrorCode (*func)(DAMG,Vec[]))
  {
    PetscErrorCode ierr;
    int            i, j, nlevels = damg[0]->nlevels;
    Vec            *nulls = 0;
    MatNullSpace   nullsp;
    KSP            iksp;
    PC             pc,ipc;
    PetscTruth     ismg,isred;

    PetscFunctionBegin;
    if (!damg) SETERRQ(PETSC_ERR_ARG_NULL,"Passing null as DAMG");
    if (!damg[0]->ksp) SETERRQ(PETSC_ERR_ORDER,"Must call AFTER DAMGSetKSP() or DAMGSetSNES()");
    if ((n && !func) || (!n && func)) SETERRQ(PETSC_ERR_ARG_INCOMP,"Both n and func() must be set together");
    if (n < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Cannot have negative number of vectors in null space n = %D",n);

    for (i = 0; i < nlevels; i++) {
      if (n) {
        ierr = VecDuplicateVecs(damg[i]->b,n,&nulls);CHKERRQ(ierr);
        ierr = (*func)(damg[i],nulls);CHKERRQ(ierr);
      }
      ierr = MatNullSpaceCreate(damg[i]->comm,has_cnst,n,nulls,&nullsp);CHKERRQ(ierr);
      ierr = KSPSetNullSpace(damg[i]->ksp,nullsp);CHKERRQ(ierr);
      for (j = i; j < nlevels; j++) {
        ierr = KSPGetPC(damg[j]->ksp,&pc);CHKERRQ(ierr);
        ierr = PetscTypeCompare((PetscObject)pc,PCMG,&ismg);CHKERRQ(ierr);
        if (ismg) {
          ierr = PCMGGetSmoother(pc,i,&iksp);CHKERRQ(ierr);
          ierr = KSPSetNullSpace(iksp, nullsp);CHKERRQ(ierr);
        }
      }
      ierr = MatNullSpaceDestroy(nullsp);CHKERRQ(ierr);
      if (n) {
        ierr = PetscFree(nulls);CHKERRQ(ierr);
      }
    }//end for i

    /* make all the coarse grid solvers have LU shift since they are singular */
    for (i = 0; i < nlevels; i++) {
      ierr = KSPGetPC(damg[i]->ksp,&pc);CHKERRQ(ierr);
      ierr = PetscTypeCompare((PetscObject)pc,PCMG,&ismg);CHKERRQ(ierr);
      if (ismg) {
        ierr = PCMGGetSmoother(pc,0,&iksp);CHKERRQ(ierr);
        ierr = KSPGetPC(iksp,&ipc);CHKERRQ(ierr);
        ierr = PetscTypeCompare((PetscObject)ipc,PCREDUNDANT,&isred);CHKERRQ(ierr);
        if (isred) {
          ierr = PCRedundantGetPC(ipc,&ipc);CHKERRQ(ierr);
        }
        ierr = PCFactorSetShiftPd(ipc,PETSC_TRUE);CHKERRQ(ierr);
      }
    }//end for i

    PetscFunctionReturn(0);
  }

  PetscErrorCode DAMGSetInitialGuess(DAMG* damg, PetscErrorCode (*guess)(DAMG, Vec)) {
    int i, nlevels = damg[0]->nlevels;
    for(i = 0; i < nlevels; i++) {
      damg[i]->initialguess = guess;
    }
    return(0);
  }

  PetscErrorCode DAMGInitialGuessCurrent(DAMG damg, Vec vec) {
    return (0);
  }
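
  //A minimal sketch (added comment, not part of the original file) of
  //supplying an initial guess; MyGuess is a hypothetical user callback:
  //
  //  PetscErrorCode MyGuess(DAMG damg, Vec x) {
  //    VecSet(x, 1.0); //start from a constant vector
  //    return 0;
  //  }
  //  ...
  //  ot::DAMGSetInitialGuess(damg, MyGuess);
  //  //Or keep whatever already lives in damg[i]->x:
  //  ot::DAMGSetInitialGuess(damg, ot::DAMGInitialGuessCurrent);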

  PetscErrorCode DAMGSolve(DAMG* damg)
  {
    PetscErrorCode ierr;
    int i, nlevels = damg[0]->nlevels;
    PetscTruth gridseq, vecmonitor;

    PetscFunctionBegin;
    ierr = PetscOptionsHasName(0,"-damg_grid_sequence",&gridseq);CHKERRQ(ierr);
    ierr = PetscOptionsHasName(0,"-damg_vecmonitor",&vecmonitor);CHKERRQ(ierr);
    if (gridseq) {
      if(damg[0]->initialguess) {
        (*(damg[0]->initialguess))(damg[0],damg[0]->x);
        KSPSetInitialGuessNonzero(damg[0]->ksp, PETSC_TRUE);
      }
      for (i=0; i<nlevels-1; i++) {
        ierr = (*damg[i]->solve)(damg,i);CHKERRQ(ierr);
        if (vecmonitor) {
          ierr = VecView(damg[i]->x,PETSC_VIEWER_DRAW_(damg[i]->comm));CHKERRQ(ierr);
        }
        ierr = MatInterpolate(damg[i+1]->R,damg[i]->x,damg[i+1]->x);CHKERRQ(ierr);
        KSPSetInitialGuessNonzero(damg[i+1]->ksp, PETSC_TRUE);
      }
    } else {
      if(damg[nlevels-1]->initialguess) {
        (*(damg[nlevels-1]->initialguess))(damg[nlevels-1], damg[nlevels-1]->x);
        KSPSetInitialGuessNonzero(damg[nlevels-1]->ksp, PETSC_TRUE);
      }
    }
    ierr = (*DAMGGetFine(damg)->solve)(damg, nlevels-1);CHKERRQ(ierr);
    if (vecmonitor) {
      ierr = VecView(damg[nlevels-1]->x,PETSC_VIEWER_DRAW_(damg[nlevels-1]->comm));CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  }
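
  //Note (added comment): with the -damg_grid_sequence option, DAMGSolve
  //solves the coarser levels first and interpolates each solution up as the
  //initial guess for the next finer level; otherwise only the finest level
  //is solved directly.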

  PetscErrorCode DAMGSolveKSP(DAMG *damg, int level)
  {
    PetscErrorCode ierr;

    PetscFunctionBegin;

    if (damg[level]->rhs) {
      ierr = (*damg[level]->rhs)(damg[level],damg[level]->b);CHKERRQ(ierr);
    }

    ierr = KSPSolve(damg[level]->ksp, damg[level]->b, damg[level]->x);CHKERRQ(ierr);

    PetscFunctionReturn(0);
  }

  PetscErrorCode DAMGSetUpLevel(DAMG* damg, KSP ksp, int nlevels)
  {
    PetscErrorCode ierr;
    PC             pc;
    PetscTruth     ismg,monitor,ismf,isshell,ismffd;
    PetscTruth     useRTLMG;
    KSP            lksp; /* solver internal to the multigrid preconditioner */
    MPI_Comm       *comms,comm;
    PetscViewer    ascii;

    PetscFunctionBegin;
    if (!damg) SETERRQ(PETSC_ERR_ARG_NULL,"Passing null as DAMG");

    ierr = PetscOptionsHasName(PETSC_NULL,"-damg_ksp_monitor",&monitor);CHKERRQ(ierr);
    ierr = PetscOptionsHasName(PETSC_NULL,"-damg_useRTLMG",&useRTLMG);CHKERRQ(ierr);

    if (monitor) {
      ierr = PetscObjectGetComm((PetscObject)ksp,&comm);CHKERRQ(ierr);
      ierr = PetscViewerASCIIOpen(comm,"stdout",&ascii);CHKERRQ(ierr);
      ierr = PetscViewerASCIISetTab(ascii,1+(damg[0]->nlevels)-nlevels);CHKERRQ(ierr);
      ierr = KSPMonitorSet(ksp, KSPMonitorDefault, ascii,
          (PetscErrorCode(*)(void*))PetscViewerDestroy);CHKERRQ(ierr);
    }

    /* use fgmres on the outer iteration by default */
    ierr = KSPSetType(ksp, KSPFGMRES); CHKERRQ(ierr);
    ierr = KSPGetPC(ksp,&pc); CHKERRQ(ierr);
    ierr = PCSetType(pc,PCMG); CHKERRQ(ierr);

    const char *mainKSPprefix;
    KSPGetOptionsPrefix(ksp, &mainKSPprefix);

    if(useRTLMG) {
      char prefixName[256];
      sprintf(prefixName, "rtlmg_finest_");
      PCSetOptionsPrefix(pc, mainKSPprefix);
      PCAppendOptionsPrefix(pc, prefixName);
      //finest to coarsest
      for(int i = (nlevels-1); i >= 1; i--) {
        ierr = PetscMalloc(2*sizeof(MPI_Comm),&comms);CHKERRQ(ierr);
        for (int j = 0; j < 2; j++) {
          comms[j] = damg[j]->comm;
        }
        PCMGSetLevels(pc,2,comms);
        PetscFree(comms);
        PCMGSetType(pc,PC_MG_FULL);
        PetscTypeCompare((PetscObject)pc,PCMG,&ismg);
        if (ismg) {
          /*Finer Level*/
          PCMGGetSmoother(pc,1,&lksp);
          KSPSetOperators(lksp,damg[i]->B,damg[i]->B,DIFFERENT_NONZERO_PATTERN);
          PCMGSetR(pc,1,damg[i]->r);
          PCMGSetResidual(pc,1,PCMGDefaultResidual,damg[i]->B);
          /*Coarser Level*/
          PCMGGetSmoother(pc,0,&lksp);
          KSPSetOperators(lksp,damg[i-1]->J,damg[i-1]->J,DIFFERENT_NONZERO_PATTERN);
          PCMGSetX(pc,0,damg[i-1]->x);
          PCMGSetRhs(pc,0,damg[i-1]->b);
          /* Set interpolation/restriction between levels */
          PCMGSetInterpolation(pc,1,damg[i]->R);
          PCMGSetRestriction(pc,1,damg[i]->R);
          if(i > 1) {
            /*Get PC for the next level*/
            KSPSetType(lksp,KSPFGMRES);
            KSPGetPC(lksp,&pc);
            PCSetType(pc,PCMG);
            sprintf(prefixName, "rtlmg_levels_%d_",((int)(i-1)));
            PCSetOptionsPrefix(pc, mainKSPprefix);
            PCAppendOptionsPrefix(pc, prefixName);
          }
        }//end if mg
      }//end for i
    } else {
      //Standard scheme, same as the original DMMG
      ierr = PetscMalloc(nlevels*sizeof(MPI_Comm),&comms);CHKERRQ(ierr);
      for (int i = 0; i < nlevels; i++) {
        comms[i] = damg[i]->comm;
      }
      ierr = PCMGSetLevels(pc,nlevels,comms);CHKERRQ(ierr);
      ierr = PCMGSetType(pc,PC_MG_FULL);CHKERRQ(ierr);

      ierr = PetscFree(comms);CHKERRQ(ierr);

      ierr = PetscTypeCompare((PetscObject)pc,PCMG,&ismg);CHKERRQ(ierr);
      if (ismg) {
        /* set solvers for each level */
        for (int i = 0; i < nlevels; i++) {
          ierr = PCMGGetSmoother(pc, i, &lksp);CHKERRQ(ierr);

          ierr = KSPSetOperators(lksp, damg[i]->J, damg[i]->B,
              DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);

          if (i < nlevels-1) { /* don't set for the finest level; they are set in PCApply_MG() */
            ierr = PCMGSetX(pc,i,damg[i]->x);CHKERRQ(ierr);
            ierr = PCMGSetRhs(pc,i,damg[i]->b);CHKERRQ(ierr);
          }
          if (i > 0) {
            ierr = PCMGSetR(pc,i,damg[i]->r);CHKERRQ(ierr);
            ierr = PCMGSetResidual(pc,i,PCMGDefaultResidual,damg[i]->J);CHKERRQ(ierr);
          }
          if (monitor) {
            ierr = PetscObjectGetComm((PetscObject)lksp,&comm);CHKERRQ(ierr);
            ierr = PetscViewerASCIIOpen(comm,"stdout",&ascii);CHKERRQ(ierr);
            ierr = PetscViewerASCIISetTab(ascii,1+damg[0]->nlevels-i);CHKERRQ(ierr);
            ierr = KSPMonitorSet(lksp,KSPMonitorDefault,ascii,
                (PetscErrorCode(*)(void*))PetscViewerDestroy);CHKERRQ(ierr);
          }
          /* If using a matrix-free multiply and no explicit matrix was
             provided to build the preconditioner, then no preconditioner
             can be used. */
          ierr = PetscTypeCompare((PetscObject)damg[i]->B,MATSHELL,&isshell);CHKERRQ(ierr);
          ierr = PetscTypeCompare((PetscObject)damg[i]->B,MATDAAD,&ismf);CHKERRQ(ierr);
          ierr = PetscTypeCompare((PetscObject)damg[i]->B,MATMFFD,&ismffd);CHKERRQ(ierr);
          if (isshell || ismf || ismffd) {
            PC lpc;
            ierr = KSPGetPC(lksp,&lpc);CHKERRQ(ierr);
            //This is only the default value. It can be modified later.
            ierr = PCSetType(lpc,PCNONE);CHKERRQ(ierr);
          }
        }//end for i

        /* Set interpolation/restriction between levels */
        for (int i=1; i<nlevels; i++) {
          ierr = PCMGSetInterpolation(pc,i,damg[i]->R);CHKERRQ(ierr);
          ierr = PCMGSetRestriction(pc,i,damg[i]->R);CHKERRQ(ierr);
        }//end for i
      }//end if mg
    }//end if RTLMG

    PetscFunctionReturn(0);
  }//end fn.

  //level = 0 is the coarsest; level = (nlevels-1) is the finest.
  //For each level i, the nlevels argument passed down to DAMGSetUpLevel
  //(i+1) is the number of levels at or coarser than that level.
  //New implementation. Written on April 24, 2008
  PetscErrorCode DAMGCreateAndSetDA(MPI_Comm comm, int & nlevels,
      void* user, DAMG** damg, std::vector<ot::TreeNode>& finestOctree,
      unsigned int dof, double loadFac,
      bool compressLut, bool incCorner) {

    PROF_MG_SET_DA_BEGIN

    PROF_SET_DA_STAGE1_BEGIN

    PetscErrorCode ierr;
    int rank, npes;
    MPI_Comm_rank(comm, &rank);
    MPI_Comm_size(comm, &npes);

#ifndef __SILENT_MODE__
    if(!rank) {
      std::cout<<"MG-Load Fac: "<<loadFac<<std::endl;
      fflush(stdout);
    }
#endif

    par::partitionW<ot::TreeNode>(finestOctree, NULL, comm);

    assert(nlevels > 0);
    if(finestOctree.empty()) {
      std::cout<<"Processor "<<rank<<
        " called DAMGCreateAndSetDA with an empty finest octree."<<std::endl;
      fflush(stdout);
      assert(false);
    }

    unsigned int dim = finestOctree[0].getDim();
    unsigned int maxDepth = finestOctree[0].getMaxDepth();

    PROF_SET_DA_STAGE1_END
#ifdef __PROF_WITH_BARRIER__
    MPI_Barrier(comm);
#endif
    PROF_SET_DA_STAGE2_BEGIN

    int idxOfCoarsestLev = -1;

    std::vector<ot::TreeNode>* coarserOctrees = NULL;

    bool* activeStatesInCoarseBal = NULL;
    MPI_Comm* activeCommsInCoarseBal = NULL;
    int* activeNpesInCoarseBal = NULL;

    if(nlevels > 1) {
      coarserOctrees = new std::vector<ot::TreeNode> [nlevels-1];
      activeStatesInCoarseBal = new bool[nlevels - 1];
      activeCommsInCoarseBal = new MPI_Comm[nlevels - 1];
      activeNpesInCoarseBal = new int[nlevels - 1];

      //Default value is false so if a processor becomes inactive at some
      //level and exits the loop it will still have the correct value for the
      //remaining levels
      for(int i = 0; i < (nlevels-1); i++) {
        activeStatesInCoarseBal[i] = false;
      }//end for i

      MPI_Comm tmpComm1 = comm;
      MPI_Comm tmpComm2;
      bool iAmActiveForCoarsening = true;
      bool repeatLoop = true;
      while( (idxOfCoarsestLev < (nlevels-2)) && (repeatLoop) ) {
        std::vector<ot::TreeNode> tmpOctree;
        //First coarsen
        if (idxOfCoarsestLev == -1) {
          //We can skip Partition for the first call to coarsen since
          //finestOctree is explicitly partitioned above.
          ot::coarsenOctree(finestOctree, tmpOctree, dim, maxDepth,
              tmpComm1, true, &tmpComm2, &iAmActiveForCoarsening);
        } else {
          //We cannot skip Partition here: there is no explicit call to
          //partition at the end of balancing, and after the initial block
          //partition, balancing could have introduced a load imbalance
          //across the processors.
          ot::coarsenOctree(coarserOctrees[idxOfCoarsestLev], tmpOctree,
              dim, maxDepth, tmpComm1, false, &tmpComm2, &iAmActiveForCoarsening);
        }//end if finest level

        idxOfCoarsestLev++;

        //Then balance
        if(iAmActiveForCoarsening) {
          ot::balanceOctree(tmpOctree, coarserOctrees[idxOfCoarsestLev],
              dim, maxDepth, incCorner, tmpComm2, &tmpComm1, &iAmActiveForCoarsening);
        }
        tmpOctree.clear();

        //All active processors for this level will have the correct comm set,
        //and that is all we need. We do not care about the comms for inactive
        //processors.
        activeStatesInCoarseBal[idxOfCoarsestLev] = iAmActiveForCoarsening;
        activeCommsInCoarseBal[idxOfCoarsestLev] = tmpComm1;

        if(iAmActiveForCoarsening) {
          MPI_Comm_size(tmpComm1, (activeNpesInCoarseBal + idxOfCoarsestLev));
          if( (activeNpesInCoarseBal[idxOfCoarsestLev] == 1) &&
              (coarserOctrees[idxOfCoarsestLev].size() < 500) ) {
            repeatLoop = false;
          }
        } else {
          assert(coarserOctrees[idxOfCoarsestLev].empty());
          break;
        }

      }//end while
    }//end if initial nlevels > 1

    PROF_SET_DA_STAGE2_END
#ifdef __PROF_WITH_BARRIER__
    MPI_Barrier(comm);
#endif
    PROF_SET_DA_STAGE3_BEGIN

    //Only processor 0 is guaranteed to have the correct idxOfCoarsestLev
    par::Mpi_Bcast<int>(&idxOfCoarsestLev, 1, 0, comm);

    //Reset nlevels
    nlevels = (idxOfCoarsestLev + 2);

    //Need to synchronize activeNpesInCoarseBal across all processors.
    //Only processor 0 is guaranteed to be active at all levels. Hence, only it
    //will have the correct values at all levels. Inactive processors will have
    //junk values. Similarly, only processor 0 will have the correct
    //globalSizes for all levels since only it is active on all levels.
    //Inactive processors will have junk values.
    if(nlevels > 1) {
      par::Mpi_Bcast<int>(activeNpesInCoarseBal, (nlevels - 1), 0, comm);
    }

#ifdef __DEBUG_MG__
    char fname[256];
    for(int lev = 0; lev < (nlevels - 1); lev++) {
      sprintf(fname, "coarseOctBeforeBdy_%d_%d_%d.ot", lev, rank, npes);
      ot::writeNodesToFile(fname, coarserOctrees[lev]);
    }
    sprintf(fname, "finestOctBeforeBdy_%d_%d.ot", rank, npes);
    ot::writeNodesToFile(fname, finestOctree);
#endif

01434 #ifdef __USE_PVT_DA_IN_MG__
01435     //All processors are active on the finest level
01436     std::vector<ot::TreeNode> positiveBoundaries;
01437     ot::addBoundaryNodesType1(finestOctree, positiveBoundaries, dim, maxDepth);
01438 
01439     //update maxDepth
01440     maxDepth = maxDepth + 1;
01441 
01442     //Most processors will not add any positive boundaries, so there is no
01443     //need to distribute the positive boundaries across all processors just
01444     //for sorting. Only the few processors touching the positive boundary
01445     //participate in the parallel sort.
01446     MPI_Comm bdyComm;
01447     par::splitComm2way((positiveBoundaries.empty()), &bdyComm, comm);
01448 
01449     if(!(positiveBoundaries.empty())) {
01450       //Call Sample Sort  
01451       std::vector<ot::TreeNode > tmpVecTN;
01452       par::sampleSort<ot::TreeNode>(positiveBoundaries, tmpVecTN, bdyComm);
01453       positiveBoundaries = tmpVecTN;
01454       tmpVecTN.clear();
01455     }
01456 
01457     par::concatenate<ot::TreeNode>(finestOctree, positiveBoundaries, comm);
01458     positiveBoundaries.clear();
01459 
01460     //Now for all the coarser levels
01461     for (int lev = 0; lev < (nlevels - 1); lev++) {
01462       if(activeStatesInCoarseBal[lev]) {        
01463         ot::addBoundaryNodesType1(coarserOctrees[lev], positiveBoundaries, dim, maxDepth-1);
01464 
01465         //Most processors will not add any positive boundaries, so there is
01466         //no need to distribute the positive boundaries across all processors
01467         //just for sorting. Only the few processors touching the positive
01468         //boundary participate in the parallel sort.
01469         par::splitComm2way((positiveBoundaries.empty()), &bdyComm, activeCommsInCoarseBal[lev]);
01470 
01471         if(!(positiveBoundaries.empty())) {
01472           //Call Sample Sort  
01473           std::vector<ot::TreeNode > tmpVecTN;
01474           par::sampleSort<ot::TreeNode>(positiveBoundaries, tmpVecTN, bdyComm);
01475           positiveBoundaries = tmpVecTN;
01476           tmpVecTN.clear();
01477         }
01478 
01479         par::concatenate<ot::TreeNode>(coarserOctrees[lev], positiveBoundaries, activeCommsInCoarseBal[lev]);
01480         positiveBoundaries.clear();
01481       }
01482     }//end for lev
01483 
01484 #endif
01485 
01486 #ifdef __DEBUG_MG__
01487     for(int lev = 0; lev < (nlevels - 1); lev++) {
01488       sprintf(fname, "coarseOctAfterBdy_%d_%d_%d.ot", lev, rank, npes);
01489       ot::writeNodesToFile(fname, coarserOctrees[lev]);
01490     }
01491     sprintf(fname, "finestOctAfterBdy_%d_%d.ot", rank, npes);
01492     ot::writeNodesToFile(fname, finestOctree);
01493 #endif
01494 
01495 
01496     //All processors should know the global size at each level
01497     DendroIntL* localOctreeSizeForThisLevel = new DendroIntL[nlevels];
01498     DendroIntL* globalOctreeSizeForThisLevel = new DendroIntL[nlevels];
01499     localOctreeSizeForThisLevel[0] = finestOctree.size();
01500     for(int lev = 0; lev < (nlevels - 1); lev++) {
01501       localOctreeSizeForThisLevel[lev + 1] = coarserOctrees[lev].size();
01502     }//end for lev
01503     par::Mpi_Allreduce<DendroIntL>(localOctreeSizeForThisLevel,
01504         globalOctreeSizeForThisLevel, nlevels, MPI_SUM, comm);
01505     delete [] localOctreeSizeForThisLevel;
01506 
01507 #ifdef __DEBUG_MG__
01508     MPI_Barrier(comm);
01509     for(int i = 0; i < (nlevels - 1); i++) {
01510       if(rank < activeNpesInCoarseBal[i]) {
01511         assert(activeStatesInCoarseBal[i]);
01512       } else {
01513         assert(!(activeStatesInCoarseBal[i]));
01514       }
01515     }//end for i
01516     MPI_Barrier(comm);
01517 #endif
01518 
01519     //0 is the finest and (nlevels-1) is the coarsest
01520     int *maxProcsForThisLevel = new int [nlevels];
01521 
01522     for(int i = 0; i < nlevels; i++) {
01523       const DendroIntL THOUSAND = 1000;
01524       if(globalOctreeSizeForThisLevel[i] < (THOUSAND*npes)) {
01525         int maxProcsToUse = (globalOctreeSizeForThisLevel[i]/THOUSAND);
01526         if(maxProcsToUse == 0) {
01527           maxProcsToUse = 1;
01528         }
01529         maxProcsForThisLevel[i] = maxProcsToUse;
01530       } else {
01531         //use all the processors
01532         maxProcsForThisLevel[i] = npes;
01533       }
01534     }//end for i
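    //A worked example of the cap above: with npes = 1024, a level holding
    //300,000 balanced octants satisfies 300000 < 1000*1024, so it is assigned
    //300000/1000 = 300 processors; a level holding 2,000,000 octants would
    //use all 1024 processors.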
01535 
01536     //maxProcsForThisLevel was computed above using the final balanced octree
01537     //size for that level. However, the octree itself may be distributed on
01538     //(a) fewer or (b) more processors. The former case occurs when too many
01539     //new octants were generated during the balancing step. The latter case
01540     //occurs when too many extra boundaries were removed. Here we measure the
01541     //discrepancy between the two and decide which one to pick. On one hand if
01542     //we change the number of active processors, then we must call scatter to 
01543     //redistribute the octants. On the other hand if the grain size is much
01544     //larger than what it should be we will unnecessarily increase the cost
01545     //for subsequent operations. We do this only for the coarsest level;
01546     //it will automatically influence the finer levels in the subsequent
01547     //step.
01548     if(nlevels > 1) {
01549       if(activeNpesInCoarseBal[nlevels - 2] < maxProcsForThisLevel[nlevels - 1]) {
01550         DendroIntL currentGrainSize = 
01551           (globalOctreeSizeForThisLevel[nlevels - 1]/activeNpesInCoarseBal[nlevels - 2]);
01552 
01553         DendroIntL expectedGrainSize = 
01554           (globalOctreeSizeForThisLevel[nlevels - 1]/maxProcsForThisLevel[nlevels - 1]);
01555 
01556         if(static_cast<double>(currentGrainSize) < 
01557             (1.5*static_cast<double>(expectedGrainSize))) {
01558           maxProcsForThisLevel[nlevels - 1] = activeNpesInCoarseBal[nlevels - 2];
01559         }
01560       }
01561     }
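    //A worked example of the check above: if the coarsest level holds 40,000
    //octants with activeNpesInCoarseBal[nlevels-2] = 32 and
    //maxProcsForThisLevel[nlevels-1] = 40, then currentGrainSize = 1250 and
    //expectedGrainSize = 1000. Since 1250 < 1.5*1000, we keep the 32
    //processors that are already active and avoid an extra scatter.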
01562 
01563     //avoid repartitioning at every level.
01564     //Loop from coarsest to finest.
01565     //If the fine level does not fit on all processors, then
01566     //the coarser levels will not fit on all processors either.
01567     //The procs for the coarse level will be >= 1/8 that
01568     //of the finer level and <= the finer level procs. This is because the
01569     //number of elements on the coarse level can be anything from almost the
01570     //same (just 8 octants coarsened) to an 8-fold reduction in size.
01571     //So the grain size for the fine octree computed on the coarse grid procs
01572     //will be >= the true grain size and <= 8*(the true grain size).
01573     //If P_coarse = P_fine, then the cost of the matvec goes up, but there is
01574     //no additional meshing and no scatters during transfers.
01575     //If P_coarse < P_fine, then there is an extra fine grid mesh using P_fine
01576     //procs and extra scatters, but the cost of the matvec goes down (due to
01577     //the smaller grain size).
01578 
01579     for(int i = (nlevels-2); i >= 0; i--) {
01580       if(maxProcsForThisLevel[i] > maxProcsForThisLevel[i+1]) {
01581         DendroIntL avgGrainSize = (globalOctreeSizeForThisLevel[i]/maxProcsForThisLevel[i]);
01582 
01583         DendroIntL avgGrainSizeUsing1LevCoarserNpes = 
01584           (globalOctreeSizeForThisLevel[i]/maxProcsForThisLevel[i+1]);
01585 
01586         if(static_cast<double>(avgGrainSizeUsing1LevCoarserNpes) <
01587             (1.5*static_cast<double>(avgGrainSize))) {
01588           maxProcsForThisLevel[i] = maxProcsForThisLevel[i+1];
01589         }
01590       } else {
01591         assert(maxProcsForThisLevel[i] == maxProcsForThisLevel[i+1]);
01592       }
01593     }//end for i
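    //A worked example of the sweep above: suppose level i holds 40,000
    //octants with maxProcsForThisLevel[i] = 40 while the coarser level i+1
    //uses 32 processors. The grain size using 32 processors is 1250, below
    //1.5*(40000/40) = 1500, so level i is demoted to 32 processors as well
    //and the two levels share a communicator with no scatter in between.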
01594 
01595 #ifndef __SILENT_MODE__
01596     if(!rank) {
01597       std::cout<<" Using "<<nlevels<<" Multigrid Levels."<<std::endl; 
01598       fflush(stdout);
01599       for(int i = 0; i < nlevels; i++) {
01600         std::cout<<" maxNpes for lev "<< i <<": "<< maxProcsForThisLevel[i] <<std::endl;
01601         fflush(stdout);
01602       }
01603     }
01604 #endif
01605 
01606     PROF_SET_DA_STAGE3_END
01607 #ifdef __PROF_WITH_BARRIER__
01608       MPI_Barrier(comm);
01609 #endif
01610     PROF_SET_DA_STAGE4_BEGIN
01611 
01612       //Create activeComms for each level.
01613       //0 is the finest and (nlevels-1) is the coarsest
01614       MPI_Comm* activeComms = new MPI_Comm[nlevels];
01615 
01616     if(maxProcsForThisLevel[0] == npes) {
01617       activeComms[0] = comm;
01618     } else {
01619 #ifndef __SILENT_MODE__
01620       if(!rank) {
01621         std::cout<<"splitting Comm in SetDA (using comm) "<<npes<<" -> "<<maxProcsForThisLevel[0]<<std::endl; 
01622       }
01623 #endif
01624       par::splitCommUsingSplittingRank(maxProcsForThisLevel[0], activeComms, comm);
01625     }
01626     for(int lev = 1; lev < nlevels; lev++) {
01627       if(maxProcsForThisLevel[lev] == maxProcsForThisLevel[lev - 1]) {
01628         //coarse and fine grid use the same comm
01629         activeComms[lev] = activeComms[lev - 1];
01630       } else {
01631         assert(maxProcsForThisLevel[lev] < maxProcsForThisLevel[lev - 1]);
01632         if(maxProcsForThisLevel[lev] == activeNpesInCoarseBal[lev - 1]) {
01633           //The correct comm for this level was already created during coarsening
01634           activeComms[lev] = activeCommsInCoarseBal[lev - 1];
01635         } else if(maxProcsForThisLevel[lev] < activeNpesInCoarseBal[lev - 1]) {
01636           //The correct comm is a subset of the comm that was already created for this level
01637           if(maxProcsForThisLevel[lev - 1] < activeNpesInCoarseBal[lev - 1]) {
01638 #ifndef __SILENT_MODE__
01639             if(!rank) {
01640               std::cout<<"splitting Comm in SetDA (using fine activeComm) "<<
01641                 maxProcsForThisLevel[lev - 1]<<" -> "<<maxProcsForThisLevel[lev]<<std::endl; 
01642             }
01643 #endif
01644             if(rank < maxProcsForThisLevel[lev - 1]) {
01645               par::splitCommUsingSplittingRank(maxProcsForThisLevel[lev],
01646                   (activeComms + lev), activeComms[lev - 1]);
01647             }
01648           } else {
01649 #ifndef __SILENT_MODE__
01650             if(!rank) {
01651               std::cout<<"splitting Comm in SetDA (using coarseBalComm) "<<
01652                 activeNpesInCoarseBal[lev - 1]<<" -> "<<maxProcsForThisLevel[lev]<<std::endl; 
01653             }
01654 #endif
01655             if(rank < activeNpesInCoarseBal[lev - 1]) {
01656               par::splitCommUsingSplittingRank(maxProcsForThisLevel[lev],
01657                   (activeComms + lev), activeCommsInCoarseBal[lev - 1]);
01658             }
01659           }
01660         } else {
01661 #ifndef __SILENT_MODE__
01662           if(!rank) {
01663             std::cout<<"splitting Comm in SetDA (using fine activeComm) "<<
01664               maxProcsForThisLevel[lev - 1]<<" -> "<<maxProcsForThisLevel[lev]<<std::endl; 
01665           }
01666 #endif
01667           if(rank < maxProcsForThisLevel[lev - 1]) {
01668             par::splitCommUsingSplittingRank(maxProcsForThisLevel[lev],
01669                 (activeComms + lev), activeComms[lev - 1]);
01670           }
01671         }
01672       }
01673     }//end for lev
01674 
01675     PROF_SET_DA_STAGE4_END
01676 #ifdef __PROF_WITH_BARRIER__
01677       MPI_Barrier(comm);
01678 #endif
01679     PROF_SET_DA_STAGE5_BEGIN
01680 
01681       //Distribute the octrees on maxProcsForThisLevel processors.
01682       //finestOctree is not empty on any processor, so all processors
01683       //participate in this scatter.
01684       if(maxProcsForThisLevel[0] < npes) {
01685         std::vector<ot::TreeNode> tmpOctree;
01686         if(rank < maxProcsForThisLevel[0]) {
01687           DendroIntL avgGlobalOctSize = (globalOctreeSizeForThisLevel[0]/maxProcsForThisLevel[0]);
01688           int extraOcts = (globalOctreeSizeForThisLevel[0] % maxProcsForThisLevel[0]);
01689           if(rank < extraOcts) {
01690             par::scatterValues<ot::TreeNode>(finestOctree, tmpOctree,
01691                 (avgGlobalOctSize+1), comm);
01692           }else {
01693             par::scatterValues<ot::TreeNode>(finestOctree, tmpOctree,
01694                 avgGlobalOctSize, comm);
01695           }
01696         }else {
01697           par::scatterValues<ot::TreeNode>(finestOctree, tmpOctree,
01698               0, comm);
01699         }
01700         finestOctree = tmpOctree;
01701         tmpOctree.clear();
01702       }
01703 
01704     //coarseOctrees[i] is only distributed on activeCommsInCoarseBal[i]
01705     //It is empty on all other processors
01706     //So by using "activeCommsInCoarseBal[i]" instead of "comm", we can skip idle
01707     //processors from participating in the scatter. 
01708     //If "activeCommsInCoarseBal[i]" is different from "comm", it would have
01709     //been created using the function "splitCommUsingSplittingRank" inside
01710     //"CoarsenOctree" or "BalanceOctree". Moreover, this will happen only when
01711     //the total input size for that function is < 1000*npes. When this
01712     //happens, the input is redistributed to the first few processors.
01713     //"splitCommUsingSplittingRank" preserves the relative order of ranks in
01714     //the old and new comms. Hence, on all active processors the "rank" in
01715     //"comm" and in "activeCommsInCoarseBal[i]" will be the same. Similarly,
01716     //"activeComms" is also created above using "splitCommUsingSplittingRank",
01717     //hence the ranks in "activeComms" and "comm" will also be the same. 
01718 
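    //A minimal sketch of such a rank-preserving split, using only standard
    //MPI (the actual par::splitCommUsingSplittingRank may differ): because
    //MPI_Comm_split orders the ranks of each new comm by the 'key' argument,
    //passing the old rank as the key keeps the relative order, so an active
    //processor has the same rank in both comms.
    //
    //  int rank;
    //  MPI_Comm_rank(comm, &rank);
    //  int color = (rank < splittingRank) ? 0 : 1; //first procs are active
    //  MPI_Comm_split(comm, color, rank, newComm);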
01719     for(int i = 1; i < nlevels; i++) {
01720       if(activeNpesInCoarseBal[i-1] < maxProcsForThisLevel[i]) {
01721         //This case happens when the grain size using activeNpes is more than
01722         //1.5 times the grain size using maxProcs 
01723         //We need to expand the communicator
01724         //Both active and inactive processors need to participate here,
01725         //though not all inactive ones: we have already created the final
01726         //activeComms above, and we can use them here.
01727         std::vector<ot::TreeNode> tmpOctree;
01728         if(rank < maxProcsForThisLevel[i]) {
01729           DendroIntL avgGlobalOctSize = (globalOctreeSizeForThisLevel[i]/maxProcsForThisLevel[i]);
01730           int extraOcts = (globalOctreeSizeForThisLevel[i] % maxProcsForThisLevel[i]);
01731           if(rank < extraOcts) {
01732             par::scatterValues<ot::TreeNode>(coarserOctrees[i-1], tmpOctree,
01733                 (avgGlobalOctSize+1), activeComms[i]);
01734           }else {
01735             par::scatterValues<ot::TreeNode>(coarserOctrees[i-1], tmpOctree,
01736                 avgGlobalOctSize, activeComms[i]);
01737           }
01738         }
01739         coarserOctrees[i-1] = tmpOctree;
01740         tmpOctree.clear();
01741       } else if(activeNpesInCoarseBal[i-1] > maxProcsForThisLevel[i]) {
01742         //We need to shrink the communicator
01743         //This case happens when we decide to use the same number of processors
01744         //as the immediate coarser level, or when too many extra boundaries were
01745         //discarded, resulting in newly empty processors.
01746         //Inactive processors can be skipped here.
01747         if(activeStatesInCoarseBal[i-1]) {
01748           std::vector<ot::TreeNode> tmpOctree;
01749           if(rank < maxProcsForThisLevel[i]) {
01750             DendroIntL avgGlobalOctSize = (globalOctreeSizeForThisLevel[i]/maxProcsForThisLevel[i]);
01751             int extraOcts = (globalOctreeSizeForThisLevel[i] % maxProcsForThisLevel[i]);
01752             if(rank < extraOcts) {
01753               par::scatterValues<ot::TreeNode>(coarserOctrees[i-1], tmpOctree,
01754                   (avgGlobalOctSize+1), activeCommsInCoarseBal[i-1]);
01755             }else {
01756               par::scatterValues<ot::TreeNode>(coarserOctrees[i-1], tmpOctree,
01757                   avgGlobalOctSize, activeCommsInCoarseBal[i-1]);
01758             }
01759           }else {
01760             par::scatterValues<ot::TreeNode>(coarserOctrees[i-1], tmpOctree,
01761                 0, activeCommsInCoarseBal[i-1]);
01762           }
01763           coarserOctrees[i-1] = tmpOctree;
01764           tmpOctree.clear();
01765         }//end if active
01766       }//end if expanding or shrinking
01767     }//end for i
01768 
01769 #ifdef __USE_PVT_DA_IN_MG__
01770     //A simple implementation for now. This just calls flagNodesType3 for all
01771     //levels. A smarter implementation would use the fact that regular coarse
01772     //grid nodes remain regular on all finer octrees 
01773     ot::markHangingNodesAtAllLevels(finestOctree, nlevels, coarserOctrees,
01774         activeComms, dim, maxDepth);
01775 #endif
01776 
01777     if(activeStatesInCoarseBal) {
01778       delete [] activeStatesInCoarseBal;
01779       activeStatesInCoarseBal = NULL;
01780     }
01781 
01782     if(activeCommsInCoarseBal) {
01783       delete [] activeCommsInCoarseBal;
01784       activeCommsInCoarseBal = NULL;
01785     }
01786 
01787     if(activeNpesInCoarseBal) {
01788       delete [] activeNpesInCoarseBal;
01789       activeNpesInCoarseBal = NULL;
01790     }
01791 
01792     //nlevels has been corrected if necessary. Create the DAMG now...
01793     DAMG     *tmpDAMG = new DAMG[nlevels];  
01794     for (int i = 0; i < nlevels; i++) {
01795       tmpDAMG[i] = new _p_DAMG;    
01796       tmpDAMG[i]->nlevels  = nlevels - i;
01797       tmpDAMG[i]->totalLevels = nlevels;
01798       tmpDAMG[i]->comm     = comm;
01799       tmpDAMG[i]->user     = user;    
01800       tmpDAMG[i]->initialguess = NULL;
01801       tmpDAMG[i]->suppressedDOF = NULL; 
01802       tmpDAMG[i]->suppressedDOFaux = NULL; 
01803       tmpDAMG[i]->dof = dof;
01804       tmpDAMG[i]->da = NULL;
01805       tmpDAMG[i]->da_aux = NULL;
01806       tmpDAMG[i]->x = NULL;
01807       tmpDAMG[i]->b = NULL;
01808       tmpDAMG[i]->r = NULL;
01809       tmpDAMG[i]->J = NULL;
01810       tmpDAMG[i]->B = NULL;
01811       tmpDAMG[i]->R = NULL;
01812       tmpDAMG[i]->solve = NULL;         
01813       // KSP only 
01814       tmpDAMG[i]->ksp = NULL;             
01815       tmpDAMG[i]->rhs = NULL;
01816     }//end for i  
01817     *damg = tmpDAMG;
01818 
01819     PROF_SET_DA_STAGE5_END
01820 #ifdef __PROF_WITH_BARRIER__
01821       MPI_Barrier(comm);
01822 #endif
01823     PROF_SET_DA_STAGE6_BEGIN
01824 
01825       if(nlevels == 1) {
01826         //Single level only
01827 
01828 #ifndef __USE_PVT_DA_IN_MG__
01829         tmpDAMG[0]->da = new DA(finestOctree, comm, activeComms[0], compressLut, NULL, NULL);
01830 #else
01831         tmpDAMG[0]->da = new DA(1, finestOctree, comm, activeComms[0], compressLut, NULL, NULL);
01832 #endif
01833 
01834         if(!rank) {
01835           int activeNpes = tmpDAMG[0]->da->getNpesActive();
01836           if(activeNpes < npes) {
01837             std::cout<<"WARNING: Some processors will be"<<
01838               " idle even on the finest level."<<
01839               " You might want to use fewer processors."<<std::endl;
01840             fflush(stdout);
01841           }
01842         }
01843 
01844         finestOctree.clear();
01845 
01846         //Free memory
01847         delete [] maxProcsForThisLevel;
01848         maxProcsForThisLevel = NULL;
01849 
01850         delete [] globalOctreeSizeForThisLevel;
01851         globalOctreeSizeForThisLevel = NULL;
01852 
01853         delete [] activeComms;
01854         activeComms = NULL;
01855 
01856         //If the initial nlevels was 1, then coarserOctrees would never have
01857         //been created. If nlevels was reset to 1 later, then coarserOctrees
01858         //would have been created.
01859         if(coarserOctrees != NULL) {
01860           delete [] coarserOctrees;
01861           coarserOctrees = NULL;
01862         }
01863 
01864         PROF_SET_DA_STAGE6_END
01865 
01866           ierr = DAMGSetUp(tmpDAMG);CHKERRQ(ierr); 
01867 
01868         PROF_MG_SET_DA_END
01869       }//special case: single grid  
01870 
01871     //Start with the coarsest octree and mesh. 
01872     std::vector<ot::TreeNode>* blocksPtr = NULL;
01873     ot::DA* newDa = NULL;
01874     while(idxOfCoarsestLev >= 0) {
01875 #ifdef __DEBUG_MG__
01876       MPI_Barrier(comm);
01877       if(!rank) {
01878         std::cout<<"Meshing 1st DA for lev: "<<idxOfCoarsestLev<<std::endl;
01879       }
01880       MPI_Barrier(comm);
01881 #endif
01882 
01883       if(newDa == NULL) {
01884 #ifndef __USE_PVT_DA_IN_MG__
01885         newDa = new DA(coarserOctrees[idxOfCoarsestLev], comm, 
01886             activeComms[idxOfCoarsestLev + 1], compressLut,
01887             blocksPtr, NULL);
01888 #else
01889         newDa = new DA(1, coarserOctrees[idxOfCoarsestLev], comm, 
01890             activeComms[idxOfCoarsestLev + 1], compressLut,
01891             blocksPtr, NULL);
01892 #endif
01893       }
01894 
01895       //The constructor will modify the input octree, so the octree is
01896       //useless afterwards.
01897       coarserOctrees[idxOfCoarsestLev].clear();
01898       tmpDAMG[nlevels-(2+idxOfCoarsestLev)]->da = newDa;
01899       newDa = NULL;
01900 
01901       bool procsNotUsedWell;
01902       if(!rank) {  
01903         int activeNpes = tmpDAMG[nlevels-(2+idxOfCoarsestLev)]->da->getNpesActive();
01904         if( (activeNpes < maxProcsForThisLevel[idxOfCoarsestLev+1]) ||
01905             (maxProcsForThisLevel[idxOfCoarsestLev+1] <
01906              maxProcsForThisLevel[idxOfCoarsestLev]) ) {
01907           procsNotUsedWell = true;
01908         } else {
01909           procsNotUsedWell = false;
01910         }
01911       }
01912 
01913       par::Mpi_Bcast<bool>(&procsNotUsedWell, 1, 0, comm);
01914 
01915       //For the mat-vecs, 
01916       //Communication cost: O(dependent)
01917       //Computation cost: O(own + pre-Ghost)
01918       //Heuristic: Give 75% weight to computation and 25% to communication
01919       //load =  0.75*computation + 0.25*communication  
01920       double minMaxLoad[2]; //0 for minLoad, 1 for maxLoad
01921 
01922       if(tmpDAMG[nlevels-(2+idxOfCoarsestLev)]->da->iAmActive()) {
01923         ot::DA* currDa = tmpDAMG[nlevels-(2+idxOfCoarsestLev)]->da;
01924 
01925         unsigned int localPreGhostSize = currDa->getIdxElementBegin();
01926 
01927         unsigned int localIndependentSize = currDa->getIndependentSize();
01928 
01929         unsigned int localOwnSize = currDa->getElementSize();
01930 
01931         unsigned int localDependentSize = (localOwnSize - localIndependentSize);
01932 
01933 #ifdef __NO_GHOST_LOOP__
01934         double computationLoad = localOwnSize;
01935 #else
01936         double computationLoad = (localOwnSize + localPreGhostSize);
01937 #endif
01938         double communicationLoad = localDependentSize;
01939 
01940         double localLoad = ((0.75*computationLoad) + (0.25*communicationLoad));
01941 
01942         par::Mpi_Reduce<double>(&localLoad, &(minMaxLoad[0]), 1, 
01943             MPI_MIN, 0, currDa->getCommActive());
01944 
01945         par::Mpi_Reduce<double>(&localLoad, &(minMaxLoad[1]), 1, 
01946             MPI_MAX, 0, currDa->getCommActive());
01947       }
01948 
01949       par::Mpi_Bcast<double>(minMaxLoad, 2, 0, comm);
01950 
01951       //Give the finer grids a chance to be well load balanced and
01952       //be distributed on as many procs as possible.
01953       bool needAuxGrid = (procsNotUsedWell ||
01954           (minMaxLoad[1] > (minMaxLoad[0]*loadFac)));
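      //A worked example of the trigger above: if loadFac were 1.5 and the
      //most loaded active processor has load 900 while the least loaded has
      //load 500, then 900 > 1.5*500 = 750 and an auxiliary, re-partitioned
      //fine mesh is attempted even though no processor is idle.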
01955 
01956       //Get the coarse grid partition. This will be needed whether or not the
01957       //aux_mesh is required
01958       if(blocksPtr == NULL) {
01959         blocksPtr = new std::vector<ot::TreeNode>;
01960         (*blocksPtr) = tmpDAMG[nlevels-(2+idxOfCoarsestLev)]->da->getBlocks();
01961       }
01962 
01963       if(needAuxGrid) {
01964         //Need DA_AUX for the immediate finer octree.
01965         //Create DA_AUX for the immediate finer octree using the current
01966         //partition.
01967         //We don't want to alter the actual fine octree, so we work with a
01968         //copy instead.
01969         std::vector<ot::TreeNode> fineOctreeCopy;
01970         if(idxOfCoarsestLev > 0) {
01971           fineOctreeCopy = coarserOctrees[idxOfCoarsestLev-1];
01972         }else {
01973           fineOctreeCopy = finestOctree;
01974         }
01975 
01976         //Need to redistribute fineOctreeCopy such that the input is only
01977         //distributed on the processors that are active on the coarse grid.
01978         bool iAmActive = tmpDAMG[nlevels-(2+idxOfCoarsestLev)]->da->iAmActive();
01979         MPI_Comm newComm = tmpDAMG[nlevels-(2+idxOfCoarsestLev)]->da->getCommActive();
01980 
01981         DendroIntL mySize = fineOctreeCopy.size();
01982         DendroIntL totalSize = globalOctreeSizeForThisLevel[idxOfCoarsestLev];   
01983 
01984         std::vector<ot::TreeNode> fineOctAfterPart;
01985 
01986         if(rank < maxProcsForThisLevel[idxOfCoarsestLev]) {
01987           if(iAmActive) {
01988             int newRank;
01989             int newNpes;
01990             MPI_Comm_rank(newComm, &newRank);
01991             MPI_Comm_size(newComm, &newNpes);
01992             DendroIntL avgSize = (totalSize / newNpes);
01993             int leftOvers = (totalSize % newNpes);
01994             if(newRank < leftOvers) {
01995               par::scatterValues<ot::TreeNode>(fineOctreeCopy, fineOctAfterPart,
01996                   (avgSize+1), activeComms[idxOfCoarsestLev]);
01997             }else {
01998               par::scatterValues<ot::TreeNode>(fineOctreeCopy, fineOctAfterPart,
01999                   avgSize, activeComms[idxOfCoarsestLev]);
02000             }
02001           }else {
02002             par::scatterValues<ot::TreeNode>(fineOctreeCopy, fineOctAfterPart,
02003                 0, activeComms[idxOfCoarsestLev]);
02004           }//end if active on coarse grid
02005         }//end if active on fine grid
02006 
02007         fineOctreeCopy.clear();
02008 
02009 #ifdef __DEBUG_MG__
02010         MPI_Barrier(comm);
02011         if(!rank) {
02012           std::cout<<"Meshing 2nd DA for lev: "<<idxOfCoarsestLev<<std::endl;
02013         }
02014         MPI_Barrier(comm);
02015 #endif
02016 
02017         //This DA is aligned with the coarser grid
02018 #ifndef __USE_PVT_DA_IN_MG__
02019         newDa = new DA(fineOctAfterPart, comm, newComm, compressLut, blocksPtr, NULL);
02020 #else
02021         newDa = new DA(1, fineOctAfterPart, comm, newComm, compressLut, blocksPtr, NULL);
02022 #endif
02023 
02024         fineOctAfterPart.clear();
02025 
02026         //Re-calculate the imbalance and check whether the aux mesh was
02027         //really necessary; a priori estimates can be misleading at times.
02028 
02029         if(!procsNotUsedWell) {
02030           if(newDa->iAmActive()) {
02031             ot::DA* currDa = newDa;
02032 
02033             unsigned int localPreGhostSize = currDa->getIdxElementBegin();
02034 
02035             unsigned int localOwnSize = currDa->getElementSize();
02036 
02037             unsigned int localIndependentSize = currDa->getIndependentSize();
02038 
02039             unsigned int localDependentSize = (localOwnSize - localIndependentSize);
02040 
02041 #ifdef __NO_GHOST_LOOP__
02042             double computationLoad = localOwnSize;
02043 #else
02044             double computationLoad = (localOwnSize + localPreGhostSize);
02045 #endif
02046             double communicationLoad = localDependentSize;
02047 
02048             double localLoad = ((0.75*computationLoad) + (0.25*communicationLoad));
02049 
02050             par::Mpi_Reduce<double>(&localLoad, &minMaxLoad[0], 1, 
02051                 MPI_MIN, 0, currDa->getCommActive());
02052 
02053             par::Mpi_Reduce<double>(&localLoad, &minMaxLoad[1], 1, 
02054                 MPI_MAX, 0, currDa->getCommActive());
02055           }
02056 
02057           par::Mpi_Bcast<double>(minMaxLoad, 2, 0, comm);
02058 
02059           needAuxGrid = (minMaxLoad[1] > (minMaxLoad[0]*loadFac));
02060         } else {
02061           needAuxGrid = true;
02062         }
02063 
02064         if(needAuxGrid) {
02065           tmpDAMG[nlevels-(1+idxOfCoarsestLev)]->da_aux = newDa;
02066           newDa = NULL;
02067 
02068           //We don't want to use the same partition for the next finer level.
02069           blocksPtr->clear();
02070           delete blocksPtr;
02071           blocksPtr = NULL;
02072         }
02073       }//end check for aux_mesh 
02074 
02075       idxOfCoarsestLev--;
02076     }//end loop to mesh coarser grids
02077 
02078     //Free the memory for the pointers to the coarser octrees.
02079     delete [] coarserOctrees;
02080     coarserOctrees = NULL;
02081 
02082     delete [] globalOctreeSizeForThisLevel;
02083     globalOctreeSizeForThisLevel = NULL;
02084 
02085     delete [] maxProcsForThisLevel;
02086     maxProcsForThisLevel = NULL;
02087 
02088 #ifdef __DEBUG_MG__
02089     MPI_Barrier(comm);
02090     if(!rank) {
02091       std::cout<<"Meshing finest octree: "<<std::endl;
02092     }
02093     MPI_Barrier(comm);
02094 #endif
02095 
02096     //mesh finest level here
02097     if(newDa == NULL) {
02098 #ifndef __USE_PVT_DA_IN_MG__
02099       newDa = new DA(finestOctree, comm, activeComms[0], compressLut, blocksPtr, NULL);
02100 #else
02101       newDa = new DA(1, finestOctree, comm, activeComms[0], compressLut, blocksPtr, NULL);
02102 #endif
02103     }
02104 
02105 #ifdef __DEBUG_MG__
02106     MPI_Barrier(comm);
02107     if(!rank) {
02108       std::cout<<"Finished meshing finest octree: "<<std::endl;
02109     }
02110     MPI_Barrier(comm);
02111 #endif
02112 
02113     tmpDAMG[nlevels-1]->da = newDa;
02114     newDa = NULL;
02115 
02116     finestOctree.clear();
02117 
02118     delete [] activeComms;
02119     activeComms = NULL;
02120 
02121     if(!rank) {
02122       int activeNpes = tmpDAMG[nlevels-1]->da->getNpesActive();
02123       if(activeNpes < npes) {
02124         std::cout<<"WARNING: Some processors will be idle"
02125           <<" even on the finest level."
02126           <<" You might want to use fewer processors."<<std::endl;
02127         fflush(stdout);
02128       }
02129     }
02130 
02131     if(blocksPtr != NULL) {
02132       blocksPtr->clear();
02133       delete blocksPtr;
02134       blocksPtr = NULL;
02135     }
02136 
02137     PROF_SET_DA_STAGE6_END
02138 
02139       ierr = DAMGSetUp(tmpDAMG); CHKERRQ(ierr); 
02140 
02141     PROF_MG_SET_DA_END
02142 
02143   }//end fn.
02144 
02145   PetscErrorCode DAMGSetUp(DAMG* damg)
02146   {
02147     PROF_MG_SETUP_BEGIN
02148 
02149       int       i,nlevels = damg[0]->nlevels;
02150 
02151 
02152 #ifdef __DEBUG_MG__
02153     int rank;
02154     MPI_Comm_rank(damg[0]->comm,&rank);
02155     if(!rank) {
02156       std::cout<<"In DAMGSetUp."<<std::endl;
02157       fflush(stdout);
02158     }   
02159 #endif
02160 
02161     /* Create work vectors and matrix for each level */
02162     damg[0]->da->createVector(damg[0]->x, false, false, damg[0]->dof);
02163     VecDuplicate(damg[0]->x,&damg[0]->b); 
02164     VecDuplicate(damg[0]->x,&damg[0]->r); 
02165 
02166 #ifdef __DEBUG_MG__
02167     assert(damg[0]->da_aux == NULL);
02168 #endif
02169 
02170     for (i = 1; i < nlevels; i++) {
02171       damg[i]->da->createVector(damg[i]->x, false, false, damg[i]->dof);
02172       VecDuplicate(damg[i]->x,&damg[i]->b); 
02173       VecDuplicate(damg[i]->x,&damg[i]->r); 
02174 
02175       /* Create interpolation/restriction between levels. */  
02176       //nlevels must be at least 2 for Restriction/Interpolation  
02177       //i is fine, i-1 is coarse
02178       if(damg[i]->da_aux == NULL) {
02179         //Type-1. Assume Coarse and Fine are aligned. Do not use aux.
02180 #ifdef __DEBUG_MG__
02181         if(!rank) {
02182           std::cout<<"Lev: "<<i<<" R-type: 1"<<std::endl;
02183           fflush(stdout);
02184         }
02185 #endif
02186         createInterpolationType1(damg[i-1],damg[i],&damg[i]->R); 
02187       }else {
02188         //Type-2. Use Aux. Coarse and Fine are not aligned.
02189 #ifdef __DEBUG_MG__
02190         if(!rank) {
02191           std::cout<<"Lev: "<<i<<" R-type: 2"<<std::endl;
02192           fflush(stdout);
02193         }
02194 #endif
02195         createInterpolationType2(damg[i-1],damg[i],&damg[i]->R); 
02196       }
02197     }//end for i
02198 
02199     PROF_MG_SETUP_END
02200   }
02201 
02202   /* Matrix-Free Intergrid Transfer Operators:
02203      Type1: Assume Coarse and Fine are aligned. Do not use aux.
02204      Type2: Use Aux. Coarse and Fine are not aligned.
02205   */
02206   PetscErrorCode createInterpolationType1(DAMG damgc, DAMG damgf, Mat *R)
02207   {
02208     PROF_MG_CREATE_RP1_BEGIN
02209 
02210       ot::DA*   dac = damgc->da; 
02211     ot::DA*   daf = damgf->da;  
02212     unsigned int dof = damgc->dof;
02213     MPI_Comm comm = damgc->comm;
02214     unsigned int  mc,mf;
02215     //The size this processor owns (without ghosts).
02216     mc=dac->getNodeSize();
02217     mf=daf->getNodeSize();
02218     TransferOpData* data = new TransferOpData;
02219 
02220     unsigned int localCoarseIndSize =  dac->getIndependentSize();
02221 
02222     if(dac->iAmActive()) {
02223       par::Mpi_Allreduce<unsigned int>(&localCoarseIndSize, &(data->minIndependentSize), 1,
02224           MPI_MIN, dac->getCommActive());
02225     } else {
02226       data->minIndependentSize = 0;
02227     }
02228 
02229     data->dac = dac;
02230     data->daf = daf;
02231     data->suppressedDOFc = damgc->suppressedDOF;
02232     data->suppressedDOFf = damgf->suppressedDOF;
02233     data->comm = comm;
02234     data->dof = dof;
02235     data->fineTouchedFlags = new std::vector<ot::FineTouchedStatus>;
02236 
02237     data->tmp = NULL;
02238     data->addRtmp = NULL;
02239     data->addPtmp = NULL;
02240 
02241     data->sendSzP = NULL;
02242     data->sendOffP = NULL;
02243     data->recvSzP = NULL;
02244     data->recvOffP = NULL;
02245     data->sendSzR = NULL;
02246     data->sendOffR = NULL;
02247     data->recvSzR = NULL;
02248     data->recvOffR = NULL;
02249 
02250     iC(MatCreateShell(comm, mc*dof ,mf*dof,PETSC_DETERMINE,PETSC_DETERMINE, (void*)(data), R));  
02251     iC(MatShellSetOperation(*R,MATOP_MULT_TRANSPOSE,(void(*)(void))prolongMatVecType1));
02252     iC(MatShellSetOperation(*R,MATOP_MULT,(void(*)(void))restrictMatVecType1));
02253     iC(MatShellSetOperation(*R,MATOP_MULT_ADD,(void(*)(void))addRestrictMatVec));
02254     iC(MatShellSetOperation(*R,MATOP_MULT_TRANSPOSE_ADD,(void(*)(void))addProlongMatVec));
02255     iC(MatShellSetOperation(*R ,MATOP_DESTROY, (void(*)(void)) rpMatDestroy));
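    //With the operations set above, MatMult(*R, fineVec, coarseVec) applies
    //restriction and MatMultTranspose(*R, coarseVec, fineVec) applies
    //prolongation; the shell has local size mc*dof x mf*dof, so a single
    //matrix serves both transfer directions in the MG cycle.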
02256 
02257     //dummy call to restrict-type-1
02258     data->daf->createVector<ot::FineTouchedStatus >(*(data->fineTouchedFlags), false, false, 1);
02259     dummyRestrictMatVecType1(data);
02260 
02261     PROF_MG_CREATE_RP1_END
02262   }
02263 
02264   PetscErrorCode  createInterpolationType2(DAMG damgc, DAMG damgf, Mat *R)      
02265   {
02266     PROF_MG_CREATE_RP2_BEGIN
02267 
02268       ot::DA*   dac = damgc->da; 
02269     ot::DA*   daf = damgf->da;  
02270     ot::DA*  daf_aux = damgf->da_aux;
02271     unsigned int dof = damgc->dof;
02272     MPI_Comm comm = damgc->comm;
02273     unsigned int  mc,mf;
02274     //The size this processor owns (without ghosts).
02275     mc=dac->getNodeSize();
02276     mf=daf->getNodeSize();
02277     TransferOpData* data = new TransferOpData;
02278 
02279     unsigned int localCoarseIndSize =  dac->getIndependentSize();
02280 
02281     if(dac->iAmActive()) {
02282       par::Mpi_Allreduce<unsigned int>(&localCoarseIndSize, &(data->minIndependentSize), 1,
02283           MPI_MIN, dac->getCommActive());
02284     } else {
02285       data->minIndependentSize = 0;
02286     }
02287 
02288     data->dac = dac;
02289     data->daf = daf_aux;        
02290     data->suppressedDOFc = damgc->suppressedDOF;
02291     data->suppressedDOFf = damgf->suppressedDOFaux;
02292     data->comm = comm;
02293     data->dof = dof;
02294     data->fineTouchedFlags = new std::vector<ot::FineTouchedStatus>;
02295     daf_aux->createVector(data->tmp, false, false, dof);
02296     data->addRtmp = NULL;
02297     data->addPtmp = NULL;
02298 
02299     data->sendSzP = NULL;
02300     data->sendOffP = NULL;
02301     data->recvSzP = NULL;
02302     data->recvOffP = NULL;
02303     data->sendSzR = NULL;
02304     data->sendOffR = NULL;
02305     data->recvSzR = NULL;
02306     data->recvOffR = NULL;
02307 
02308     iC(MatCreateShell(comm, mc*dof ,mf*dof,PETSC_DETERMINE,PETSC_DETERMINE, (void*)(data), R));  
02309     iC(MatShellSetOperation(*R,MATOP_MULT_TRANSPOSE,(void(*)(void))prolongMatVecType2));
02310     iC(MatShellSetOperation(*R,MATOP_MULT,(void(*)(void))restrictMatVecType2));
02311     iC(MatShellSetOperation(*R,MATOP_MULT_TRANSPOSE_ADD,(void(*)(void))addProlongMatVec));
02312     iC(MatShellSetOperation(*R,MATOP_MULT_ADD,(void(*)(void))addRestrictMatVec));
02313     iC(MatShellSetOperation(*R ,MATOP_DESTROY, (void(*)(void)) rpMatDestroy));
02314 
02315     //dummy call to restrict-type-1.
02316     //Note: the type-2 variant is not called here, since we do not need to
02317     //scatter values for the dummy loop. Using scatterValues here would be
02318     //wrong: it would create the scatter contexts the first time it is called.
02319     data->daf->createVector<ot::FineTouchedStatus >(*(data->fineTouchedFlags), false, false, 1);
02320     dummyRestrictMatVecType1(data);
02321 
02322     PROF_MG_CREATE_RP2_END
02323   }
02324 
02325   PetscErrorCode  rpMatDestroy(Mat R) {
02326     TransferOpData *data;
02327     PetscFunctionBegin;
02328     iC(MatShellGetContext( R, (void **)&data));
02329     if(data) {
02330       if(data->fineTouchedFlags) { 
02331         data->fineTouchedFlags->clear(); 
02332         delete data->fineTouchedFlags;
02333         data->fineTouchedFlags = NULL;
02334       }
02335       if(data->tmp) { 
02336         iC(VecDestroy(data->tmp));
02337         data->tmp = NULL;
02338       }  
02339       if(data->addRtmp) {
02340         VecDestroy(data->addRtmp);
02341         data->addRtmp = NULL;
02342       }
02343       if(data->addPtmp) {
02344         VecDestroy(data->addPtmp);
02345         data->addPtmp = NULL;
02346       }
02347       if(data->sendSzP) {
02348         delete [] data->sendSzP;
02349         data->sendSzP = NULL;
02350       }
02351       if(data->sendOffP) {
02352         delete [] data->sendOffP;
02353         data->sendOffP = NULL;
02354       }
02355       if(data->recvSzP) {
02356         delete [] data->recvSzP;
02357         data->recvSzP = NULL;
02358       }
02359       if(data->recvOffP) {
02360         delete [] data->recvOffP;
02361         data->recvOffP = NULL;
02362       }
02363       if(data->sendSzR) {
02364         delete [] data->sendSzR;
02365         data->sendSzR = NULL;
02366       }
02367       if(data->sendOffR) {
02368         delete [] data->sendOffR;
02369         data->sendOffR = NULL;
02370       }
02371       if(data->recvSzR) {
02372         delete [] data->recvSzR;
02373         data->recvSzR = NULL;
02374       }
02375       if(data->recvOffR) {
02376         delete [] data->recvOffR;
02377         data->recvOffR = NULL;
02378       }
02379       delete data;
02380       data = NULL;
02381     }
02382 
02383     PetscFunctionReturn(0);
02384   }
02385 
02386   void PrintDAMG(DAMG* damg) {
02387     int  nlevels = damg[0]->nlevels;
02388     int rank;
02389     MPI_Comm_rank(damg[0]->comm,&rank);
02390     for (int i = 0; i < nlevels; i++) {
02391       MPI_Comm activeComm = damg[i]->da->getCommActive();
02392       MPI_Comm activeAuxComm;
02393 
02394       if(damg[i]->da_aux) {
02395         activeAuxComm = damg[i]->da_aux->getCommActive();
02396       }
02397 
02398       DendroIntL globalBlocksSize;
02399       DendroIntL globalBlocksSizeAux;
02400       DendroIntL localBlocksSize = damg[i]->da->getNumBlocks();
02401       DendroIntL localBlocksSizeAux;
02402 
02403       if(damg[i]->da_aux) {
02404         localBlocksSizeAux = damg[i]->da_aux->getNumBlocks();
02405       }
02406 
02407       DendroIntL elemSizeLocal = damg[i]->da->getElementSize();
02408       DendroIntL ghostedElemSizeLocal = damg[i]->da->getGhostedElementSize();
02409       DendroIntL elemSizeLocalAux;      
02410       DendroIntL ghostedElemSizeLocalAux;       
02411 
02412       if(damg[i]->da_aux) {
02413         elemSizeLocalAux = damg[i]->da_aux->getElementSize();
02414         ghostedElemSizeLocalAux = damg[i]->da_aux->getGhostedElementSize();
02415       } 
02416 
02417       DendroIntL elemSizeGlobal;
02418       DendroIntL minElemSize;
02419       DendroIntL maxElemSize;
02420       DendroIntL minGhostedElemSize;
02421       DendroIntL maxGhostedElemSize;
02422 
02423       DendroIntL elemSizeGlobalAux;
02424       DendroIntL minElemSizeAux;
02425       DendroIntL maxElemSizeAux;
02426       DendroIntL minGhostedElemSizeAux;
02427       DendroIntL maxGhostedElemSizeAux;
02428 
02429       if(damg[i]->da->iAmActive()) {
02430         par::Mpi_Reduce<DendroIntL>(&localBlocksSize, &globalBlocksSize, 1, 
02431             MPI_SUM, 0, activeComm);
02432 
02433         par::Mpi_Reduce<DendroIntL>(&elemSizeLocal, &elemSizeGlobal, 1, 
02434             MPI_SUM, 0, activeComm);
02435 
02436         par::Mpi_Reduce<DendroIntL>(&elemSizeLocal, &minElemSize, 1, 
02437             MPI_MIN, 0, activeComm);
02438 
02439         par::Mpi_Reduce<DendroIntL>(&elemSizeLocal, &maxElemSize, 1, 
02440             MPI_MAX, 0, activeComm);
02441 
02442         par::Mpi_Reduce<DendroIntL>(&ghostedElemSizeLocal, &minGhostedElemSize, 1, 
02443             MPI_MIN, 0, activeComm);
02444 
02445         par::Mpi_Reduce<DendroIntL>(&ghostedElemSizeLocal, &maxGhostedElemSize, 1, 
02446             MPI_MAX, 0, activeComm);
02447       }
02448 
02449       if(damg[i]->da_aux) {
02450         if(damg[i]->da_aux->iAmActive()) {
02451           par::Mpi_Reduce<DendroIntL>(&localBlocksSizeAux, &globalBlocksSizeAux, 1, 
02452               MPI_SUM, 0, activeAuxComm);
02453 
02454           par::Mpi_Reduce<DendroIntL>(&elemSizeLocalAux, &elemSizeGlobalAux, 1, 
02455               MPI_SUM, 0, activeAuxComm);
02456 
02457           par::Mpi_Reduce<DendroIntL>(&elemSizeLocalAux, &minElemSizeAux, 1,  
02458               MPI_MIN, 0, activeAuxComm);
02459 
02460           par::Mpi_Reduce<DendroIntL>(&elemSizeLocalAux, &maxElemSizeAux, 1, 
02461               MPI_MAX, 0, activeAuxComm);
02462 
02463           par::Mpi_Reduce<DendroIntL>(&ghostedElemSizeLocalAux, &minGhostedElemSizeAux, 1, 
02464               MPI_MIN, 0, activeAuxComm);
02465 
02466           par::Mpi_Reduce<DendroIntL>(&ghostedElemSizeLocalAux, &maxGhostedElemSizeAux, 1, 
02467               MPI_MAX, 0, activeAuxComm);
02468         }
02469       } 
02470 
02471       DendroIntL nodeSizeLocal = damg[i]->da->getNodeSize();
02472       DendroIntL nodeSizeLocalAux;
02473 
02474       if(damg[i]->da_aux) {
02475         nodeSizeLocalAux = damg[i]->da_aux->getNodeSize();
02476       }
02477 
02478       DendroIntL nodeSizeGlobal;
02479       DendroIntL minNodeSize;
02480       DendroIntL maxNodeSize;
02481 
02482       if(damg[i]->da->iAmActive()) {
02483         par::Mpi_Reduce<DendroIntL>(&nodeSizeLocal, &nodeSizeGlobal, 1, 
02484             MPI_SUM, 0, activeComm);
02485 
02486         par::Mpi_Reduce<DendroIntL>(&nodeSizeLocal, &minNodeSize, 1, 
02487             MPI_MIN, 0, activeComm);
02488 
02489         par::Mpi_Reduce<DendroIntL>(&nodeSizeLocal, &maxNodeSize, 1, 
02490             MPI_MAX, 0, activeComm);
02491       }
02492 
02493       DendroIntL nodeSizeGlobalAux;
02494       DendroIntL minNodeSizeAux;
02495       DendroIntL maxNodeSizeAux;
02496 
02497       if(damg[i]->da_aux) {
02498         if(damg[i]->da_aux->iAmActive()) {
02499           par::Mpi_Reduce<DendroIntL>(&nodeSizeLocalAux, &nodeSizeGlobalAux, 1, 
02500               MPI_SUM, 0, activeAuxComm);
02501 
02502           par::Mpi_Reduce<DendroIntL>(&nodeSizeLocalAux, &minNodeSizeAux, 1, 
02503               MPI_MIN, 0, activeAuxComm);
02504 
02505           par::Mpi_Reduce<DendroIntL>(&nodeSizeLocalAux, &maxNodeSizeAux, 1, 
02506               MPI_MAX, 0, activeAuxComm);
02507         }
02508       }
02509 
02510       unsigned int globalMinLev = getGlobalMinLevel(damg[i]->da);
02511       unsigned int globalMaxLev = getGlobalMaxLevel(damg[i]->da);
02512 
02513       unsigned int globalMinLevAux;
02514       if(damg[i]->da_aux) {     
02515         globalMinLevAux = getGlobalMinLevel(damg[i]->da_aux);
02516       }
02517       unsigned int globalMaxLevAux;
02518       if(damg[i]->da_aux) {
02519         globalMaxLevAux = getGlobalMaxLevel(damg[i]->da_aux);
02520       }
02521 
02522       int activeNpes = damg[i]->da->getNpesActive();
02523       int activeNpesAux;
02524       if(damg[i]->da_aux) {
02525         activeNpesAux = damg[i]->da_aux->getNpesActive();
02526       }
02527       if(!rank) {
02528         std::cout<<"At Level "<<i<<" Elem size: "<<elemSizeGlobal
02529           <<", GlobalBlocksSize: "<<globalBlocksSize
02530           <<", node size: "<<nodeSizeGlobal
02531           <<", minElemSize: "<<minElemSize
02532           <<", maxElemSize: "<<maxElemSize
02533           <<", minGhostedElemSize: "<<minGhostedElemSize
02534           <<", maxGhostedElemSize: "<<maxGhostedElemSize
02535           <<", minNodeSize: "<<minNodeSize
02536           <<", maxNodeSize: "<<maxNodeSize
02537           <<", min Lev: "<<globalMinLev
02538           <<", max Lev: "<<globalMaxLev
02539           <<", Active Npes: "<<activeNpes<<std::endl; 
02540         fflush(stdout);
02541         if(damg[i]->da_aux) {
02542           std::cout<<"At AUX Level "<<i<<" Elem size: "<<elemSizeGlobalAux
02543             <<", GlobalBlocksSize: "<<globalBlocksSizeAux
02544             <<", node size: "<<nodeSizeGlobalAux
02545             <<", minElemSize: "<<minElemSizeAux
02546             <<", maxElemSize: "<<maxElemSizeAux
02547             <<", minGhostedElemSize: "<<minGhostedElemSizeAux
02548             <<", maxGhostedElemSize: "<<maxGhostedElemSizeAux
02549             <<", minNodeSize: "<<minNodeSizeAux
02550             <<", maxNodeSize: "<<maxNodeSizeAux
02551             <<", min Lev: "<<globalMinLevAux
02552             <<", max Lev: "<<globalMaxLevAux
02553             <<", Active Npes: "<<activeNpesAux<<std::endl; 
02554           fflush(stdout);
02555         }
02556       }//end if p0
02557     }//end for i
02558   }//end function
02559 
02560   //Private Functions...
02561 
02562   PetscErrorCode PC_KSP_Shell_SetUp(void* ctx) {
02563 
02564     PROF_PC_KSP_SHELL_SETUP_BEGIN
02565 
02566       PC_KSP_Shell* data = static_cast<PC_KSP_Shell*>(ctx); 
02567 
02568     MPI_Comm commActive = data->commActive;
02569     bool iAmActive = data->iAmActive;
02570 
02571     //This points to the shell itself
02572     PC pc = data->pc;
02573 
02574     Mat Amat;
02575     PCGetOperators(pc, &Amat, NULL, NULL);      
02576     PetscTruth isshell;
02577     PetscTypeCompare((PetscObject)Amat, MATSHELL, &isshell);
02578 
02579     if(!isshell) {
02580       SETERRQ(PETSC_ERR_SUP, " Expected a MATSHELL.");
02581     }
02582 
02583     //Create ksp_private, rhs_private, sol_private.
02584     if(iAmActive) {
02585       Mat Amat_private;
02586       Mat Pmat_private;
02587       MatStructure pFlag;
02588 
02589       if(getPrivateMatricesForKSP_Shell) {
02590         (*getPrivateMatricesForKSP_Shell)(Amat, &Amat_private,
02591             &Pmat_private, &pFlag);
02592       } else {
02593         SETERRQ(PETSC_ERR_USER,
02594             " Expected function to be set:"
02595             " getPrivateMatricesForKSP_Shell");
02596       }
02597 
02598       //Sanity Checks
02599       assert(Amat_private != NULL);
02600 
02601       PetscInt globalRowSize;
02602       PetscInt globalColSize;
02603       PetscInt localRowSize;
02604       PetscInt localColSize;
02605 
02606       PetscInt globalRowSizePrivate;
02607       PetscInt globalColSizePrivate;
02608       PetscInt localRowSizePrivate;
02609       PetscInt localColSizePrivate;
02610 
02611       MatGetSize(Amat, &globalRowSize, &globalColSize);
02612       MatGetSize(Amat_private, &globalRowSizePrivate, &globalColSizePrivate);
02613 
02614       MatGetLocalSize(Amat, &localRowSize, &localColSize);
02615       MatGetLocalSize(Amat_private, &localRowSizePrivate, &localColSizePrivate);
02616 
02617       assert(globalRowSize == globalRowSizePrivate);
02618       assert(globalColSize == globalColSizePrivate);
02619 
02620       assert(localRowSize == localRowSizePrivate);
02621       assert(localColSize == localColSizePrivate);
02622 
02623       MPI_Comm privateComm;
02624       PetscObjectGetComm((PetscObject)Amat_private, &privateComm);
02625 
02626       int commCompareResult;
02627 
02628       //PetscHeaderCreate_Private duplicates the comm
02629       //instead of simply copying it. So, the comms will not be exactly
02630       //identical, but they will be equivalent.
02631       MPI_Comm_compare(privateComm, commActive, &commCompareResult);
02632 
02633       assert( (commCompareResult == MPI_CONGRUENT) || (commCompareResult == MPI_IDENT));
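      //(MPI_IDENT means the two handles refer to the same communicator;
      //MPI_CONGRUENT means the same group of processes in the same rank
      //order, but a different context, which is what MPI_Comm_dup produces.)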
02634 
02635       if(pc->setupcalled == 0) {
02636         assert(data->ksp_private == NULL);
02637         assert(data->rhs_private == NULL);
02638         assert(data->sol_private == NULL);
02639       } else {
02640         assert(data->ksp_private != NULL);
02641         assert(data->rhs_private != NULL);
02642         assert(data->sol_private != NULL);
02643       }
02644 
02645       if(pc->setupcalled == 0) {
02646         KSPCreate(commActive, &(data->ksp_private));
02647 
02648         const char *prefix;
02649         PCGetOptionsPrefix(pc, &prefix);
02650 
02651         //These functions also set the correct prefix for the inner pc 
02652         KSPSetOptionsPrefix(data->ksp_private, prefix);
02653         KSPAppendOptionsPrefix(data->ksp_private, "private_");
02654 
02655         //Default Types for KSP and PC
02656         KSPSetType(data->ksp_private, KSPPREONLY);
02657 
02658         PC privatePC;
02659         KSPGetPC(data->ksp_private, &privatePC);
02660         PCSetType(privatePC, PCLU);
02661 
02662         //The command line options get higher precedence.
02663         //This also calls PCSetFromOptions for the private pc internally
02664         KSPSetFromOptions(data->ksp_private);  
02665 
02666         MatGetVecs(Amat_private, &(data->sol_private), &(data->rhs_private));
02667       }
02668 
02669       KSPSetOperators(data->ksp_private, Amat_private, Pmat_private, pFlag);
02670 
02671     } else {
02672       data->sol_private = NULL;
02673       data->rhs_private = NULL;
02674       data->ksp_private = NULL;
02675     }//end if active
02676 
02677     PROF_PC_KSP_SHELL_SETUP_END
02678 
02679   }
02680 
02681   PetscErrorCode PC_KSP_Shell_Destroy(void* ctx) {
02682 
02683     PROF_PC_KSP_SHELL_DESTROY_BEGIN
02684 
02685       PC_KSP_Shell* data = static_cast<PC_KSP_Shell*>(ctx); 
02686 
02687     if(data) {
02688       if(data->ksp_private) {
02689         KSPDestroy(data->ksp_private);
02690         data->ksp_private = NULL;
02691       }
02692 
02693       if(data->rhs_private) {
02694         VecDestroy(data->rhs_private);
02695         data->rhs_private = NULL;
02696       }
02697 
02698       if(data->sol_private) {
02699         VecDestroy(data->sol_private);
02700         data->sol_private = NULL;
02701       }
02702 
02703       delete data;
02704       data = NULL;
02705     }
02706 
02707     PROF_PC_KSP_SHELL_DESTROY_END
02708   }
02709 
02710   PetscErrorCode PC_KSP_Shell_Apply(void* ctx, Vec rhs, Vec sol) {
02711 
02712     PROF_PC_KSP_SHELL_APPLY_BEGIN
02713 
02714       PC_KSP_Shell* data = static_cast<PC_KSP_Shell*>(ctx); 
02715 
02716     if(data->iAmActive) {      
02717       PetscScalar* rhsArray;
02718       PetscScalar* solArray;
02719 
02720       //There are no copies and no mallocs involved.
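      //VecPlaceArray temporarily points the private (active-comm) vectors at
      //the storage of the outer rhs/sol vectors, so KSPSolve reads and writes
      //the outer data in place; VecResetArray then restores the private
      //vectors' original storage before the outer arrays are returned.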
02721 
02722       VecGetArray(rhs, &rhsArray);
02723       VecGetArray(sol, &solArray);
02724 
02725       VecPlaceArray(data->rhs_private, rhsArray);
02726       VecPlaceArray(data->sol_private, solArray);
02727 
02728       KSPSolve(data->ksp_private, data->rhs_private, data->sol_private);
02729 
02730       VecResetArray(data->rhs_private);
02731       VecResetArray(data->sol_private);
02732 
02733       VecRestoreArray(rhs, &rhsArray);
02734       VecRestoreArray(sol, &solArray);
02735     }
02736 
02737     PROF_PC_KSP_SHELL_APPLY_END
02738   }
02739 
02740   int scatterValues(Vec in, Vec out, PetscInt inSz, PetscInt outSz,
02741       int *& sendSz, int *& sendOff, int *& recvSz, int *& recvOff, MPI_Comm comm ) {
02742 
02743     PROF_MG_SCATTER_BEGIN 
02744 
02745       bool isNew = (sendSz == NULL);
02746 
02747     if(isNew) {
02748       int rank, npes;
02749 
02750       MPI_Comm_size(comm, &npes);
02751       MPI_Comm_rank(comm, &rank);
02752 
02753       MPI_Request request;
02754       MPI_Status status;
02755 
02756       PetscInt inSize, outSize; 
02757 
02758       VecGetLocalSize(in, &inSize);
02759       VecGetLocalSize(out, &outSize);
02760 
02761       assert(inSize == inSz);
02762       assert(outSize == outSz);
02763 
02764       PetscInt  off1 = 0, off2 = 0;
02765       PetscInt* scnIn = NULL;
02766       if(inSz) {
02767         scnIn = new PetscInt [inSz]; 
02768       }
02769 
02770       // perform a local scan first ...
02771       PetscInt zero=0;
02772       if(inSz) {
02773         scnIn[0] = 1;
02774         for (PetscInt i = 1; i < inSz; i++) {
02775           scnIn[i] = scnIn[i-1] + 1;
02776         }//end for
02777         // now scan with the final member of the local scan (== inSz) to
02777         // obtain this processor's inclusive global offset
02778         par::Mpi_Scan<PetscInt>(scnIn+inSz-1, &off1, 1, MPI_SUM, comm ); 
02779       } else{
02780         par::Mpi_Scan<PetscInt>(&zero, &off1, 1, MPI_SUM, comm ); 
02781       }
02782 
02783       // communicate the offsets ...
02784       if (rank < (npes-1)){
02785         par::Mpi_Issend<PetscInt>( &off1, 1, rank+1, 0, comm, &request );
02786       }
02787       if (rank){
02788         par::Mpi_Recv<PetscInt>( &off2, 1, rank-1, 0, comm, &status );
02789       } else{
02790         off2 = 0; 
02791       }
02792 
02793       // add offset to local array
02794       for (PetscInt i = 0; i < inSz; i++) {
02795         scnIn[i] = scnIn[i] + off2;  // This has the global scan results now ...
02796       }//end for
02797 
02798       //Gather the scan of the output counts from all processors
02799       PetscInt* outCnts;
02800       outCnts = new PetscInt[npes];
02801 
02802       if(rank < (npes-1)) {
02803         MPI_Status statusWait;
02804         MPI_Wait(&request, &statusWait);
02805       }
02806 
02807       if( outSz ) {
02808         par::Mpi_Scan<PetscInt>( &outSz, &off1, 1, MPI_SUM, comm ); 
02809       }else {
02810         par::Mpi_Scan<PetscInt>( &zero, &off1, 1, MPI_SUM, comm ); 
02811       }
02812 
02813       par::Mpi_Allgather<PetscInt>( &off1, outCnts, 1, comm);
02814 
02815 #ifdef __DEBUG_MG__
02816       assert(sendSz == NULL);
02817       assert(recvSz == NULL);
02818       assert(sendOff == NULL);
02819       assert(recvOff == NULL);
02820 #endif
02821 
02822       sendSz = new int [npes];
02823       recvSz = new int [npes];
02824       sendOff = new int [npes];
02825       recvOff = new int [npes];
02826 
02827       // compute the partition offsets and sizes so that All2Allv can be performed.
02828       // initialize ...
02829       for (int i = 0; i < npes; i++) {
02830         sendSz[i] = 0;
02831       }
02832 
02833       //The Heart of the algorithm....
02834       //scnIn and outCnts are both sorted 
02835       PetscInt inCnt = 0;
02836       int pCnt = 0;
02837       while( (inCnt < inSz) && (pCnt < npes) ) {
02838         if( scnIn[inCnt] <= outCnts[pCnt]  ) {
02839           sendSz[pCnt]++;
02840           inCnt++;
02841         }else {
02842           pCnt++;
02843         }
02844       }
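      //A worked example of the merge above: with npes = 2, suppose this
      //processor owns 4 entries whose global indices (scnIn) are {3,4,5,6}
      //and the scanned output counts are outCnts = {4,8}. Entries 3 and 4
      //satisfy scnIn <= outCnts[0] and go to processor 0; entries 5 and 6 go
      //to processor 1, giving sendSz = {2,2}.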
02845 
02846       // communicate with the other procs how many entries you will be
02847       // sending, and learn how many to receive from whom.
02848       par::Mpi_Alltoall<int>(sendSz, recvSz, 1, comm);
02849 
02850       PetscInt nn=0; // new value of nlSize, i.e., the number of local entries.
02851       for (int i = 0; i < npes; i++) {
02852         nn += recvSz[i];
02853       }
02854 
02855       // compute offsets ...
02856       sendOff[0] = 0;
02857       recvOff[0] = 0;
02858       for (int i=1; i<npes; i++) {
02859         sendOff[i] = sendOff[i-1] + sendSz[i-1];
02860         recvOff[i] = recvOff[i-1] + recvSz[i-1];
02861       }
02862 
02863       assert(nn == outSz);
02864       // clean up...
02865       if(scnIn) {
02866         delete [] scnIn;
02867       }
02868       delete [] outCnts;
02869     }//end if isNew
02870 
02871     // perform All2All  ... 
02872     PetscScalar * inArr;
02873     PetscScalar * outArr;
02874 
02875     VecGetArray(in,&inArr);
02876     VecGetArray(out,&outArr);
02877 
02878     par::Mpi_Alltoallv_sparse<PetscScalar>(inArr, sendSz, sendOff, 
02879         outArr, recvSz, recvOff, comm);
02880 
02881     VecRestoreArray(in,&inArr);
02882     VecRestoreArray(out,&outArr);
02883 
02884     PROF_MG_SCATTER_END 
02885   }//end function
02886 
02887 }//end namespace
02888 
02889 
02890 
