Main Page | Namespace List | Class Hierarchy | Alphabetical List | Class List | Directories | File List | Namespace Members | Class Members | File Members | Related Pages

ProlongMatVec.C

Go to the documentation of this file.
00001 
00008 #include "petsc.h"
00009 #include "petscmat.h"
00010 #include "omg.h"
00011 #include "oda.h"
00012 
00013 #ifndef iC
00014 #define iC(fun) {CHKERRQ(fun);}
00015 #endif
00016 
00017 #ifdef __DEBUG__
00018 #ifndef __DEBUG_MG__
00019 #define __DEBUG_MG__
00020 #endif
00021 #endif
00022 
00023 namespace ot {
00024 
00025   extern double **** RmatType2Stencil;
00026   extern double ***** RmatType1Stencil;
00027   extern unsigned short**** VtxMap1; 
00028   extern unsigned short***** VtxMap2; 
00029   extern unsigned short***** VtxMap3; 
00030   extern unsigned short****** VtxMap4; 
00031 
00032   PetscErrorCode addProlongMatVec(Mat R, Vec v1, Vec v2, Vec v3)
00033   {
00034     PetscScalar one = 1.0;
00035     PetscFunctionBegin;
00036     if((v2!=v3) && (v1!=v3)) {
00037       //Note This will fail only if v2==v3 or v1 ==v3!
00038       //(i.e they are identical copies pointing to the same memory location)
00039       iC(MatMultTranspose(R, v1, v3));//v3 = R*v1
00040       iC(VecAXPY(v3,one,v2));//v3 = v3+ v2=v2 + R*v1
00041     }else {
00042       //This is less efficient but failproof.
00043       TransferOpData *data;                     
00044       MatShellGetContext( R, (void **)&data);
00045       Vec tmp = data->addPtmp;
00046       if(tmp == NULL) {
00047         VecDuplicate(v3,&tmp);
00048         data->addPtmp = tmp;
00049       }
00050       iC(MatMultTranspose(R, v1, tmp));//tmp=R'*v1;
00051       iC(VecWAXPY(v3,one,v2,tmp));//v3 = (1*v2)+tmp=v2 + R'*v1
00052     }
00053     PetscFunctionReturn(0);
00054   }
00055 
00056   PetscErrorCode prolongMatVecType2(Mat R, Vec c, Vec f) {              
00057     TransferOpData *data;                       
00058     PetscFunctionBegin;
00059     iC(MatShellGetContext( R, (void **)&data));
00060     MPI_Comm comm = data->comm;
00061     Vec tmp = data->tmp;                                
00062     PetscInt tmpSz;
00063     PetscInt fSz;
00064     iC(VecGetLocalSize(tmp,&tmpSz));
00065     iC(VecGetLocalSize(f,&fSz));
00066     prolongMatVecType1(R,c,tmp);                
00067     scatterValues(tmp, f, tmpSz, fSz, data->sendSzP, data->sendOffP, 
00068         data->recvSzP, data->recvOffP, comm);
00069     PetscFunctionReturn(0);
00070   }
00071 
00072 #define ITLB_SET_VALUE_NO_SUPPRESSED_DOFS {\
00073   farr[fidx+l] += (Rval*carr[cidx+l]);\
00074 }
00075 
00076 #define ITLB_SET_VALUE_SUPPRESSED_DOFS {\
00077   if(!( (suppressedDOFc && suppressedDOFc[cidx+l]) ||\
00078         (suppressedDOFf && suppressedDOFf[fidx+l]) )) {\
00079     farr[fidx+l] += (Rval*carr[cidx+l]);\
00080   }\
00081 }
00082 
00083 #define INTERGRID_TRANSFER_LOOP_BLOCK(ITLB_SET_VALUE) {\
00084   /*The fine loop is always Writable, but the coarse loop*/\
00085   /*could be Independent or W_Dependent. Hence the fine counter must*/\
00086   /*be incremented properly to align with the coarse.*/\
00087   Point Cpt = dac->getCurrentOffset();\
00088   while(daf->getCurrentOffset() != Cpt) {\
00089     if(daf->isLUTcompressed()) {\
00090       daf->updateQuotientCounter();\
00091     }\
00092     daf->next<ot::DA_FLAGS::WRITABLE>();\
00093   } \
00094   unsigned char chnMask = dac->getHangingNodeIndex(dac->curr());\
00095   unsigned char cNumCoarse = dac->getChildNumber();\
00096   unsigned int cIndices[8];\
00097   dac->getNodeIndices(cIndices);\
00098   unsigned char ctype = 0;\
00099   GET_ETYPE_BLOCK(ctype,chnMask,cNumCoarse)\
00100   if(daf->getLevel(daf->curr()) == dac->getLevel(dac->curr())) {\
00101     /*The coarse and fine elements are the same,*/\
00102     /*so cNumCoarse = cNumFine. This is type-2*/\
00103     double** type2RmatPtr =  RmatType2Stencil[cNumCoarse][ctype];\
00104     unsigned char fhnMask = daf->getHangingNodeIndex(daf->curr());\
00105     unsigned int fIndices[8];\
00106     daf->getNodeIndices(fIndices);\
00107     for(unsigned int fCtr = 0; fCtr < 8; fCtr++) {\
00108       if(!(fhnMask & (1 << fCtr))) {\
00109         ot::FineTouchedStatus* fineTouchedStatusPtr = (&(fineTouchedFlagsArr[fIndices[fCtr]]));\
00110         unsigned int fidx = fIndices[fCtr]*dof;\
00111         for(unsigned int cCtr = 0; cCtr < 8; cCtr++) {\
00112           /*Read fineTouchedFlagsArr Directly*/\
00113           if( (fineTouchedStatusPtr->flags[fCtr]) & (1 << cCtr) ) {\
00114             unsigned int cidx = cIndices[cCtr]*dof;\
00115             double Rval = type2RmatPtr[cCtr][fCtr];\
00116             for(unsigned int l = 0; l < dof; l++) {\
00117               ITLB_SET_VALUE\
00118             }\
00119           }\
00120         }\
00121       }\
00122     }\
00123     daf->next<ot::DA_FLAGS::WRITABLE>();\
00124   }else {\
00125     for(unsigned char cNumFine = 0; cNumFine < 8; cNumFine++) {\
00126       /*The coarse and fine elements are NOT the same. This is type-1.*/\
00127       /*Loop over each of the 8 children of the coarse element.*/\
00128       /*These are the underlying fine elements.*/\
00129       double** type1RmatPtr =  RmatType1Stencil[cNumCoarse][cNumFine][ctype];\
00130       unsigned char fhnMask = daf->getHangingNodeIndex(daf->curr());\
00131       unsigned int fIndices[8];\
00132       daf->getNodeIndices(fIndices);\
00133       for(unsigned int fCtr = 0; fCtr < 8; fCtr++) {\
00134         if(!(fhnMask & (1 << fCtr))) {\
00135           ot::FineTouchedStatus* fineTouchedStatusPtr = (&(fineTouchedFlagsArr[fIndices[fCtr]]));\
00136           unsigned int fidx = fIndices[fCtr]*dof;\
00137           for(unsigned int cCtr = 0; cCtr < 8; cCtr++) {\
00138             /*Read fineTouchedFlagsArr Directly*/\
00139             if( (fineTouchedStatusPtr->flags[fCtr]) & (1 << cCtr) ) {\
00140               unsigned int cidx = cIndices[cCtr]*dof;\
00141               double Rval = type1RmatPtr[cCtr][fCtr];\
00142               for(unsigned int l = 0; l < dof; l++) {\
00143                 ITLB_SET_VALUE\
00144               }\
00145             }\
00146           }\
00147         }\
00148       }\
00149       daf->next<ot::DA_FLAGS::WRITABLE>();\
00150     }\
00151   }\
00152 }
00153 
00154 PetscErrorCode prolongMatVecType1(Mat R, Vec c, Vec f) {
00155 
00156   PROF_MG_PROLONG_BEGIN 
00157 
00158     TransferOpData *data;
00159   iC(MatShellGetContext(R, (void **)&data));
00160 
00161   unsigned int dof = data->dof;
00162 
00163   //Overlap 60% of independent computation with the first communication and
00164   //40% with the second communication. Since the message size for the fist 
00165   //communication will roughly be 1.25 times that for the second communication
00166   //This is because in the first communication, we exchange both coarse and
00167   //fine grid ghosts. In the second, we only exchange fine grid ghosts.
00168   //I estimate the number of fine grid ghosts to be 4 times that of the coarse
00169   //grid ghosts (1 surface, uniform refinement)
00170   unsigned int fop = 60;
00171 
00172   ot::DA * dac = data->dac;
00173   ot::DA * daf = data->daf;     
00174 
00175   unsigned char* suppressedDOFc = data->suppressedDOFc;
00176   unsigned char* suppressedDOFf = data->suppressedDOFf;
00177 
00178   PetscInt cSz;
00179   iC(VecGetLocalSize(c,&cSz));
00180 
00181   unsigned int fopCnt = (fop*cSz)/(100*dof);
00182 
00183   PetscScalar *farr = NULL;
00184   PetscScalar *carr = NULL;
00185 
00186   std::vector<ot::FineTouchedStatus >* fineTouchedFlags = data->fineTouchedFlags;
00187   ot::FineTouchedStatus* fineTouchedFlagsArr;
00188 
00189   dac->vecGetBuffer(c, carr, false, false, true, dof);//Read-only
00190   daf->vecGetBuffer<ot::FineTouchedStatus >(*fineTouchedFlags,
00191       fineTouchedFlagsArr, false, false, true, 1);//read-only 
00192 
00193   if(dac->iAmActive()) {
00194     dac->ReadFromGhostsBegin<PetscScalar>(carr, dof);           
00195   }
00196 
00197   if(daf->iAmActive()) {
00198     daf->ReadFromGhostsBegin<ot::FineTouchedStatus>(fineTouchedFlagsArr, 1);
00199   }
00200 
00201   VecZeroEntries(f);
00202   daf->vecGetBuffer(f, farr, false, false, false, dof);//Writable
00203 
00204   //Note: If Coarse is Independent, then the corresponding Fine is also independent.
00205   //Hence, overlapping comm with comp is possible.              
00206   //Order of the test condition is important. We want to store
00207   //the info before checking loopCtr.   
00208 
00209   if(dac->iAmActive()) {
00210     unsigned int loopCtr = 0;
00211     if(suppressedDOFc || suppressedDOFf) {
00212       for(dac->init<ot::DA_FLAGS::INDEPENDENT>(), daf->init<ot::DA_FLAGS::WRITABLE>();
00213           ( (daf->currWithInfo() == daf->currWithInfo()) && 
00214             (dac->currWithInfo() < dac->end<ot::DA_FLAGS::INDEPENDENT>()) &&
00215             (loopCtr < fopCnt) );
00216           dac->next<ot::DA_FLAGS::INDEPENDENT>(), loopCtr++) {
00217         INTERGRID_TRANSFER_LOOP_BLOCK(ITLB_SET_VALUE_SUPPRESSED_DOFS)
00218       }//end Independent loop (overlapping with read from coarse ghosts)
00219     } else {
00220       for(dac->init<ot::DA_FLAGS::INDEPENDENT>(), daf->init<ot::DA_FLAGS::WRITABLE>();
00221           ( (daf->currWithInfo() == daf->currWithInfo()) && 
00222             (dac->currWithInfo() < dac->end<ot::DA_FLAGS::INDEPENDENT>()) &&
00223             (loopCtr < fopCnt) );
00224           dac->next<ot::DA_FLAGS::INDEPENDENT>(), loopCtr++) {
00225         INTERGRID_TRANSFER_LOOP_BLOCK(ITLB_SET_VALUE_NO_SUPPRESSED_DOFS)
00226       }//end Independent loop (overlapping with read from coarse ghosts)
00227     }
00228   }
00229 
00230   if(dac->iAmActive()) {
00231     dac->ReadFromGhostsEnd<PetscScalar>(carr);
00232   }
00233 
00234   if(daf->iAmActive()) {
00235     daf->ReadFromGhostsEnd<ot::FineTouchedStatus>(fineTouchedFlagsArr);
00236   }
00237 
00238   if(dac->iAmActive()) {
00239     if(suppressedDOFc || suppressedDOFf) {
00240       for(dac->init<ot::DA_FLAGS::W_DEPENDENT>(), daf->init<ot::DA_FLAGS::WRITABLE>();
00241           dac->curr() < dac->end<ot::DA_FLAGS::W_DEPENDENT>(); dac->next<ot::DA_FLAGS::W_DEPENDENT>()) {
00242         INTERGRID_TRANSFER_LOOP_BLOCK(ITLB_SET_VALUE_SUPPRESSED_DOFS)
00243       }//end dependent loop
00244     } else {
00245       for(dac->init<ot::DA_FLAGS::W_DEPENDENT>(), daf->init<ot::DA_FLAGS::WRITABLE>();
00246           dac->curr() < dac->end<ot::DA_FLAGS::W_DEPENDENT>(); dac->next<ot::DA_FLAGS::W_DEPENDENT>()) {
00247         INTERGRID_TRANSFER_LOOP_BLOCK(ITLB_SET_VALUE_NO_SUPPRESSED_DOFS)
00248       }//end dependent loop
00249     }
00250   }
00251 
00252   if(daf->iAmActive()) {
00253     daf->WriteToGhostsBegin<PetscScalar>(farr, dof);
00254   }
00255 
00256   if(dac->iAmActive()) {
00257     //Continue Independent loop from where we left off.
00258     if(suppressedDOFc || suppressedDOFf) {
00259       for(dac->init<ot::DA_FLAGS::FROM_STORED>(), daf->init<ot::DA_FLAGS::FROM_STORED>();
00260           dac->curr() < dac->end<ot::DA_FLAGS::INDEPENDENT>(); dac->next<ot::DA_FLAGS::INDEPENDENT>()) {
00261         INTERGRID_TRANSFER_LOOP_BLOCK(ITLB_SET_VALUE_SUPPRESSED_DOFS)
00262       }//end Independent loop (overlapping with write to fine ghosts) 
00263     } else {
00264       for(dac->init<ot::DA_FLAGS::FROM_STORED>(), daf->init<ot::DA_FLAGS::FROM_STORED>();
00265           dac->curr() < dac->end<ot::DA_FLAGS::INDEPENDENT>(); dac->next<ot::DA_FLAGS::INDEPENDENT>()) {
00266         INTERGRID_TRANSFER_LOOP_BLOCK(ITLB_SET_VALUE_NO_SUPPRESSED_DOFS)
00267       }//end Independent loop (overlapping with write to fine ghosts) 
00268     }
00269   }
00270 
00271   if(daf->iAmActive()) {
00272     daf->WriteToGhostsEnd<PetscScalar>(farr, dof);
00273   }
00274 
00275   daf->vecRestoreBuffer(f, farr, false, false, false, dof);//Writable 
00276   dac->vecRestoreBuffer(c, carr, false, false, true, dof);//Read-only
00277   daf->vecRestoreBuffer<ot::FineTouchedStatus >(*fineTouchedFlags, fineTouchedFlagsArr, false, false, true, 1);//read-only 
00278 
00279 #ifdef PETSC_USE_LOG
00280   PetscLogFlops(128*dof*(daf->getElementSize()));
00281 #endif
00282 
00283   PROF_MG_PROLONG_END 
00284 }//end Prolong-3
00285 
00286 #undef ITLB_SET_VALUE_NO_SUPPRESSED_DOFS
00287 #undef ITLB_SET_VALUE_SUPPRESSED_DOFS
00288 #undef INTERGRID_TRANSFER_LOOP_BLOCK
00289 
00290 }//end namespace
00291 
00292 

Generated on Tue Mar 24 16:14:06 2009 for DENDRO by  doxygen 1.3.9.1