Main Page | Namespace List | Class Hierarchy | Alphabetical List | Class List | Directories | File List | Namespace Members | Class Members | File Members | Related Pages

RestrictMatVec.C

Go to the documentation of this file.
00001 
00008 #include "petsc.h"
00009 #include "petscmat.h"
00010 #include "omg.h"
00011 #include "oda.h"
00012 
00013 #ifndef iC
00014 #define iC(fun) {CHKERRQ(fun);}
00015 #endif
00016 
00017 #ifdef __DEBUG__
00018 #ifndef __DEBUG_MG__
00019 #define __DEBUG_MG__
00020 #endif
00021 #endif
00022 
00023 namespace ot {
00024 
00025   extern double **** RmatType2Stencil;
00026   extern double ***** RmatType1Stencil;
00027   extern unsigned short**** VtxMap1; 
00028   extern unsigned short***** VtxMap2; 
00029   extern unsigned short***** VtxMap3; 
00030   extern unsigned short****** VtxMap4; 
00031 
00032   PetscErrorCode  addRestrictMatVec(Mat R, Vec v1, Vec v2, Vec v3)      
00033   {
00034     PetscScalar one = 1.0;
00035     PetscFunctionBegin;
00036     if((v2!=v3) && (v1!=v3)) {
00037       //Note This will fail only if v2==v3 or v1 ==v3!(i.e they are identical copies pointing to the same memory location)
00038       iC(MatMult(R, v1, v3));//v3 = R*v1
00039       iC(VecAXPY(v3,one,v2));//v3 = v3+ v2=v2 + R*v1
00040     }else {
00041       //This is less efficient but failproof.
00042       TransferOpData *data;
00043       MatShellGetContext( R, (void **)&data);
00044       Vec tmp = data->addRtmp;
00045       if(tmp == NULL) {
00046         VecDuplicate(v3,&tmp);
00047         data->addRtmp = tmp;
00048       }
00049       iC(MatMult(R, v1, tmp));//tmp=R*v1;
00050       iC(VecWAXPY(v3,one,v2,tmp));//v3 = (1*v2)+tmp=v2 + R*v1
00051     }
00052     PetscFunctionReturn(0);
00053   }
00054 
00055   PetscErrorCode   restrictMatVecType2(Mat R, Vec f, Vec c) {
00056     TransferOpData *data;
00057     PetscFunctionBegin;
00058     iC(MatShellGetContext( R, (void **)&data));
00059     MPI_Comm comm = data->comm;
00060     Vec tmp = data->tmp;                
00061     PetscInt tmpSz;
00062     PetscInt fSz;
00063     iC(VecGetLocalSize(tmp,&tmpSz));
00064     iC(VecGetLocalSize(f,&fSz));
00065     scatterValues(f, tmp, fSz, tmpSz, data->sendSzR,
00066         data->sendOffR, data->recvSzR, data->recvOffR, comm);
00067     restrictMatVecType1(R, tmp, c);
00068     PetscFunctionReturn(0);
00069   }
00070 
00071 #define ITLB_SET_VALUE_NO_SUPPRESSED_DOFS {\
00072   carr[cidx+l] += (Rval*farr[fidx+l]);\
00073 }
00074 
00075 #define ITLB_SET_VALUE_SUPPRESSED_DOFS {\
00076   if(!( (suppressedDOFf && suppressedDOFf[fidx+l]) ||\
00077         (suppressedDOFc && suppressedDOFc[cidx+l]) )) {\
00078     carr[cidx+l] += (Rval*farr[fidx+l]);\
00079   }\
00080 }
00081 
00082 #define INTERGRID_TRANSFER_LOOP_BLOCK(ITLB_SET_VALUE) {\
00083   /*The fine loop is always Writable, but the coarse loop*/\
00084   /*could be Independent or W_Dependent. Hence the fine counter must*/\
00085   /*be incremented properly to align with the coarse.*/\
00086   Point Cpt = dac->getCurrentOffset();\
00087   while(daf->getCurrentOffset() != Cpt) {\
00088     if(daf->isLUTcompressed()) {\
00089       daf->updateQuotientCounter();\
00090     }\
00091     daf->next<ot::DA_FLAGS::WRITABLE>();\
00092   } \
00093   unsigned char chnMask = dac->getHangingNodeIndex(dac->curr());\
00094   unsigned char cNumCoarse = dac->getChildNumber();\
00095   unsigned int cIndices[8];\
00096   dac->getNodeIndices(cIndices);\
00097   unsigned char ctype = 0;\
00098   GET_ETYPE_BLOCK(ctype,chnMask,cNumCoarse)\
00099   if(daf->getLevel(daf->curr()) == dac->getLevel(dac->curr())) {\
00100     /*The coarse and fine elements are the same,*/\
00101     /*so cNumCoarse = cNumFine. This is type-2*/\
00102     double** type2RmatPtr = RmatType2Stencil[cNumCoarse][ctype];\
00103     unsigned char fhnMask = daf->getHangingNodeIndex(daf->curr());\
00104     unsigned int fIndices[8];\
00105     daf->getNodeIndices(fIndices);\
00106     for(unsigned char fCtr = 0; fCtr < 8; fCtr++) {\
00107       if(!(fhnMask & (1 << fCtr))) {\
00108         ot::FineTouchedStatus* fineTouchedStatusPtr = (&(fineTouchedFlagsArr[fIndices[fCtr]]));\
00109         unsigned int fidx = fIndices[fCtr]*dof;\
00110         for(unsigned char cCtr = 0; cCtr < 8; cCtr++) {\
00111           /*Read fineTouchedFlagsArr Directly*/\
00112           if( (fineTouchedStatusPtr->flags[fCtr]) & (1 << cCtr) ) {\
00113             unsigned int cidx = cIndices[cCtr]*dof;\
00114             double Rval = type2RmatPtr[cCtr][fCtr];\
00115             for(unsigned int l = 0; l < dof; l++) {\
00116               ITLB_SET_VALUE\
00117             }\
00118           }\
00119         }\
00120       }\
00121     }\
00122     daf->next<ot::DA_FLAGS::WRITABLE>();\
00123   }else {\
00124     for(unsigned char cNumFine = 0; cNumFine < 8; cNumFine++) {\
00125       /*The coarse and fine elements are NOT the same. This is type-1.*/\
00126       /*Loop over each of the 8 children of the coarse element.*/\
00127       /*These are the underlying fine elements.*/\
00128       double** type1RmatPtr = RmatType1Stencil[cNumCoarse][cNumFine][ctype];\
00129       unsigned char fhnMask = daf->getHangingNodeIndex(daf->curr());\
00130       unsigned int fIndices[8];\
00131       daf->getNodeIndices(fIndices);\
00132       for(unsigned int fCtr = 0; fCtr < 8; fCtr++) {\
00133         if(!(fhnMask & (1 << fCtr))) {\
00134           ot::FineTouchedStatus* fineTouchedStatusPtr = (&(fineTouchedFlagsArr[fIndices[fCtr]]));\
00135           unsigned int fidx = fIndices[fCtr]*dof;\
00136           for(unsigned int cCtr = 0; cCtr < 8; cCtr++) {\
00137             /*Read fineTouchedFlagsArr Directly*/\
00138             if( (fineTouchedStatusPtr->flags[fCtr]) & (1 << cCtr) ) {\
00139               unsigned int cidx = cIndices[cCtr]*dof;\
00140               double Rval = type1RmatPtr[cCtr][fCtr];\
00141               for(unsigned int l = 0; l < dof; l++) {\
00142                 ITLB_SET_VALUE\
00143               }\
00144             }\
00145           }\
00146         }\
00147       }\
00148       daf->next<ot::DA_FLAGS::WRITABLE>();\
00149     }\
00150   }\
00151 }
00152 
00153 #define INTERGRID_TRANSFER_LOOP_BLOCK_DUMMY {\
00154   /*The fine loop is always Writable, but the coarse loop*/\
00155   /*could be Independent or W_Dependent. Hence the fine counter must*/\
00156   /*be incremented properly to align with the coarse.*/\
00157   Point Cpt = dac->getCurrentOffset();\
00158   while(daf->getCurrentOffset() != Cpt) {\
00159     if(daf->isLUTcompressed()) {\
00160       daf->updateQuotientCounter();\
00161     }\
00162     daf->next<ot::DA_FLAGS::WRITABLE>();\
00163   }\
00164   unsigned char chnMask = dac->getHangingNodeIndex(dac->curr());\
00165   unsigned char cNumCoarse = dac->getChildNumber();\
00166   unsigned int cIndices[8];\
00167   dac->getNodeIndices(cIndices);\
00168   unsigned char ctype = 0;\
00169   GET_ETYPE_BLOCK(ctype,chnMask,cNumCoarse)\
00170   if(daf->getLevel(daf->curr()) == dac->getLevel(dac->curr())) {\
00171     /*The coarse and fine elements are the same,*/\
00172     /*so cNumCoarse = cNumFine. This is type-2*/\
00173     unsigned char fhnMask = daf->getHangingNodeIndex(daf->curr());\
00174     unsigned int fIndices[8];\
00175     daf->getNodeIndices(fIndices);\
00176     for(unsigned char fCtr = 0; fCtr < 8; fCtr++) {\
00177       if(!(fhnMask & (1 << fCtr))) {\
00178         unsigned char thisElemLev = daf->getLevel(daf->curr());\
00179         unsigned char refElemLev = daf->getLevel(fIndices[fCtr]);\
00180         ot::FineTouchedDummyStatus* fineTouchedDummyStatusPtr = \
00181         (&(fineTouchedDummyFlagsArr[fIndices[fCtr]]));\
00182         if(thisElemLev == refElemLev) {\
00183           /*Set the dummy flag for this fine element and fine node pair*/\
00184           /*Since, I deal with this one byte at a time. It is consistent*/\
00185           /* across all computers. Endian issues do not enter. Hence, I*/\
00186           /* choose to manipulate chars instead of shorts. Besides, the*/\
00187           /* size of short is not guaranteed to be 2 bytes on all*/\
00188           /* machines.*/\
00189           /* A char is always 1 byte.*/\
00190           /*1 bit: Entry set or not (flags[2*fCtr][0]) */\
00191           /*1 bit: scalingCtr (flags[2*fCtr][1]) */\
00192           /*2 bits: stencilType (flags[2*fCtr][3,2]) */\
00193           /*3 bits: cNumFine (flags[2*fCtr][6,5,4]) */\
00194           /*3 bits: cNumCoarse (flags[2*fCtr+1][2,1,0]) */\
00195           /*5 bits: ctype (flags[2*fCtr+1][7,6,5,4,3]) */\
00196           fineTouchedDummyStatusPtr->flags[fCtr<<1] = 1;\
00197           fineTouchedDummyStatusPtr->flags[(fCtr<<1)+1] = cNumCoarse;\
00198           fineTouchedDummyStatusPtr->flags[(fCtr<<1)+1] |= (ctype<<3);\
00199         }else {\
00200           unsigned char scalingCtr = 0;\
00201           if(thisElemLev < refElemLev) {\
00202             scalingCtr = 1;\
00203           }\
00204           fineTouchedDummyStatusPtr->flags[fCtr<<1] = 1;\
00205           fineTouchedDummyStatusPtr->flags[fCtr<<1] |= (scalingCtr<<1);\
00206           fineTouchedDummyStatusPtr->flags[fCtr<<1] |= (2<<2);\
00207           fineTouchedDummyStatusPtr->flags[(fCtr<<1)+1] = cNumCoarse;\
00208           fineTouchedDummyStatusPtr->flags[(fCtr<<1)+1] |= (ctype<<3);\
00209         }\
00210       }\
00211     }\
00212     daf->next<ot::DA_FLAGS::WRITABLE>();\
00213   }else {\
00214     for(unsigned char cNumFine = 0; cNumFine < 8; cNumFine++) {\
00215       /*The coarse and fine elements are NOT the same. This is type-1.*/\
00216       /*Loop over each of the 8 children of the coarse element.*/\
00217       /*These are the underlying fine elements.*/\
00218       unsigned char fhnMask = daf->getHangingNodeIndex(daf->curr());\
00219       unsigned int fIndices[8];\
00220       daf->getNodeIndices(fIndices);\
00221       for(unsigned int fCtr = 0; fCtr < 8; fCtr++) {\
00222         if(!(fhnMask & (1 << fCtr))) {\
00223           unsigned char thisElemLev = daf->getLevel(daf->curr());\
00224           unsigned char refElemLev = daf->getLevel(fIndices[fCtr]);\
00225           ot::FineTouchedDummyStatus* fineTouchedDummyStatusPtr = \
00226           (&(fineTouchedDummyFlagsArr[fIndices[fCtr]]));\
00227           if(thisElemLev == refElemLev) {\
00228             fineTouchedDummyStatusPtr->flags[fCtr<<1] = 1;\
00229             fineTouchedDummyStatusPtr->flags[fCtr<<1] |= (1 << 2);\
00230             fineTouchedDummyStatusPtr->flags[fCtr<<1] |= (cNumFine<<4);\
00231             fineTouchedDummyStatusPtr->flags[(fCtr<<1)+1] = cNumCoarse;\
00232             fineTouchedDummyStatusPtr->flags[(fCtr<<1)+1] |= (ctype<<3);\
00233           }else {\
00234             unsigned char scalingCtr = 0;\
00235             if(thisElemLev < refElemLev) {\
00236               scalingCtr = 1;\
00237             }\
00238             fineTouchedDummyStatusPtr->flags[fCtr<<1] = 1;\
00239             fineTouchedDummyStatusPtr->flags[fCtr<<1] |= (scalingCtr<<1);\
00240             fineTouchedDummyStatusPtr->flags[fCtr<<1] |= (3<<2);\
00241             fineTouchedDummyStatusPtr->flags[fCtr<<1] |= (cNumFine<<4);\
00242             fineTouchedDummyStatusPtr->flags[(fCtr<<1)+1] = cNumCoarse;\
00243             fineTouchedDummyStatusPtr->flags[(fCtr<<1)+1] |= (ctype<<3);\
00244           }\
00245         }\
00246       }\
00247       daf->next<ot::DA_FLAGS::WRITABLE>();\
00248     }\
00249   }\
00250 }
00251 
00252 #define ITLB_DUMMY_FCTR_BLOCK1 {\
00253   if(fineTouchedDummyStatusPtr->flags[dummyFctr<<1]) {\
00254     /*12_10 = (00001100)_2*/\
00255     unsigned char dummyStencilType = \
00256     ((12 & (fineTouchedDummyStatusPtr->flags[dummyFctr<<1]))>>2);\
00257     /*7_10 = (00000111)_2*/\
00258     unsigned char dummyCnumCoarse = \
00259     (7 & (fineTouchedDummyStatusPtr->flags[(dummyFctr<<1)+1]));\
00260     /*248_10 = (11111000)_2*/\
00261     unsigned char dummyCtype = \
00262     ((248 & (fineTouchedDummyStatusPtr->flags[(dummyFctr<<1)+1]))>>3);\
00263     switch(dummyStencilType) {\
00264       case 0: {\
00265                 dummyMapPtrs[dummyFctr] = \
00266                 VtxMap1[dummyFctr][dummyCnumCoarse][dummyCtype];\
00267                 break;\
00268               }\
00269       case 1: {\
00270                 /*112_10 = (01110000)_2*/\
00271                 unsigned char dummyCnumFine = \
00272                 ((112 & (fineTouchedDummyStatusPtr->flags[dummyFctr<<1]))>>4);\
00273                 dummyMapPtrs[dummyFctr] = \
00274                 VtxMap2[dummyFctr][dummyCnumFine][dummyCnumCoarse][dummyCtype];\
00275                 break;\
00276               }\
00277       case 2: {\
00278                 /*2_10 = (00000010)_2*/\
00279                 unsigned char dummyScalingCtr = \
00280                 ((2 & (fineTouchedDummyStatusPtr->flags[dummyFctr<<1]))>>1);\
00281                 dummyMapPtrs[dummyFctr] = \
00282                 VtxMap3[dummyFctr-1][dummyScalingCtr][dummyCnumCoarse][dummyCtype];\
00283                 break;\
00284               }\
00285       case 3: {\
00286                 unsigned char dummyScalingCtr = \
00287                 ((2 & (fineTouchedDummyStatusPtr->flags[dummyFctr<<1]))>>1);\
00288                 unsigned char dummyCnumFine = \
00289                 ((112 & (fineTouchedDummyStatusPtr->flags[dummyFctr<<1]))>>4);\
00290                 dummyMapPtrs[dummyFctr] = \
00291                 VtxMap4[dummyFctr-1][dummyScalingCtr][dummyCnumFine][dummyCnumCoarse][dummyCtype];\
00292                 break;\
00293               }\
00294       default: {\
00295                  assert(false);\
00296                }\
00297     }\
00298   }\
00299 }
00300 
00301 #define ITLB_DUMMY_FCTR_BLOCK2 {\
00302   if(dummyMapPtrs[dummyFctr]) {\
00303     for(unsigned char dummyCctr = 0; dummyCctr < 8; dummyCctr++) {\
00304       if(coarseVtxId == dummyMapPtrs[dummyFctr][dummyCctr]) {\
00305         skipThisEntry = true;\
00306         break;\
00307       }\
00308     }\
00309     if(skipThisEntry) {\
00310       break;\
00311     }\
00312   }\
00313 }
00314 
00315 #define ITLB_DUMMY_FINAL_SET_VALUE(nodeNum,idx) {\
00316   if(!(fhnMask & (1 << nodeNum))) {\
00317     ot::FineTouchedDummyStatus* fineTouchedDummyStatusPtr = (&(fineTouchedDummyFlagsArr[idx]));\
00318     typedef unsigned short* ushPtr;\
00319     ushPtr dummyMapPtrs[8];\
00320     for(unsigned char dummyFctr = 0; dummyFctr < 8; dummyFctr++) {\
00321       dummyMapPtrs[dummyFctr] = NULL;\
00322       ITLB_DUMMY_FCTR_BLOCK1\
00323     }\
00324     ot::FineTouchedStatus* fineTouchedStatusPtr = (&(fineTouchedFlagsArr[idx]));\
00325     for(unsigned char fCtr = 0; fCtr < 8; fCtr++) {\
00326       fineTouchedStatusPtr->flags[fCtr] = 0;\
00327       /*Handle negative boundaries*/\
00328       if(dummyMapPtrs[fCtr]) {\
00329         for(unsigned char cCtr = 0; cCtr < 8; cCtr++) {\
00330           /*Can't guarantee that each of the 8 elements*/\
00331           /*surrounding this fine node has a different cNum*/\
00332           /*But, they will have a different fCtr*/\
00333           unsigned short coarseVtxId = dummyMapPtrs[fCtr][cCtr];\
00334           bool skipThisEntry = false;\
00335           for(unsigned char dummyFctr = 0; dummyFctr < fCtr; dummyFctr++) {\
00336             ITLB_DUMMY_FCTR_BLOCK2\
00337           }\
00338           if(!skipThisEntry) {\
00339             fineTouchedStatusPtr->flags[fCtr] |= (1 << cCtr);\
00340           }\
00341         }\
00342       }\
00343     }\
00344   }\
00345 }
00346 
00347 #define INTERGRID_TRANSFER_LOOP_BLOCK_DUMMY_FINAL_W {\
00348   /*To avoid redundant writes, only the element whose anchor is */\
00349   /*the regular fine grid node writes for all the 8 elements */\
00350   /*surrounding this node. Note, that some of these elements */\
00351   /* may be owned by other processors. So all processors take */\
00352   /* care of all the 8 elements surrounding the node they own.*/\
00353   /* The only other problem is with the positive boundary nodes. */\
00354   /* The element whose anchor is this positive boundary node is only */\
00355   /* a pseudo-element and will never be visited while looping through the */\
00356   /* elements. To make things worse, we can have situtations where a */\
00357   /* positive boundary node is owned by one processor and all the */\
00358   /* true  elements that share this node are on different processors. */\
00359   /* Thus a writable loop will never suffice to take care of this */\
00360   /* scenario. Hence, this WRITABLE loop will handle all nodes except */\
00361   /* positive boundaries and a separate ALL loop will handle positive */\
00362   /* boundary nodes alone. NOTE, that unlike the other loops in the */\
00363   /* restriction/prolongation, these two loops are not simultaneous */\
00364   /* loops through both the fine and coarse grids. Looping through the */\
00365   /* fine mesh will suffice.*/\
00366   unsigned char fhnMask = daf->getHangingNodeIndex(daf->curr());\
00367   ITLB_DUMMY_FINAL_SET_VALUE(0,daf->curr())\
00368 }
00369 
00370 #define INTERGRID_TRANSFER_LOOP_BLOCK_DUMMY_FINAL_A {\
00371   unsigned char fBndFlag;\
00372   bool fIsBnd = daf->isBoundaryOctant(&fBndFlag);\
00373   fIsBnd = (fBndFlag > ot::TreeNode::NEG_POS_DEMARCATION);\
00374   if(fIsBnd) {\
00375     unsigned char fhnMask = daf->getHangingNodeIndex(daf->curr());\
00376     unsigned int fIndices[8];\
00377     daf->getNodeIndices(fIndices);\
00378     if(fBndFlag & ot::TreeNode::X_POS_BDY) {\
00379       ITLB_DUMMY_FINAL_SET_VALUE(1,fIndices[1])\
00380     }\
00381     if(fBndFlag & ot::TreeNode::Y_POS_BDY) {\
00382       ITLB_DUMMY_FINAL_SET_VALUE(2,fIndices[2])\
00383     }\
00384     if(fBndFlag & ot::TreeNode::Z_POS_BDY) {\
00385       ITLB_DUMMY_FINAL_SET_VALUE(4,fIndices[4])\
00386     }\
00387     if( (fBndFlag & (ot::TreeNode::X_POS_BDY + ot::TreeNode::Y_POS_BDY))\
00388         == (ot::TreeNode::X_POS_BDY + ot::TreeNode::Y_POS_BDY) ) {\
00389       ITLB_DUMMY_FINAL_SET_VALUE(3,fIndices[3])\
00390     }\
00391     if( (fBndFlag & (ot::TreeNode::Y_POS_BDY + ot::TreeNode::Z_POS_BDY))\
00392         == (ot::TreeNode::Y_POS_BDY + ot::TreeNode::Z_POS_BDY) ) {\
00393       ITLB_DUMMY_FINAL_SET_VALUE(6,fIndices[6])\
00394     }\
00395     if( (fBndFlag & (ot::TreeNode::Z_POS_BDY + ot::TreeNode::X_POS_BDY))\
00396         == (ot::TreeNode::Z_POS_BDY + ot::TreeNode::X_POS_BDY) ) {\
00397       ITLB_DUMMY_FINAL_SET_VALUE(5,fIndices[5])\
00398     }\
00399     if( (fBndFlag & (ot::TreeNode::X_POS_BDY + ot::TreeNode::Y_POS_BDY + ot::TreeNode::Z_POS_BDY))\
00400         == (ot::TreeNode::X_POS_BDY + ot::TreeNode::Y_POS_BDY + ot::TreeNode::Z_POS_BDY) ) {\
00401       ITLB_DUMMY_FINAL_SET_VALUE(7,fIndices[7])\
00402     }\
00403   }else {\
00404     if(daf->isLUTcompressed()) {\
00405       daf->updateQuotientCounter();\
00406     }\
00407   }\
00408 }
00409 
00410 PetscErrorCode dummyRestrictMatVecType1(TransferOpData *data) {
00411 
00412   PROF_MG_RESTRICT_DUMMY_BEGIN
00413 
00414     ot::DA * dac = data->dac;
00415   ot::DA * daf = data->daf;
00416 
00417   ot::FineTouchedDummyStatus* fineTouchedDummyFlagsArr;
00418   std::vector<ot::FineTouchedDummyStatus > fineTouchedDummyFlags;
00419 
00420   daf->createVector<ot::FineTouchedDummyStatus >(fineTouchedDummyFlags, false, false, 1);
00421   //The constructor takes care of zeroing out the memory
00422   daf->vecGetBuffer<ot::FineTouchedDummyStatus >(fineTouchedDummyFlags,
00423       fineTouchedDummyFlagsArr, false, false, false, 1);//writable 
00424 
00425   if(dac->iAmActive()) {
00426     for(dac->init<ot::DA_FLAGS::W_DEPENDENT>(), daf->init<ot::DA_FLAGS::WRITABLE>();
00427         dac->curr() < dac->end<ot::DA_FLAGS::W_DEPENDENT>(); dac->next<ot::DA_FLAGS::W_DEPENDENT>()) {
00428       INTERGRID_TRANSFER_LOOP_BLOCK_DUMMY;      
00429     }//end dependent loop
00430   }
00431 
00432   if(daf->iAmActive()) {
00433     daf->WriteToGhostsBegin<ot::FineTouchedDummyStatus>(fineTouchedDummyFlagsArr, 1);
00434   }
00435 
00436   if(dac->iAmActive()) {
00437     //Note: If Coarse is Independent, then the corresponding Fine is also independent.
00438     //Hence, overlapping comm with comp is possible.            
00439     for(dac->init<ot::DA_FLAGS::INDEPENDENT>(), daf->init<ot::DA_FLAGS::WRITABLE>();
00440         dac->curr() < dac->end<ot::DA_FLAGS::INDEPENDENT>(); dac->next<ot::DA_FLAGS::INDEPENDENT>()) {
00441       INTERGRID_TRANSFER_LOOP_BLOCK_DUMMY;      
00442     }//end Independent loop (overlapping with write to coarse ghosts) 
00443   }
00444 
00445   if(daf->iAmActive()) {
00446     daf->WriteToGhostsEnd<ot::FineTouchedDummyStatus >(fineTouchedDummyFlagsArr, 1);
00447   }
00448 
00449   //Take care of the discrepancies across processors.
00450   //It is not sufficient to loop over the dependent elements alone to set the
00451   //status. i.e. setting status for the independent elements can not be
00452   //combined with computing dummystatus. This is because by definition, a
00453   //dependent element is one which has atleast 1 writable and 1 ghost node. But
00454   //we could have cases where the owner of the node is not a dependent element,
00455   //but this node is shared with ghost elements.
00456 
00457   ot::FineTouchedStatus* fineTouchedFlagsArr;
00458   std::vector<ot::FineTouchedStatus >* fineTouchedFlags = data->fineTouchedFlags;
00459 
00460   daf->vecGetBuffer<ot::FineTouchedStatus >(*fineTouchedFlags, fineTouchedFlagsArr,
00461       false, false, false, 1);//writable 
00462 
00463   if(daf->iAmActive()) {
00464     for(daf->init<ot::DA_FLAGS::WRITABLE>(); daf->curr() < daf->end<ot::DA_FLAGS::WRITABLE>(); daf->next<ot::DA_FLAGS::WRITABLE>()) {
00465       INTERGRID_TRANSFER_LOOP_BLOCK_DUMMY_FINAL_W;      
00466     }//end  W loop
00467     for(daf->init<ot::DA_FLAGS::ALL>(); daf->curr() < daf->end<ot::DA_FLAGS::ALL>(); daf->next<ot::DA_FLAGS::ALL>()) {
00468       INTERGRID_TRANSFER_LOOP_BLOCK_DUMMY_FINAL_A;      
00469     }//end  A loop
00470   }
00471 
00472   daf->vecRestoreBuffer<ot::FineTouchedStatus >(*fineTouchedFlags, fineTouchedFlagsArr, 
00473       false, false, false, 1);//writable 
00474 
00475   //THIS IS A HACK FOR EFFICIENCY PURPOSES. Although, the buffer was modified
00476   //there is no need to write the changes back to the vector. This is because
00477   //the dummy vector is only temporary.   
00478   daf->vecRestoreBuffer<ot::FineTouchedDummyStatus >(fineTouchedDummyFlags, fineTouchedDummyFlagsArr, 
00479       false, false, true, 1);//READ-ONLY
00480 
00481   fineTouchedDummyFlags.clear();
00482 
00483   PROF_MG_RESTRICT_DUMMY_END
00484 }//restrict-3
00485 
00486 PetscErrorCode restrictMatVecType1(Mat R, Vec f, Vec c) {
00487 
00488   PROF_MG_RESTRICT_BEGIN
00489 
00490     TransferOpData *data;
00491   iC(MatShellGetContext( R, (void **)&data));
00492 
00493   unsigned int dof = data->dof;
00494 
00495   //Overlap 75% of the independent computation with the first communication and
00496   //25% with the second communication. In the first communication, we exchange
00497   // fine grid ghosts. In the second, we exchange coarse grid ghosts (1/4 of
00498   //fine grid, assuming uniform refinement). So, the
00499   //first comm. is more expensive.
00500   unsigned int fop = 75;
00501 
00502   unsigned char* suppressedDOFc = data->suppressedDOFc;
00503   unsigned char* suppressedDOFf = data->suppressedDOFf;
00504 
00505   ot::DA * dac = data->dac;
00506   ot::DA * daf = data->daf;
00507 
00508   PetscInt cSz;
00509   iC(VecGetLocalSize(c,&cSz));
00510 
00511   unsigned int fopCnt = (fop*cSz)/(100*dof);
00512 
00513   std::vector<ot::FineTouchedStatus >* fineTouchedFlags = data->fineTouchedFlags;
00514   ot::FineTouchedStatus* fineTouchedFlagsArr;
00515 
00516   PetscScalar *farr = NULL;
00517   PetscScalar *carr = NULL;
00518 
00519   daf->vecGetBuffer(f, farr, false, false, true, dof);//Read-only
00520   daf->vecGetBuffer<ot::FineTouchedStatus >(*fineTouchedFlags, 
00521       fineTouchedFlagsArr, false, false, true, 1);//read-only 
00522 
00523   if(daf->iAmActive()) {
00524     daf->ReadFromGhostsBegin<PetscScalar>(farr, dof);
00525     //This communication can be avoided if we store it
00526     daf->ReadFromGhostsBegin<ot::FineTouchedStatus>(fineTouchedFlagsArr, 1);
00527   }
00528 
00529   VecZeroEntries(c);
00530   dac->vecGetBuffer(c, carr, false, false, false, dof);//Writable
00531 
00532   if(dac->iAmActive()) {
00533     //Note: If Coarse is Independent, then the corresponding Fine is also independent.
00534     //Hence, overlapping comm with comp is possible.            
00535     //Order of the test condition is important. We want to store the info before checking loopCtr.               
00536     unsigned int loopCtr = 0;
00537     if(suppressedDOFc || suppressedDOFf) {
00538       for(dac->init<ot::DA_FLAGS::INDEPENDENT>(), daf->init<ot::DA_FLAGS::WRITABLE>();
00539           ( (daf->currWithInfo() == daf->currWithInfo()) && 
00540             (dac->currWithInfo() < dac->end<ot::DA_FLAGS::INDEPENDENT>()) && (loopCtr < fopCnt) );
00541           dac->next<ot::DA_FLAGS::INDEPENDENT>(), loopCtr++) {
00542         INTERGRID_TRANSFER_LOOP_BLOCK(ITLB_SET_VALUE_SUPPRESSED_DOFS);  
00543       }//end Independent loop (overlapping with read from fine ghosts)
00544     } else {
00545       for(dac->init<ot::DA_FLAGS::INDEPENDENT>(), daf->init<ot::DA_FLAGS::WRITABLE>();
00546           ( (daf->currWithInfo() == daf->currWithInfo()) && 
00547             (dac->currWithInfo() < dac->end<ot::DA_FLAGS::INDEPENDENT>()) && (loopCtr < fopCnt) );
00548           dac->next<ot::DA_FLAGS::INDEPENDENT>(), loopCtr++) {
00549         INTERGRID_TRANSFER_LOOP_BLOCK(ITLB_SET_VALUE_NO_SUPPRESSED_DOFS);       
00550       }//end Independent loop (overlapping with read from fine ghosts)
00551     }
00552   }
00553 
00554   if(daf->iAmActive()) {
00555     daf->ReadFromGhostsEnd<PetscScalar>(farr);
00556     daf->ReadFromGhostsEnd<ot::FineTouchedStatus>(fineTouchedFlagsArr);
00557   }
00558 
00559   if(dac->iAmActive()) {
00560     if(suppressedDOFc || suppressedDOFf) {
00561       for(dac->init<ot::DA_FLAGS::W_DEPENDENT>(), daf->init<ot::DA_FLAGS::WRITABLE>();
00562           dac->curr() < dac->end<ot::DA_FLAGS::W_DEPENDENT>(); dac->next<ot::DA_FLAGS::W_DEPENDENT>()) {
00563         INTERGRID_TRANSFER_LOOP_BLOCK(ITLB_SET_VALUE_SUPPRESSED_DOFS);  
00564       }//end dependent loop
00565     } else {
00566       for(dac->init<ot::DA_FLAGS::W_DEPENDENT>(), daf->init<ot::DA_FLAGS::WRITABLE>();
00567           dac->curr() < dac->end<ot::DA_FLAGS::W_DEPENDENT>(); dac->next<ot::DA_FLAGS::W_DEPENDENT>()) {
00568         INTERGRID_TRANSFER_LOOP_BLOCK(ITLB_SET_VALUE_NO_SUPPRESSED_DOFS);       
00569       }//end dependent loop
00570     }
00571   }
00572 
00573   if(dac->iAmActive()) {
00574     dac->WriteToGhostsBegin<PetscScalar>(carr,  dof);
00575   }
00576 
00577   if(dac->iAmActive()) {
00578     //Continue Independent loop from where we left off.
00579     if(suppressedDOFc || suppressedDOFf) {
00580       for(dac->init<ot::DA_FLAGS::FROM_STORED>(), daf->init<ot::DA_FLAGS::FROM_STORED>();
00581           dac->curr() < dac->end<ot::DA_FLAGS::INDEPENDENT>(); dac->next<ot::DA_FLAGS::INDEPENDENT>()) {
00582         INTERGRID_TRANSFER_LOOP_BLOCK(ITLB_SET_VALUE_SUPPRESSED_DOFS);  
00583       }//end Independent loop (overlapping with write to coarse ghosts) 
00584     } else {
00585       for(dac->init<ot::DA_FLAGS::FROM_STORED>(), daf->init<ot::DA_FLAGS::FROM_STORED>();
00586           dac->curr() < dac->end<ot::DA_FLAGS::INDEPENDENT>(); dac->next<ot::DA_FLAGS::INDEPENDENT>()) {
00587         INTERGRID_TRANSFER_LOOP_BLOCK(ITLB_SET_VALUE_NO_SUPPRESSED_DOFS);       
00588       }//end Independent loop (overlapping with write to coarse ghosts) 
00589     }
00590   }
00591 
00592   if(dac->iAmActive()) {
00593     dac->WriteToGhostsEnd<PetscScalar>(carr, dof);
00594   }
00595 
00596   daf->vecRestoreBuffer(f, farr, false, false, true, dof);//Read-only
00597   dac->vecRestoreBuffer(c, carr, false, false, false, dof);//Writable  
00598   daf->vecRestoreBuffer<ot::FineTouchedStatus >(*fineTouchedFlags, 
00599       fineTouchedFlagsArr, false, false, true, 1);//read-only 
00600 
00601 #ifdef PETSC_USE_LOG
00602   PetscLogFlops(128*dof*(daf->getElementSize()));
00603 #endif
00604 
00605   PROF_MG_RESTRICT_END
00606 }//restrict-3
00607 
00608 #undef ITLB_SET_VALUE_NO_SUPPRESSED_DOFS
00609 #undef ITLB_SET_VALUE_SUPPRESSED_DOFS
00610 #undef INTERGRID_TRANSFER_LOOP_BLOCK
00611 #undef INTERGRID_TRANSFER_LOOP_BLOCK_DUMMY
00612 #undef ITLB_DUMMY_FCTR_BLOCK1 
00613 #undef ITLB_DUMMY_FCTR_BLOCK2 
00614 #undef ITLB_DUMMY_FINAL_SET_VALUE 
00615 #undef INTERGRID_TRANSFER_LOOP_BLOCK_DUMMY_FINAL_W
00616 #undef INTERGRID_TRANSFER_LOOP_BLOCK_DUMMY_FINAL_A
00617 
00618 }//end namespace
00619 

Generated on Tue Mar 24 16:14:06 2009 for DENDRO by  doxygen 1.3.9.1