Actual source code: matpapt.c

  1: #define PETSCMAT_DLL

  3: /*
  4:   Defines matrix-matrix product routines for pairs of SeqAIJ matrices
  5:           C = P * A * P^T
  6: */

 8:  #include src/mat/impls/aij/seq/aij.h
 9:  #include src/mat/utils/freespace.h

 11: static PetscEvent logkey_matapplypapt          = 0; /* event key tested for zero in MatApplyPAPt_SeqAIJ_SeqAIJ */
 12: static PetscEvent logkey_matapplypapt_symbolic = 0; /* event key tested for zero in MatApplyPAPt_Symbolic_SeqAIJ_SeqAIJ */
 13: static PetscEvent logkey_matapplypapt_numeric  = 0; /* event key tested for zero in MatApplyPAPt_Numeric_SeqAIJ_SeqAIJ */

 15: /*
 16:      MatApplyPAPt_Symbolic_SeqAIJ_SeqAIJ - Forms the symbolic product of two SeqAIJ matrices
 17:            C = P * A * P^T;

 19:      Note: C is assumed to be uncreated.
 20:            If this is not the case, Destroy C before calling this routine.
 21: */
 24: PetscErrorCode MatApplyPAPt_Symbolic_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat *C)
 25: {
 26:   /* Note: This code is virtually identical to that of MatApplyPtAP_SeqAIJ_Symbolic */
 27:   /*        and MatMatMult_SeqAIJ_SeqAIJ_Symbolic.  Perhaps they could be merged nicely. */
 28:   PetscErrorCode     ierr;
 29:   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
 30:   Mat_SeqAIJ         *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
 31:   PetscInt           *ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pti,*ptj,*ptjj;
 32:   PetscInt           *ci,*cj,*paj,*padenserow,*pasparserow,*denserow,*sparserow;
 33:   PetscInt           an=A->cmap.N,am=A->rmap.N,pn=P->cmap.N,pm=P->rmap.N;
 34:   PetscInt           i,j,k,pnzi,arow,anzj,panzi,ptrow,ptnzj,cnzi;
 35:   MatScalar          *ca;

 38:   /* some error checking which could be moved into interface layer */
 39:   if (pn!=am) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %D != %D",pn,am);
 40:   if (am!=an) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %D != %D",am, an);

 42:   /* Set up timers */
 43:   if (!logkey_matapplypapt_symbolic) {
 45:   }

 48:   /* Create ij structure of P^T */
 49:   MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);

 51:   /* Allocate ci array, arrays for fill computation and */
 52:   /* free space for accumulating nonzero column info */
 53:   PetscMalloc(((pm+1)*1)*sizeof(PetscInt),&ci);
 54:   ci[0] = 0;

 56:   PetscMalloc((2*an+2*pm+1)*sizeof(PetscInt),&padenserow);
 57:   PetscMemzero(padenserow,(2*an+2*pm+1)*sizeof(PetscInt));
 58:   pasparserow  = padenserow  + an;
 59:   denserow     = pasparserow + an;
 60:   sparserow    = denserow    + pm;

 62:   /* Set initial free space to be nnz(A) scaled by aspect ratio of Pt. */
 63:   /* This should be reasonable if sparsity of PAPt is similar to that of A. */
 64:   PetscFreeSpaceGet((ai[am]/pn)*pm,&free_space);
 65:   current_space = free_space;

 67:   /* Determine fill for each row of C: */
 68:   for (i=0;i<pm;i++) {
 69:     pnzi  = pi[i+1] - pi[i];
 70:     panzi = 0;
 71:     /* Get symbolic sparse row of PA: */
 72:     for (j=0;j<pnzi;j++) {
 73:       arow = *pj++;
 74:       anzj = ai[arow+1] - ai[arow];
 75:       ajj  = aj + ai[arow];
 76:       for (k=0;k<anzj;k++) {
 77:         if (!padenserow[ajj[k]]) {
 78:           padenserow[ajj[k]]   = -1;
 79:           pasparserow[panzi++] = ajj[k];
 80:         }
 81:       }
 82:     }
 83:     /* Using symbolic row of PA, determine symbolic row of C: */
 84:     paj    = pasparserow;
 85:     cnzi   = 0;
 86:     for (j=0;j<panzi;j++) {
 87:       ptrow = *paj++;
 88:       ptnzj = pti[ptrow+1] - pti[ptrow];
 89:       ptjj  = ptj + pti[ptrow];
 90:       for (k=0;k<ptnzj;k++) {
 91:         if (!denserow[ptjj[k]]) {
 92:           denserow[ptjj[k]] = -1;
 93:           sparserow[cnzi++] = ptjj[k];
 94:         }
 95:       }
 96:     }

 98:     /* sort sparse representation */
 99:     PetscSortInt(cnzi,sparserow);

101:     /* If free space is not available, make more free space */
102:     /* Double the amount of total space in the list */
103:     if (current_space->local_remaining<cnzi) {
104:       PetscFreeSpaceGet(current_space->total_array_size,&current_space);
105:     }

107:     /* Copy data into free space, and zero out dense row */
108:     PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(PetscInt));
109:     current_space->array           += cnzi;
110:     current_space->local_used      += cnzi;
111:     current_space->local_remaining -= cnzi;

113:     for (j=0;j<panzi;j++) {
114:       padenserow[pasparserow[j]] = 0;
115:     }
116:     for (j=0;j<cnzi;j++) {
117:       denserow[sparserow[j]] = 0;
118:     }
119:     ci[i+1] = ci[i] + cnzi;
120:   }
121:   /* column indices are in the list of free space */
122:   /* Allocate space for cj, initialize cj, and */
123:   /* destroy list of free space and other temporary array(s) */
124:   PetscMalloc((ci[pm]+1)*sizeof(PetscInt),&cj);
125:   PetscFreeSpaceContiguous(&free_space,cj);
126:   PetscFree(padenserow);
127: 
128:   /* Allocate space for ca */
129:   PetscMalloc((ci[pm]+1)*sizeof(MatScalar),&ca);
130:   PetscMemzero(ca,(ci[pm]+1)*sizeof(MatScalar));
131: 
132:   /* put together the new matrix */
133:   MatCreateSeqAIJWithArrays(A->comm,pm,pm,ci,cj,ca,C);

135:   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
136:   /* Since these are PETSc arrays, change flags to free them as necessary. */
137:   c = (Mat_SeqAIJ *)((*C)->data);
138:   c->free_a  = PETSC_TRUE;
139:   c->free_ij = PETSC_TRUE;
140:   c->nonew   = 0;

142:   /* Clean up. */
143:   MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);

146:   return(0);
147: }

149: /*
150:      MatApplyPAPt_Numeric_SeqAIJ - Forms the numeric product of two SeqAIJ matrices
151:            C = P * A * P^T;
152:      Note: C must have been created by calling MatApplyPAPt_Symbolic_SeqAIJ.
153: */
156: PetscErrorCode MatApplyPAPt_Numeric_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat C)
157: {
159:   PetscInt       flops=0;
160:   Mat_SeqAIJ     *a  = (Mat_SeqAIJ *) A->data;
161:   Mat_SeqAIJ     *p  = (Mat_SeqAIJ *) P->data;
162:   Mat_SeqAIJ     *c  = (Mat_SeqAIJ *) C->data;
163:   PetscInt       *ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj=p->j,*paj,*pajdense,*ptj;
164:   PetscInt       *ci=c->i,*cj=c->j;
165:   PetscInt       an=A->cmap.N,am=A->rmap.N,pn=P->cmap.N,pm=P->rmap.N,cn=C->cmap.N,cm=C->rmap.N;
166:   PetscInt       i,j,k,k1,k2,pnzi,anzj,panzj,arow,ptcol,ptnzj,cnzi;
167:   MatScalar      *aa=a->a,*pa=p->a,*pta=p->a,*ptaj,*paa,*aaj,*ca=c->a,sum;


171:   /* This error checking should be unnecessary if the symbolic was performed */
172:   if (pm!=cm) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %D != %D",pm,cm);
173:   if (pn!=am) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %D != %D",pn,am);
174:   if (am!=an) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %D != %D",am, an);
175:   if (pm!=cn) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %D != %D",pm, cn);

177:   /* Set up timers */
178:   if (!logkey_matapplypapt_numeric) {
180:   }

183:   PetscMalloc(an*(sizeof(MatScalar)+2*sizeof(PetscInt)),&paa);
184:   PetscMemzero(paa,an*(sizeof(MatScalar)+2*sizeof(PetscInt)));
185:   PetscMemzero(ca,ci[cm]*sizeof(MatScalar));

187:   paj      = (PetscInt*)(paa + an);
188:   pajdense = paj + an;

190:   for (i=0;i<pm;i++) {
191:     /* Form sparse row of P*A */
192:     pnzi  = pi[i+1] - pi[i];
193:     panzj = 0;
194:     for (j=0;j<pnzi;j++) {
195:       arow = *pj++;
196:       anzj = ai[arow+1] - ai[arow];
197:       ajj  = aj + ai[arow];
198:       aaj  = aa + ai[arow];
199:       for (k=0;k<anzj;k++) {
200:         if (!pajdense[ajj[k]]) {
201:           pajdense[ajj[k]] = -1;
202:           paj[panzj++]     = ajj[k];
203:         }
204:         paa[ajj[k]] += (*pa)*aaj[k];
205:       }
206:       flops += 2*anzj;
207:       pa++;
208:     }

210:     /* Sort the j index array for quick sparse axpy. */
211:     PetscSortInt(panzj,paj);

213:     /* Compute P*A*P^T using sparse inner products. */
214:     /* Take advantage of pre-computed (i,j) of C for locations of non-zeros. */
215:     cnzi = ci[i+1] - ci[i];
216:     for (j=0;j<cnzi;j++) {
217:       /* Form sparse inner product of current row of P*A with (*cj++) col of P^T. */
218:       ptcol = *cj++;
219:       ptnzj = pi[ptcol+1] - pi[ptcol];
220:       ptj   = pjj + pi[ptcol];
221:       ptaj  = pta + pi[ptcol];
222:       sum   = 0.;
223:       k1    = 0;
224:       k2    = 0;
225:       while ((k1<panzj) && (k2<ptnzj)) {
226:         if (paj[k1]==ptj[k2]) {
227:           sum += paa[paj[k1++]]*ptaj[k2++];
228:         } else if (paj[k1] < ptj[k2]) {
229:           k1++;
230:         } else /* if (paj[k1] > ptj[k2]) */ {
231:           k2++;
232:         }
233:       }
234:       *ca++ = sum;
235:     }

237:     /* Zero the current row info for P*A */
238:     for (j=0;j<panzj;j++) {
239:       paa[paj[j]]      = 0.;
240:       pajdense[paj[j]] = 0;
241:     }
242:   }

244:   MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
245:   MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
246:   PetscLogFlops(flops);
248:   return(0);
249: }
250: 
253: PetscErrorCode MatApplyPAPt_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat *C)
254: {

258:   if (!logkey_matapplypapt) {
260:   }
262:   MatApplyPAPt_Symbolic_SeqAIJ_SeqAIJ(A,P,C);
263:   MatApplyPAPt_Numeric_SeqAIJ_SeqAIJ(A,P,*C);
265:   return(0);
266: }