Actual source code: aijfact.c
1: #define PETSCMAT_DLL
3: #include src/mat/impls/aij/seq/aij.h
4: #include src/inline/dot.h
5: #include src/inline/spops.h
6: #include petscbt.h
7: #include src/mat/utils/freespace.h
11: PetscErrorCode MatOrdering_Flow_SeqAIJ(Mat mat,const MatOrderingType type,IS *irow,IS *icol)
12: {
15: SETERRQ(PETSC_ERR_SUP,"Code not written");
16: #if !defined(PETSC_USE_DEBUG)
17: return(0);
18: #endif
19: }
22: #if !defined(PETSC_AVOID_GNUCOPYRIGHT_CODE)
23: EXTERN PetscErrorCode SPARSEKIT2dperm(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscInt*,PetscInt*);
24: EXTERN PetscErrorCode SPARSEKIT2ilutp(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscReal,PetscReal*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscErrorCode*);
25: EXTERN PetscErrorCode SPARSEKIT2msrcsr(PetscInt*,PetscScalar*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*);
26: #endif
30: /* ------------------------------------------------------------
32: This interface was contribed by Tony Caola
34: This routine is an interface to the pivoting drop-tolerance
35: ILU routine written by Yousef Saad (saad@cs.umn.edu) as part of
36: SPARSEKIT2.
38: The SPARSEKIT2 routines used here are covered by the GNU
39: copyright; see the file gnu in this directory.
41: Thanks to Prof. Saad, Dr. Hysom, and Dr. Smith for their
42: help in getting this routine ironed out.
44: The major drawback to this routine is that if info->fill is
45: not large enough it fails rather than allocating more space;
46: this can be fixed by hacking/improving the f2c version of
47: Yousef Saad's code.
49: ------------------------------------------------------------
50: */
51: PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact)
52: {
53: #if defined(PETSC_AVOID_GNUCOPYRIGHT_CODE)
55: SETERRQ(PETSC_ERR_SUP_SYS,"This distribution does not include GNU Copyright code\n\
56: You can obtain the drop tolerance routines by installing PETSc from\n\
57: www.mcs.anl.gov/petsc\n");
58: #else
59: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b;
60: IS iscolf,isicol,isirow;
61: PetscTruth reorder;
62: PetscErrorCode ierr,sierr;
63: PetscInt *c,*r,*ic,i,n = A->rmap.n;
64: PetscInt *old_i = a->i,*old_j = a->j,*new_i,*old_i2 = 0,*old_j2 = 0,*new_j;
65: PetscInt *ordcol,*iwk,*iperm,*jw;
66: PetscInt jmax,lfill,job,*o_i,*o_j;
67: PetscScalar *old_a = a->a,*w,*new_a,*old_a2 = 0,*wk,*o_a;
68: PetscReal af;
72: if (info->dt == PETSC_DEFAULT) info->dt = .005;
73: if (info->dtcount == PETSC_DEFAULT) info->dtcount = (PetscInt)(1.5*a->rmax);
74: if (info->dtcol == PETSC_DEFAULT) info->dtcol = .01;
75: if (info->fill == PETSC_DEFAULT) info->fill = ((double)(n*(info->dtcount+1)))/a->nz;
76: lfill = (PetscInt)(info->dtcount/2.0);
77: jmax = (PetscInt)(info->fill*a->nz);
80: /* ------------------------------------------------------------
81: If reorder=.TRUE., then the original matrix has to be
82: reordered to reflect the user selected ordering scheme, and
83: then de-reordered so it is in it's original format.
84: Because Saad's dperm() is NOT in place, we have to copy
85: the original matrix and allocate more storage. . .
86: ------------------------------------------------------------
87: */
89: /* set reorder to true if either isrow or iscol is not identity */
90: ISIdentity(isrow,&reorder);
91: if (reorder) {ISIdentity(iscol,&reorder);}
92: reorder = PetscNot(reorder);
94:
95: /* storage for ilu factor */
96: PetscMalloc((n+1)*sizeof(PetscInt),&new_i);
97: PetscMalloc(jmax*sizeof(PetscInt),&new_j);
98: PetscMalloc(jmax*sizeof(PetscScalar),&new_a);
99: PetscMalloc(n*sizeof(PetscInt),&ordcol);
101: /* ------------------------------------------------------------
102: Make sure that everything is Fortran formatted (1-Based)
103: ------------------------------------------------------------
104: */
105: for (i=old_i[0];i<old_i[n];i++) {
106: old_j[i]++;
107: }
108: for(i=0;i<n+1;i++) {
109: old_i[i]++;
110: };
111:
113: if (reorder) {
114: ISGetIndices(iscol,&c);
115: ISGetIndices(isrow,&r);
116: for(i=0;i<n;i++) {
117: r[i] = r[i]+1;
118: c[i] = c[i]+1;
119: }
120: PetscMalloc((n+1)*sizeof(PetscInt),&old_i2);
121: PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscInt),&old_j2);
122: PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscScalar),&old_a2);
123: job = 3; SPARSEKIT2dperm(&n,old_a,old_j,old_i,old_a2,old_j2,old_i2,r,c,&job);
124: for (i=0;i<n;i++) {
125: r[i] = r[i]-1;
126: c[i] = c[i]-1;
127: }
128: ISRestoreIndices(iscol,&c);
129: ISRestoreIndices(isrow,&r);
130: o_a = old_a2;
131: o_j = old_j2;
132: o_i = old_i2;
133: } else {
134: o_a = old_a;
135: o_j = old_j;
136: o_i = old_i;
137: }
139: /* ------------------------------------------------------------
140: Call Saad's ilutp() routine to generate the factorization
141: ------------------------------------------------------------
142: */
144: PetscMalloc(2*n*sizeof(PetscInt),&iperm);
145: PetscMalloc(2*n*sizeof(PetscInt),&jw);
146: PetscMalloc(n*sizeof(PetscScalar),&w);
148: SPARSEKIT2ilutp(&n,o_a,o_j,o_i,&lfill,(PetscReal)info->dt,&info->dtcol,&n,new_a,new_j,new_i,&jmax,w,jw,iperm,&sierr);
149: if (sierr) {
150: switch (sierr) {
151: case -3: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix U overflows, need larger info->fill current fill %G space allocated %D",info->fill,jmax);
152: case -2: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix L overflows, need larger info->fill current fill %G space allocated %D",info->fill,jmax);
153: case -5: SETERRQ(PETSC_ERR_LIB,"ilutp(), zero row encountered");
154: case -1: SETERRQ(PETSC_ERR_LIB,"ilutp(), input matrix may be wrong");
155: case -4: SETERRQ1(PETSC_ERR_LIB,"ilutp(), illegal info->fill value %D",jmax);
156: default: SETERRQ1(PETSC_ERR_LIB,"ilutp(), zero pivot detected on row %D",sierr);
157: }
158: }
160: PetscFree(w);
161: PetscFree(jw);
163: /* ------------------------------------------------------------
164: Saad's routine gives the result in Modified Sparse Row (msr)
165: Convert to Compressed Sparse Row format (csr)
166: ------------------------------------------------------------
167: */
169: PetscMalloc(n*sizeof(PetscScalar),&wk);
170: PetscMalloc((n+1)*sizeof(PetscInt),&iwk);
172: SPARSEKIT2msrcsr(&n,new_a,new_j,new_a,new_j,new_i,wk,iwk);
174: PetscFree(iwk);
175: PetscFree(wk);
177: if (reorder) {
178: PetscFree(old_a2);
179: PetscFree(old_j2);
180: PetscFree(old_i2);
181: } else {
182: /* fix permutation of old_j that the factorization introduced */
183: for (i=old_i[0]; i<old_i[n]; i++) {
184: old_j[i-1] = iperm[old_j[i-1]-1];
185: }
186: }
188: /* get rid of the shift to indices starting at 1 */
189: for (i=0; i<n+1; i++) {
190: old_i[i]--;
191: }
192: for (i=old_i[0];i<old_i[n];i++) {
193: old_j[i]--;
194: }
195:
196: /* Make the factored matrix 0-based */
197: for (i=0; i<n+1; i++) {
198: new_i[i]--;
199: }
200: for (i=new_i[0];i<new_i[n];i++) {
201: new_j[i]--;
202: }
204: /*-- due to the pivoting, we need to reorder iscol to correctly --*/
205: /*-- permute the right-hand-side and solution vectors --*/
206: ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
207: ISInvertPermutation(isrow,PETSC_DECIDE,&isirow);
208: ISGetIndices(isicol,&ic);
209: for(i=0; i<n; i++) {
210: ordcol[i] = ic[iperm[i]-1];
211: };
212: ISRestoreIndices(isicol,&ic);
213: ISDestroy(isicol);
215: PetscFree(iperm);
217: ISCreateGeneral(PETSC_COMM_SELF,n,ordcol,&iscolf);
218: PetscFree(ordcol);
220: /*----- put together the new matrix -----*/
222: MatCreate(A->comm,fact);
223: MatSetSizes(*fact,n,n,n,n);
224: MatSetType(*fact,A->type_name);
225: MatSeqAIJSetPreallocation_SeqAIJ(*fact,MAT_SKIP_ALLOCATION,PETSC_NULL);
226: (*fact)->factor = FACTOR_LU;
227: (*fact)->assembled = PETSC_TRUE;
229: b = (Mat_SeqAIJ*)(*fact)->data;
230: b->free_a = PETSC_TRUE;
231: b->free_ij = PETSC_TRUE;
232: b->sorted = PETSC_FALSE;
233: b->singlemalloc = PETSC_FALSE;
234: b->a = new_a;
235: b->j = new_j;
236: b->i = new_i;
237: b->ilen = 0;
238: b->imax = 0;
239: /* I am not sure why these are the inverses of the row and column permutations; but the other way is NO GOOD */
240: b->row = isirow;
241: b->col = iscolf;
242: PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);
243: b->maxnz = b->nz = new_i[n];
244: MatMarkDiagonal_SeqAIJ(*fact);
245: (*fact)->info.factor_mallocs = 0;
247: af = ((double)b->nz)/((double)a->nz) + .001;
248: PetscInfo2(A,"Fill ratio:given %G needed %G\n",info->fill,af);
249: PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);
250: PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);
251: PetscInfo(A,"for best performance.\n");
253: MatILUDTFactor_Inode(A,isrow,iscol,info,fact);
255: return(0);
256: #endif
257: }
261: PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *B)
262: {
263: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b;
264: IS isicol;
265: PetscErrorCode ierr;
266: PetscInt *r,*ic,i,n=A->rmap.n,*ai=a->i,*aj=a->j;
267: PetscInt *bi,*bj,*ajtmp;
268: PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im;
269: PetscReal f;
270: PetscInt nlnk,*lnk,k,**bi_ptr;
271: PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
272: PetscBT lnkbt;
275: if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square");
276: ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
277: ISGetIndices(isrow,&r);
278: ISGetIndices(isicol,&ic);
280: /* get new row pointers */
281: PetscMalloc((n+1)*sizeof(PetscInt),&bi);
282: bi[0] = 0;
284: /* bdiag is location of diagonal in factor */
285: PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);
286: bdiag[0] = 0;
288: /* linked list for storing column indices of the active row */
289: nlnk = n + 1;
290: PetscLLCreate(n,n,nlnk,lnk,lnkbt);
292: PetscMalloc2(n+1,PetscInt**,&bi_ptr,n+1,PetscInt,&im);
294: /* initial FreeSpace size is f*(ai[n]+1) */
295: f = info->fill;
296: PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);
297: current_space = free_space;
299: for (i=0; i<n; i++) {
300: /* copy previous fill into linked list */
301: nzi = 0;
302: nnz = ai[r[i]+1] - ai[r[i]];
303: if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix");
304: ajtmp = aj + ai[r[i]];
305: PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);
306: nzi += nlnk;
308: /* add pivot rows into linked list */
309: row = lnk[n];
310: while (row < i) {
311: nzbd = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */
312: ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
313: PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);
314: nzi += nlnk;
315: row = lnk[row];
316: }
317: bi[i+1] = bi[i] + nzi;
318: im[i] = nzi;
320: /* mark bdiag */
321: nzbd = 0;
322: nnz = nzi;
323: k = lnk[n];
324: while (nnz-- && k < i){
325: nzbd++;
326: k = lnk[k];
327: }
328: bdiag[i] = bi[i] + nzbd;
330: /* if free space is not available, make more free space */
331: if (current_space->local_remaining<nzi) {
332: nnz = (n - i)*nzi; /* estimated and max additional space needed */
333: PetscFreeSpaceGet(nnz,¤t_space);
334: reallocs++;
335: }
337: /* copy data into free space, then initialize lnk */
338: PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);
339: bi_ptr[i] = current_space->array;
340: current_space->array += nzi;
341: current_space->local_used += nzi;
342: current_space->local_remaining -= nzi;
343: }
344: #if defined(PETSC_USE_INFO)
345: if (ai[n] != 0) {
346: PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
347: PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);
348: PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);
349: PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);
350: PetscInfo(A,"for best performance.\n");
351: } else {
352: PetscInfo(A,"Empty matrix\n");
353: }
354: #endif
356: ISRestoreIndices(isrow,&r);
357: ISRestoreIndices(isicol,&ic);
359: /* destroy list of free space and other temporary array(s) */
360: PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);
361: PetscFreeSpaceContiguous(&free_space,bj);
362: PetscLLDestroy(lnk,lnkbt);
363: PetscFree2(bi_ptr,im);
365: /* put together the new matrix */
366: MatCreate(A->comm,B);
367: MatSetSizes(*B,n,n,n,n);
368: MatSetType(*B,A->type_name);
369: MatSeqAIJSetPreallocation_SeqAIJ(*B,MAT_SKIP_ALLOCATION,PETSC_NULL);
370: PetscLogObjectParent(*B,isicol);
371: b = (Mat_SeqAIJ*)(*B)->data;
372: b->free_a = PETSC_TRUE;
373: b->free_ij = PETSC_TRUE;
374: b->singlemalloc = PETSC_FALSE;
375: PetscMalloc((bi[n]+1)*sizeof(PetscScalar),&b->a);
376: b->j = bj;
377: b->i = bi;
378: b->diag = bdiag;
379: b->ilen = 0;
380: b->imax = 0;
381: b->row = isrow;
382: b->col = iscol;
383: PetscObjectReference((PetscObject)isrow);
384: PetscObjectReference((PetscObject)iscol);
385: b->icol = isicol;
386: PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);
388: /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */
389: PetscLogObjectMemory(*B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));
390: b->maxnz = b->nz = bi[n] ;
392: (*B)->factor = FACTOR_LU;
393: (*B)->info.factor_mallocs = reallocs;
394: (*B)->info.fill_ratio_given = f;
396: if (ai[n] != 0) {
397: (*B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
398: } else {
399: (*B)->info.fill_ratio_needed = 0.0;
400: }
401: MatLUFactorSymbolic_Inode(A,isrow,iscol,info,B);
402: (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */
403: return(0);
404: }
406: /*
407: Trouble in factorization, should we dump the original matrix?
408: */
411: PetscErrorCode MatFactorDumpMatrix(Mat A)
412: {
414: PetscTruth flg;
417: PetscOptionsHasName(PETSC_NULL,"-mat_factor_dump_on_error",&flg);
418: if (flg) {
419: PetscViewer viewer;
420: char filename[PETSC_MAX_PATH_LEN];
422: PetscSNPrintf(filename,PETSC_MAX_PATH_LEN,"matrix_factor_error.%d",PetscGlobalRank);
423: PetscViewerBinaryOpen(A->comm,filename,FILE_MODE_WRITE,&viewer);
424: MatView(A,viewer);
425: PetscViewerDestroy(viewer);
426: }
427: return(0);
428: }
430: /* ----------------------------------------------------------- */
433: PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B)
434: {
435: Mat C=*B;
436: Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data;
437: IS isrow = b->row,isicol = b->icol;
439: PetscInt *r,*ic,i,j,n=A->rmap.n,*bi=b->i,*bj=b->j;
440: PetscInt *ajtmp,*bjtmp,nz,row,*ics;
441: PetscInt *diag_offset = b->diag,diag,*pj;
442: PetscScalar *rtmp,*v,*pc,multiplier,*pv,*rtmps;
443: PetscScalar d;
444: PetscReal rs;
445: LUShift_Ctx sctx;
446: PetscInt newshift,*ddiag;
449: ISGetIndices(isrow,&r);
450: ISGetIndices(isicol,&ic);
451: PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);
452: PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));
453: rtmps = rtmp; ics = ic;
455: sctx.shift_top = 0;
456: sctx.nshift_max = 0;
457: sctx.shift_lo = 0;
458: sctx.shift_hi = 0;
460: /* if both shift schemes are chosen by user, only use info->shiftpd */
461: if (info->shiftpd && info->shiftnz) info->shiftnz = 0.0;
462: if (info->shiftpd) { /* set sctx.shift_top=max{rs} */
463: PetscInt *aai = a->i;
464: ddiag = a->diag;
465: sctx.shift_top = 0;
466: for (i=0; i<n; i++) {
467: /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
468: d = (a->a)[ddiag[i]];
469: rs = -PetscAbsScalar(d) - PetscRealPart(d);
470: v = a->a+aai[i];
471: nz = aai[i+1] - aai[i];
472: for (j=0; j<nz; j++)
473: rs += PetscAbsScalar(v[j]);
474: if (rs>sctx.shift_top) sctx.shift_top = rs;
475: }
476: if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot;
477: sctx.shift_top *= 1.1;
478: sctx.nshift_max = 5;
479: sctx.shift_lo = 0.;
480: sctx.shift_hi = 1.;
481: }
483: sctx.shift_amount = 0;
484: sctx.nshift = 0;
485: do {
486: sctx.lushift = PETSC_FALSE;
487: for (i=0; i<n; i++){
488: nz = bi[i+1] - bi[i];
489: bjtmp = bj + bi[i];
490: for (j=0; j<nz; j++) rtmps[bjtmp[j]] = 0.0;
492: /* load in initial (unfactored row) */
493: nz = a->i[r[i]+1] - a->i[r[i]];
494: ajtmp = a->j + a->i[r[i]];
495: v = a->a + a->i[r[i]];
496: for (j=0; j<nz; j++) {
497: rtmp[ics[ajtmp[j]]] = v[j];
498: }
499: rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */
501: row = *bjtmp++;
502: while (row < i) {
503: pc = rtmp + row;
504: if (*pc != 0.0) {
505: pv = b->a + diag_offset[row];
506: pj = b->j + diag_offset[row] + 1;
507: multiplier = *pc / *pv++;
508: *pc = multiplier;
509: nz = bi[row+1] - diag_offset[row] - 1;
510: for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j];
511: PetscLogFlops(2*nz);
512: }
513: row = *bjtmp++;
514: }
515: /* finished row so stick it into b->a */
516: pv = b->a + bi[i] ;
517: pj = b->j + bi[i] ;
518: nz = bi[i+1] - bi[i];
519: diag = diag_offset[i] - bi[i];
520: rs = 0.0;
521: for (j=0; j<nz; j++) {
522: pv[j] = rtmps[pj[j]];
523: if (j != diag) rs += PetscAbsScalar(pv[j]);
524: }
526: /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */
527: sctx.rs = rs;
528: sctx.pv = pv[diag];
529: MatLUCheckShift_inline(info,sctx,i,a->diag,newshift);
530: if (newshift == 1) break;
531: }
533: if (info->shiftpd && !sctx.lushift && info->shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
534: /*
535: * if no shift in this attempt & shifting & started shifting & can refine,
536: * then try lower shift
537: */
538: sctx.shift_hi = info->shift_fraction;
539: info->shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
540: sctx.shift_amount = info->shift_fraction * sctx.shift_top;
541: sctx.lushift = PETSC_TRUE;
542: sctx.nshift++;
543: }
544: } while (sctx.lushift);
546: /* invert diagonal entries for simplier triangular solves */
547: for (i=0; i<n; i++) {
548: b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]];
549: }
551: PetscFree(rtmp);
552: ISRestoreIndices(isicol,&ic);
553: ISRestoreIndices(isrow,&r);
554: C->factor = FACTOR_LU;
555: (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */
556: C->assembled = PETSC_TRUE;
557: PetscLogFlops(C->cmap.n);
558: if (sctx.nshift){
559: if (info->shiftnz) {
560: PetscInfo2(0,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);
561: } else if (info->shiftpd) {
562: PetscInfo4(0,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,info->shift_fraction,sctx.shift_top);
563: }
564: }
565: return(0);
566: }
570: PetscErrorCode MatUsePETSc_SeqAIJ(Mat A)
571: {
573: A->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ;
574: A->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ;
575: return(0);
576: }
579: /* ----------------------------------------------------------- */
582: PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,MatFactorInfo *info)
583: {
585: Mat C;
588: MatLUFactorSymbolic(A,row,col,info,&C);
589: MatLUFactorNumeric(A,info,&C);
590: MatHeaderCopy(A,C);
591: PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol);
592: return(0);
593: }
594: /* ----------------------------------------------------------- */
597: PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx)
598: {
599: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
600: IS iscol = a->col,isrow = a->row;
602: PetscInt *r,*c,i, n = A->rmap.n,*vi,*ai = a->i,*aj = a->j;
603: PetscInt nz,*rout,*cout;
604: PetscScalar *x,*b,*tmp,*tmps,*aa = a->a,sum,*v;
607: if (!n) return(0);
609: VecGetArray(bb,&b);
610: VecGetArray(xx,&x);
611: tmp = a->solve_work;
613: ISGetIndices(isrow,&rout); r = rout;
614: ISGetIndices(iscol,&cout); c = cout + (n-1);
616: /* forward solve the lower triangular */
617: tmp[0] = b[*r++];
618: tmps = tmp;
619: for (i=1; i<n; i++) {
620: v = aa + ai[i] ;
621: vi = aj + ai[i] ;
622: nz = a->diag[i] - ai[i];
623: sum = b[*r++];
624: SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
625: tmp[i] = sum;
626: }
628: /* backward solve the upper triangular */
629: for (i=n-1; i>=0; i--){
630: v = aa + a->diag[i] + 1;
631: vi = aj + a->diag[i] + 1;
632: nz = ai[i+1] - a->diag[i] - 1;
633: sum = tmp[i];
634: SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
635: x[*c--] = tmp[i] = sum*aa[a->diag[i]];
636: }
638: ISRestoreIndices(isrow,&rout);
639: ISRestoreIndices(iscol,&cout);
640: VecRestoreArray(bb,&b);
641: VecRestoreArray(xx,&x);
642: PetscLogFlops(2*a->nz - A->cmap.n);
643: return(0);
644: }
648: PetscErrorCode MatMatSolve_SeqAIJ(Mat A,Mat B,Mat X)
649: {
650: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
651: IS iscol = a->col,isrow = a->row;
653: PetscInt *r,*c,i, n = A->rmap.n,*vi,*ai = a->i,*aj = a->j;
654: PetscInt nz,*rout,*cout,neq;
655: PetscScalar *x,*b,*tmp,*tmps,*aa = a->a,sum,*v;
658: if (!n) return(0);
660: MatGetArray(B,&b);
661: MatGetArray(X,&x);
662:
663: tmp = a->solve_work;
664: ISGetIndices(isrow,&rout); r = rout;
665: ISGetIndices(iscol,&cout); c = cout;
667: for (neq=0; neq<n; neq++){
668: /* forward solve the lower triangular */
669: tmp[0] = b[r[0]];
670: tmps = tmp;
671: for (i=1; i<n; i++) {
672: v = aa + ai[i] ;
673: vi = aj + ai[i] ;
674: nz = a->diag[i] - ai[i];
675: sum = b[r[i]];
676: SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
677: tmp[i] = sum;
678: }
679: /* backward solve the upper triangular */
680: for (i=n-1; i>=0; i--){
681: v = aa + a->diag[i] + 1;
682: vi = aj + a->diag[i] + 1;
683: nz = ai[i+1] - a->diag[i] - 1;
684: sum = tmp[i];
685: SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
686: x[c[i]] = tmp[i] = sum*aa[a->diag[i]];
687: }
689: b += n;
690: x += n;
691: }
692: ISRestoreIndices(isrow,&rout);
693: ISRestoreIndices(iscol,&cout);
694: MatRestoreArray(B,&b);
695: MatRestoreArray(X,&x);
696: PetscLogFlops(n*(2*a->nz - n));
697: return(0);
698: }
700: /* ----------------------------------------------------------- */
703: PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx)
704: {
705: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
707: PetscInt n = A->rmap.n,*ai = a->i,*aj = a->j,*adiag = a->diag;
708: PetscScalar *x,*b,*aa = a->a;
709: #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
710: PetscInt adiag_i,i,*vi,nz,ai_i;
711: PetscScalar *v,sum;
712: #endif
715: if (!n) return(0);
717: VecGetArray(bb,&b);
718: VecGetArray(xx,&x);
720: #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
721: fortransolveaij_(&n,x,ai,aj,adiag,aa,b);
722: #else
723: /* forward solve the lower triangular */
724: x[0] = b[0];
725: for (i=1; i<n; i++) {
726: ai_i = ai[i];
727: v = aa + ai_i;
728: vi = aj + ai_i;
729: nz = adiag[i] - ai_i;
730: sum = b[i];
731: while (nz--) sum -= *v++ * x[*vi++];
732: x[i] = sum;
733: }
735: /* backward solve the upper triangular */
736: for (i=n-1; i>=0; i--){
737: adiag_i = adiag[i];
738: v = aa + adiag_i + 1;
739: vi = aj + adiag_i + 1;
740: nz = ai[i+1] - adiag_i - 1;
741: sum = x[i];
742: while (nz--) sum -= *v++ * x[*vi++];
743: x[i] = sum*aa[adiag_i];
744: }
745: #endif
746: PetscLogFlops(2*a->nz - A->cmap.n);
747: VecRestoreArray(bb,&b);
748: VecRestoreArray(xx,&x);
749: return(0);
750: }
754: PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx)
755: {
756: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
757: IS iscol = a->col,isrow = a->row;
759: PetscInt *r,*c,i, n = A->rmap.n,*vi,*ai = a->i,*aj = a->j;
760: PetscInt nz,*rout,*cout;
761: PetscScalar *x,*b,*tmp,*aa = a->a,sum,*v;
764: if (yy != xx) {VecCopy(yy,xx);}
766: VecGetArray(bb,&b);
767: VecGetArray(xx,&x);
768: tmp = a->solve_work;
770: ISGetIndices(isrow,&rout); r = rout;
771: ISGetIndices(iscol,&cout); c = cout + (n-1);
773: /* forward solve the lower triangular */
774: tmp[0] = b[*r++];
775: for (i=1; i<n; i++) {
776: v = aa + ai[i] ;
777: vi = aj + ai[i] ;
778: nz = a->diag[i] - ai[i];
779: sum = b[*r++];
780: while (nz--) sum -= *v++ * tmp[*vi++ ];
781: tmp[i] = sum;
782: }
784: /* backward solve the upper triangular */
785: for (i=n-1; i>=0; i--){
786: v = aa + a->diag[i] + 1;
787: vi = aj + a->diag[i] + 1;
788: nz = ai[i+1] - a->diag[i] - 1;
789: sum = tmp[i];
790: while (nz--) sum -= *v++ * tmp[*vi++ ];
791: tmp[i] = sum*aa[a->diag[i]];
792: x[*c--] += tmp[i];
793: }
795: ISRestoreIndices(isrow,&rout);
796: ISRestoreIndices(iscol,&cout);
797: VecRestoreArray(bb,&b);
798: VecRestoreArray(xx,&x);
799: PetscLogFlops(2*a->nz);
801: return(0);
802: }
803: /* -------------------------------------------------------------------*/
806: PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx)
807: {
808: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
809: IS iscol = a->col,isrow = a->row;
811: PetscInt *r,*c,i,n = A->rmap.n,*vi,*ai = a->i,*aj = a->j;
812: PetscInt nz,*rout,*cout,*diag = a->diag;
813: PetscScalar *x,*b,*tmp,*aa = a->a,*v,s1;
816: VecGetArray(bb,&b);
817: VecGetArray(xx,&x);
818: tmp = a->solve_work;
820: ISGetIndices(isrow,&rout); r = rout;
821: ISGetIndices(iscol,&cout); c = cout;
823: /* copy the b into temp work space according to permutation */
824: for (i=0; i<n; i++) tmp[i] = b[c[i]];
826: /* forward solve the U^T */
827: for (i=0; i<n; i++) {
828: v = aa + diag[i] ;
829: vi = aj + diag[i] + 1;
830: nz = ai[i+1] - diag[i] - 1;
831: s1 = tmp[i];
832: s1 *= (*v++); /* multiply by inverse of diagonal entry */
833: while (nz--) {
834: tmp[*vi++ ] -= (*v++)*s1;
835: }
836: tmp[i] = s1;
837: }
839: /* backward solve the L^T */
840: for (i=n-1; i>=0; i--){
841: v = aa + diag[i] - 1 ;
842: vi = aj + diag[i] - 1 ;
843: nz = diag[i] - ai[i];
844: s1 = tmp[i];
845: while (nz--) {
846: tmp[*vi-- ] -= (*v--)*s1;
847: }
848: }
850: /* copy tmp into x according to permutation */
851: for (i=0; i<n; i++) x[r[i]] = tmp[i];
853: ISRestoreIndices(isrow,&rout);
854: ISRestoreIndices(iscol,&cout);
855: VecRestoreArray(bb,&b);
856: VecRestoreArray(xx,&x);
858: PetscLogFlops(2*a->nz-A->cmap.n);
859: return(0);
860: }
864: PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx)
865: {
866: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
867: IS iscol = a->col,isrow = a->row;
869: PetscInt *r,*c,i,n = A->rmap.n,*vi,*ai = a->i,*aj = a->j;
870: PetscInt nz,*rout,*cout,*diag = a->diag;
871: PetscScalar *x,*b,*tmp,*aa = a->a,*v;
874: if (zz != xx) {VecCopy(zz,xx);}
876: VecGetArray(bb,&b);
877: VecGetArray(xx,&x);
878: tmp = a->solve_work;
880: ISGetIndices(isrow,&rout); r = rout;
881: ISGetIndices(iscol,&cout); c = cout;
883: /* copy the b into temp work space according to permutation */
884: for (i=0; i<n; i++) tmp[i] = b[c[i]];
886: /* forward solve the U^T */
887: for (i=0; i<n; i++) {
888: v = aa + diag[i] ;
889: vi = aj + diag[i] + 1;
890: nz = ai[i+1] - diag[i] - 1;
891: tmp[i] *= *v++;
892: while (nz--) {
893: tmp[*vi++ ] -= (*v++)*tmp[i];
894: }
895: }
897: /* backward solve the L^T */
898: for (i=n-1; i>=0; i--){
899: v = aa + diag[i] - 1 ;
900: vi = aj + diag[i] - 1 ;
901: nz = diag[i] - ai[i];
902: while (nz--) {
903: tmp[*vi-- ] -= (*v--)*tmp[i];
904: }
905: }
907: /* copy tmp into x according to permutation */
908: for (i=0; i<n; i++) x[r[i]] += tmp[i];
910: ISRestoreIndices(isrow,&rout);
911: ISRestoreIndices(iscol,&cout);
912: VecRestoreArray(bb,&b);
913: VecRestoreArray(xx,&x);
915: PetscLogFlops(2*a->nz);
916: return(0);
917: }
918: /* ----------------------------------------------------------------*/
919: EXTERN PetscErrorCode MatMissingDiagonal_SeqAIJ(Mat,PetscTruth*,PetscInt*);
923: PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact)
924: {
925: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b;
926: IS isicol;
927: PetscErrorCode ierr;
928: PetscInt *r,*ic,n=A->rmap.n,*ai=a->i,*aj=a->j,d;
929: PetscInt *bi,*cols,nnz,*cols_lvl;
930: PetscInt *bdiag,prow,fm,nzbd,len, reallocs=0,dcount=0;
931: PetscInt i,levels,diagonal_fill;
932: PetscTruth col_identity,row_identity;
933: PetscReal f;
934: PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL;
935: PetscBT lnkbt;
936: PetscInt nzi,*bj,**bj_ptr,**bjlvl_ptr;
937: PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
938: PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
939: PetscTruth missing;
942: f = info->fill;
943: levels = (PetscInt)info->levels;
944: diagonal_fill = (PetscInt)info->diagonal_fill;
945: ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
947: /* special case that simply copies fill pattern */
948: ISIdentity(isrow,&row_identity);
949: ISIdentity(iscol,&col_identity);
950: if (!levels && row_identity && col_identity) {
951: MatDuplicate_SeqAIJ(A,MAT_DO_NOT_COPY_VALUES,fact);
952: (*fact)->factor = FACTOR_LU;
953: (*fact)->info.factor_mallocs = 0;
954: (*fact)->info.fill_ratio_given = info->fill;
955: (*fact)->info.fill_ratio_needed = 1.0;
956: b = (Mat_SeqAIJ*)(*fact)->data;
957: MatMissingDiagonal_SeqAIJ(*fact,&missing,&d);
958: if (missing) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d);
959: b->row = isrow;
960: b->col = iscol;
961: b->icol = isicol;
962: PetscMalloc(((*fact)->rmap.n+1)*sizeof(PetscScalar),&b->solve_work);
963: (*fact)->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
964: PetscObjectReference((PetscObject)isrow);
965: PetscObjectReference((PetscObject)iscol);
966: return(0);
967: }
969: ISGetIndices(isrow,&r);
970: ISGetIndices(isicol,&ic);
972: /* get new row pointers */
973: PetscMalloc((n+1)*sizeof(PetscInt),&bi);
974: bi[0] = 0;
975: /* bdiag is location of diagonal in factor */
976: PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);
977: bdiag[0] = 0;
979: PetscMalloc((2*n+1)*sizeof(PetscInt**),&bj_ptr);
980: bjlvl_ptr = (PetscInt**)(bj_ptr + n);
982: /* create a linked list for storing column indices of the active row */
983: nlnk = n + 1;
984: PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);
986: /* initial FreeSpace size is f*(ai[n]+1) */
987: PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);
988: current_space = free_space;
989: PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space_lvl);
990: current_space_lvl = free_space_lvl;
991:
992: for (i=0; i<n; i++) {
993: nzi = 0;
994: /* copy current row into linked list */
995: nnz = ai[r[i]+1] - ai[r[i]];
996: if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix");
997: cols = aj + ai[r[i]];
998: lnk[i] = -1; /* marker to indicate if diagonal exists */
999: PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);
1000: nzi += nlnk;
1002: /* make sure diagonal entry is included */
1003: if (diagonal_fill && lnk[i] == -1) {
1004: fm = n;
1005: while (lnk[fm] < i) fm = lnk[fm];
1006: lnk[i] = lnk[fm]; /* insert diagonal into linked list */
1007: lnk[fm] = i;
1008: lnk_lvl[i] = 0;
1009: nzi++; dcount++;
1010: }
1012: /* add pivot rows into the active row */
1013: nzbd = 0;
1014: prow = lnk[n];
1015: while (prow < i) {
1016: nnz = bdiag[prow];
1017: cols = bj_ptr[prow] + nnz + 1;
1018: cols_lvl = bjlvl_ptr[prow] + nnz + 1;
1019: nnz = bi[prow+1] - bi[prow] - nnz - 1;
1020: PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);
1021: nzi += nlnk;
1022: prow = lnk[prow];
1023: nzbd++;
1024: }
1025: bdiag[i] = nzbd;
1026: bi[i+1] = bi[i] + nzi;
1028: /* if free space is not available, make more free space */
1029: if (current_space->local_remaining<nzi) {
1030: nnz = nzi*(n - i); /* estimated and max additional space needed */
1031: PetscFreeSpaceGet(nnz,¤t_space);
1032: PetscFreeSpaceGet(nnz,¤t_space_lvl);
1033: reallocs++;
1034: }
1036: /* copy data into free_space and free_space_lvl, then initialize lnk */
1037: PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);
1038: bj_ptr[i] = current_space->array;
1039: bjlvl_ptr[i] = current_space_lvl->array;
1041: /* make sure the active row i has diagonal entry */
1042: if (*(bj_ptr[i]+bdiag[i]) != i) {
1043: SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\n\
1044: try running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i);
1045: }
1047: current_space->array += nzi;
1048: current_space->local_used += nzi;
1049: current_space->local_remaining -= nzi;
1050: current_space_lvl->array += nzi;
1051: current_space_lvl->local_used += nzi;
1052: current_space_lvl->local_remaining -= nzi;
1053: }
1055: ISRestoreIndices(isrow,&r);
1056: ISRestoreIndices(isicol,&ic);
1058: /* destroy list of free space and other temporary arrays */
1059: PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);
1060: PetscFreeSpaceContiguous(&free_space,bj);
1061: PetscIncompleteLLDestroy(lnk,lnkbt);
1062: PetscFreeSpaceDestroy(free_space_lvl);
1063: PetscFree(bj_ptr);
1065: #if defined(PETSC_USE_INFO)
1066: {
1067: PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
1068: PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);
1069: PetscInfo1(A,"Run with -[sub_]pc_factor_fill %G or use \n",af);
1070: PetscInfo1(A,"PCFactorSetFill([sub]pc,%G);\n",af);
1071: PetscInfo(A,"for best performance.\n");
1072: if (diagonal_fill) {
1073: PetscInfo1(A,"Detected and replaced %D missing diagonals",dcount);
1074: }
1075: }
1076: #endif
1078: /* put together the new matrix */
1079: MatCreate(A->comm,fact);
1080: MatSetSizes(*fact,n,n,n,n);
1081: MatSetType(*fact,A->type_name);
1082: MatSeqAIJSetPreallocation_SeqAIJ(*fact,MAT_SKIP_ALLOCATION,PETSC_NULL);
1083: PetscLogObjectParent(*fact,isicol);
1084: b = (Mat_SeqAIJ*)(*fact)->data;
1085: b->free_a = PETSC_TRUE;
1086: b->free_ij = PETSC_TRUE;
1087: b->singlemalloc = PETSC_FALSE;
1088: len = (bi[n] )*sizeof(PetscScalar);
1089: PetscMalloc(len+1,&b->a);
1090: b->j = bj;
1091: b->i = bi;
1092: for (i=0; i<n; i++) bdiag[i] += bi[i];
1093: b->diag = bdiag;
1094: b->ilen = 0;
1095: b->imax = 0;
1096: b->row = isrow;
1097: b->col = iscol;
1098: PetscObjectReference((PetscObject)isrow);
1099: PetscObjectReference((PetscObject)iscol);
1100: b->icol = isicol;
1101: PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);
1102: /* In b structure: Free imax, ilen, old a, old j.
1103: Allocate bdiag, solve_work, new a, new j */
1104: PetscLogObjectMemory(*fact,(bi[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));
1105: b->maxnz = b->nz = bi[n] ;
1106: (*fact)->factor = FACTOR_LU;
1107: (*fact)->info.factor_mallocs = reallocs;
1108: (*fact)->info.fill_ratio_given = f;
1109: (*fact)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
1111: MatILUFactorSymbolic_Inode(A,isrow,iscol,info,fact);
1112: (*fact)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */
1114: return(0);
1115: }
1117: #include src/mat/impls/sbaij/seq/sbaij.h
1120: PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B)
1121: {
1122: Mat C = *B;
1123: Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data;
1124: Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data;
1125: IS ip=b->row,iip = b->icol;
1127: PetscInt *rip,*riip,i,j,mbs=A->rmap.n,*bi=b->i,*bj=b->j,*bcol;
1128: PetscInt *ai=a->i,*aj=a->j;
1129: PetscInt k,jmin,jmax,*jl,*il,col,nexti,ili,nz;
1130: MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi;
1131: PetscReal zeropivot,rs,shiftnz;
1132: PetscReal shiftpd;
1133: ChShift_Ctx sctx;
1134: PetscInt newshift;
1137: shiftnz = info->shiftnz;
1138: shiftpd = info->shiftpd;
1139: zeropivot = info->zeropivot;
1141: ISGetIndices(ip,&rip);
1142: ISGetIndices(iip,&riip);
1143:
1144: /* initialization */
1145: nz = (2*mbs+1)*sizeof(PetscInt)+mbs*sizeof(MatScalar);
1146: PetscMalloc(nz,&il);
1147: jl = il + mbs;
1148: rtmp = (MatScalar*)(jl + mbs);
1150: sctx.shift_amount = 0;
1151: sctx.nshift = 0;
1152: do {
1153: sctx.chshift = PETSC_FALSE;
1154: for (i=0; i<mbs; i++) {
1155: rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0;
1156: }
1157:
1158: for (k = 0; k<mbs; k++){
1159: bval = ba + bi[k];
1160: /* initialize k-th row by the perm[k]-th row of A */
1161: jmin = ai[rip[k]]; jmax = ai[rip[k]+1];
1162: for (j = jmin; j < jmax; j++){
1163: col = riip[aj[j]];
1164: if (col >= k){ /* only take upper triangular entry */
1165: rtmp[col] = aa[j];
1166: *bval++ = 0.0; /* for in-place factorization */
1167: }
1168: }
1169: /* shift the diagonal of the matrix */
1170: if (sctx.nshift) rtmp[k] += sctx.shift_amount;
1172: /* modify k-th row by adding in those rows i with U(i,k)!=0 */
1173: dk = rtmp[k];
1174: i = jl[k]; /* first row to be added to k_th row */
1176: while (i < k){
1177: nexti = jl[i]; /* next row to be added to k_th row */
1179: /* compute multiplier, update diag(k) and U(i,k) */
1180: ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
1181: uikdi = - ba[ili]*ba[bi[i]]; /* diagonal(k) */
1182: dk += uikdi*ba[ili];
1183: ba[ili] = uikdi; /* -U(i,k) */
1185: /* add multiple of row i to k-th row */
1186: jmin = ili + 1; jmax = bi[i+1];
1187: if (jmin < jmax){
1188: for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
1189: /* update il and jl for row i */
1190: il[i] = jmin;
1191: j = bj[jmin]; jl[i] = jl[j]; jl[j] = i;
1192: }
1193: i = nexti;
1194: }
1196: /* shift the diagonals when zero pivot is detected */
1197: /* compute rs=sum of abs(off-diagonal) */
1198: rs = 0.0;
1199: jmin = bi[k]+1;
1200: nz = bi[k+1] - jmin;
1201: if (nz){
1202: bcol = bj + jmin;
1203: while (nz--){
1204: rs += PetscAbsScalar(rtmp[*bcol]);
1205: bcol++;
1206: }
1207: }
1209: sctx.rs = rs;
1210: sctx.pv = dk;
1211: MatCholeskyCheckShift_inline(info,sctx,k,newshift);
1212: if (newshift == 1) break;
1213:
1214: /* copy data into U(k,:) */
1215: ba[bi[k]] = 1.0/dk; /* U(k,k) */
1216: jmin = bi[k]+1; jmax = bi[k+1];
1217: if (jmin < jmax) {
1218: for (j=jmin; j<jmax; j++){
1219: col = bj[j]; ba[j] = rtmp[col]; rtmp[col] = 0.0;
1220: }
1221: /* add the k-th row into il and jl */
1222: il[k] = jmin;
1223: i = bj[jmin]; jl[k] = jl[i]; jl[i] = k;
1224: }
1225: }
1226: } while (sctx.chshift);
1227: PetscFree(il);
1229: ISRestoreIndices(ip,&rip);
1230: ISRestoreIndices(iip,&riip);
1231: C->factor = FACTOR_CHOLESKY;
1232: C->assembled = PETSC_TRUE;
1233: C->preallocated = PETSC_TRUE;
1234: PetscLogFlops(C->rmap.n);
1235: if (sctx.nshift){
1236: if (shiftnz) {
1237: PetscInfo2(0,"number of shiftnz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);
1238: } else if (shiftpd) {
1239: PetscInfo2(0,"number of shiftpd tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);
1240: }
1241: }
1242: return(0);
1243: }
1247: PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact)
1248: {
1249: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1250: Mat_SeqSBAIJ *b;
1251: Mat B;
1252: PetscErrorCode ierr;
1253: PetscTruth perm_identity;
1254: PetscInt reallocs=0,*rip,i,*ai=a->i,*aj=a->j,am=A->rmap.n,*ui;
1255: PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow;
1256: PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL;
1257: PetscInt ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr;
1258: PetscReal fill=info->fill,levels=info->levels;
1259: PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
1260: PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
1261: PetscBT lnkbt;
1262:
1264: ISIdentity(perm,&perm_identity);
1265: ISGetIndices(perm,&rip);
1267: PetscMalloc((am+1)*sizeof(PetscInt),&ui);
1268: ui[0] = 0;
1270: /* special case that simply copies fill pattern */
1271: if (!levels && perm_identity) {
1272: for (i=0; i<am; i++) {
1273: ui[i+1] = ui[i] + ai[i+1] - a->diag[i];
1274: }
1275: PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);
1276: cols = uj;
1277: for (i=0; i<am; i++) {
1278: aj = a->j + a->diag[i];
1279: ncols = ui[i+1] - ui[i];
1280: for (j=0; j<ncols; j++) *cols++ = *aj++;
1281: }
1282: } else { /* case: levels>0 || (levels=0 && !perm_identity) */
1283: /* initialization */
1284: PetscMalloc((am+1)*sizeof(PetscInt),&ajtmp);
1286: /* jl: linked list for storing indices of the pivot rows
1287: il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
1288: PetscMalloc((2*am+1)*sizeof(PetscInt)+2*am*sizeof(PetscInt**),&jl);
1289: il = jl + am;
1290: uj_ptr = (PetscInt**)(il + am);
1291: uj_lvl_ptr = (PetscInt**)(uj_ptr + am);
1292: for (i=0; i<am; i++){
1293: jl[i] = am; il[i] = 0;
1294: }
1296: /* create and initialize a linked list for storing column indices of the active row k */
1297: nlnk = am + 1;
1298: PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);
1300: /* initial FreeSpace size is fill*(ai[am]+1) */
1301: PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);
1302: current_space = free_space;
1303: PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);
1304: current_space_lvl = free_space_lvl;
1306: for (k=0; k<am; k++){ /* for each active row k */
1307: /* initialize lnk by the column indices of row rip[k] of A */
1308: nzk = 0;
1309: ncols = ai[rip[k]+1] - ai[rip[k]];
1310: ncols_upper = 0;
1311: for (j=0; j<ncols; j++){
1312: i = *(aj + ai[rip[k]] + j);
1313: if (rip[i] >= k){ /* only take upper triangular entry */
1314: ajtmp[ncols_upper] = i;
1315: ncols_upper++;
1316: }
1317: }
1318: PetscIncompleteLLInit(ncols_upper,ajtmp,am,rip,nlnk,lnk,lnk_lvl,lnkbt);
1319: nzk += nlnk;
1321: /* update lnk by computing fill-in for each pivot row to be merged in */
1322: prow = jl[k]; /* 1st pivot row */
1323:
1324: while (prow < k){
1325: nextprow = jl[prow];
1326:
1327: /* merge prow into k-th row */
1328: jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
1329: jmax = ui[prow+1];
1330: ncols = jmax-jmin;
1331: i = jmin - ui[prow];
1332: cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
1333: uj = uj_lvl_ptr[prow] + i; /* levels of cols */
1334: j = *(uj - 1);
1335: PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);
1336: nzk += nlnk;
1338: /* update il and jl for prow */
1339: if (jmin < jmax){
1340: il[prow] = jmin;
1341: j = *cols; jl[prow] = jl[j]; jl[j] = prow;
1342: }
1343: prow = nextprow;
1344: }
1346: /* if free space is not available, make more free space */
1347: if (current_space->local_remaining<nzk) {
1348: i = am - k + 1; /* num of unfactored rows */
1349: i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
1350: PetscFreeSpaceGet(i,¤t_space);
1351: PetscFreeSpaceGet(i,¤t_space_lvl);
1352: reallocs++;
1353: }
1355: /* copy data into free_space and free_space_lvl, then initialize lnk */
1356: PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);
1358: /* add the k-th row into il and jl */
1359: if (nzk > 1){
1360: i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
1361: jl[k] = jl[i]; jl[i] = k;
1362: il[k] = ui[k] + 1;
1363: }
1364: uj_ptr[k] = current_space->array;
1365: uj_lvl_ptr[k] = current_space_lvl->array;
1367: current_space->array += nzk;
1368: current_space->local_used += nzk;
1369: current_space->local_remaining -= nzk;
1371: current_space_lvl->array += nzk;
1372: current_space_lvl->local_used += nzk;
1373: current_space_lvl->local_remaining -= nzk;
1375: ui[k+1] = ui[k] + nzk;
1376: }
1378: #if defined(PETSC_USE_INFO)
1379: if (ai[am] != 0) {
1380: PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]);
1381: PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);
1382: PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);
1383: PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);
1384: } else {
1385: PetscInfo(A,"Empty matrix.\n");
1386: }
1387: #endif
1389: ISRestoreIndices(perm,&rip);
1390: PetscFree(jl);
1391: PetscFree(ajtmp);
1393: /* destroy list of free space and other temporary array(s) */
1394: PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);
1395: PetscFreeSpaceContiguous(&free_space,uj);
1396: PetscIncompleteLLDestroy(lnk,lnkbt);
1397: PetscFreeSpaceDestroy(free_space_lvl);
1399: } /* end of case: levels>0 || (levels=0 && !perm_identity) */
1401: /* put together the new matrix in MATSEQSBAIJ format */
1402: MatCreate(PETSC_COMM_SELF,fact);
1403: MatSetSizes(*fact,am,am,am,am);
1404: B = *fact;
1405: MatSetType(B,MATSEQSBAIJ);
1406: MatSeqSBAIJSetPreallocation(B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);
1408: b = (Mat_SeqSBAIJ*)B->data;
1409: b->singlemalloc = PETSC_FALSE;
1410: PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);
1411: b->j = uj;
1412: b->i = ui;
1413: b->diag = 0;
1414: b->ilen = 0;
1415: b->imax = 0;
1416: b->row = perm;
1417: b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
1418: PetscObjectReference((PetscObject)perm);
1419: b->icol = perm;
1420: PetscObjectReference((PetscObject)perm);
1421: PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);
1422: PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));
1423: b->maxnz = b->nz = ui[am];
1424: b->free_a = PETSC_TRUE;
1425: b->free_ij = PETSC_TRUE;
1426:
1427: B->factor = FACTOR_CHOLESKY;
1428: B->info.factor_mallocs = reallocs;
1429: B->info.fill_ratio_given = fill;
1430: if (ai[am] != 0) {
1431: B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
1432: } else {
1433: B->info.fill_ratio_needed = 0.0;
1434: }
1435: (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
1436: if (perm_identity){
1437: B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1438: B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1439: }
1440: return(0);
1441: }
1445: PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact)
1446: {
1447: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1448: Mat_SeqSBAIJ *b;
1449: Mat B;
1450: PetscErrorCode ierr;
1451: PetscTruth perm_identity;
1452: PetscReal fill = info->fill;
1453: PetscInt *rip,*riip,i,am=A->rmap.n,*ai=a->i,*aj=a->j,reallocs=0,prow;
1454: PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow;
1455: PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr;
1456: PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
1457: PetscBT lnkbt;
1458: IS iperm;
1461: /* check whether perm is the identity mapping */
1462: ISIdentity(perm,&perm_identity);
1463: ISInvertPermutation(perm,PETSC_DECIDE,&iperm);
1464: ISGetIndices(iperm,&riip);
1465: ISGetIndices(perm,&rip);
1467: /* initialization */
1468: PetscMalloc((am+1)*sizeof(PetscInt),&ui);
1469: ui[0] = 0;
1471: /* jl: linked list for storing indices of the pivot rows
1472: il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
1473: PetscMalloc((3*am+1)*sizeof(PetscInt)+am*sizeof(PetscInt**),&jl);
1474: il = jl + am;
1475: cols = il + am;
1476: ui_ptr = (PetscInt**)(cols + am);
1477: for (i=0; i<am; i++){
1478: jl[i] = am; il[i] = 0;
1479: }
1481: /* create and initialize a linked list for storing column indices of the active row k */
1482: nlnk = am + 1;
1483: PetscLLCreate(am,am,nlnk,lnk,lnkbt);
1485: /* initial FreeSpace size is fill*(ai[am]+1) */
1486: PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);
1487: current_space = free_space;
1489: for (k=0; k<am; k++){ /* for each active row k */
1490: /* initialize lnk by the column indices of row rip[k] of A */
1491: nzk = 0;
1492: ncols = ai[rip[k]+1] - ai[rip[k]];
1493: if (!ncols) SETERRQ(PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix");
1494: ncols_upper = 0;
1495: for (j=0; j<ncols; j++){
1496: i = riip[*(aj + ai[rip[k]] + j)];
1497: if (i >= k){ /* only take upper triangular entry */
1498: cols[ncols_upper] = i;
1499: ncols_upper++;
1500: }
1501: }
1502: PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);
1503: nzk += nlnk;
1505: /* update lnk by computing fill-in for each pivot row to be merged in */
1506: prow = jl[k]; /* 1st pivot row */
1507:
1508: while (prow < k){
1509: nextprow = jl[prow];
1510: /* merge prow into k-th row */
1511: jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
1512: jmax = ui[prow+1];
1513: ncols = jmax-jmin;
1514: uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
1515: PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);
1516: nzk += nlnk;
1518: /* update il and jl for prow */
1519: if (jmin < jmax){
1520: il[prow] = jmin;
1521: j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow;
1522: }
1523: prow = nextprow;
1524: }
1526: /* if free space is not available, make more free space */
1527: if (current_space->local_remaining<nzk) {
1528: i = am - k + 1; /* num of unfactored rows */
1529: i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
1530: PetscFreeSpaceGet(i,¤t_space);
1531: reallocs++;
1532: }
1534: /* copy data into free space, then initialize lnk */
1535: PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);
1537: /* add the k-th row into il and jl */
1538: if (nzk-1 > 0){
1539: i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
1540: jl[k] = jl[i]; jl[i] = k;
1541: il[k] = ui[k] + 1;
1542: }
1543: ui_ptr[k] = current_space->array;
1544: current_space->array += nzk;
1545: current_space->local_used += nzk;
1546: current_space->local_remaining -= nzk;
1548: ui[k+1] = ui[k] + nzk;
1549: }
1551: #if defined(PETSC_USE_INFO)
1552: if (ai[am] != 0) {
1553: PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]);
1554: PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);
1555: PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);
1556: PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);
1557: } else {
1558: PetscInfo(A,"Empty matrix.\n");
1559: }
1560: #endif
1562: ISRestoreIndices(perm,&rip);
1563: ISRestoreIndices(iperm,&riip);
1564: PetscFree(jl);
1566: /* destroy list of free space and other temporary array(s) */
1567: PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);
1568: PetscFreeSpaceContiguous(&free_space,uj);
1569: PetscLLDestroy(lnk,lnkbt);
1571: /* put together the new matrix in MATSEQSBAIJ format */
1572: MatCreate(PETSC_COMM_SELF,fact);
1573: MatSetSizes(*fact,am,am,am,am);
1574: B = *fact;
1575: MatSetType(B,MATSEQSBAIJ);
1576: MatSeqSBAIJSetPreallocation(B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);
1578: b = (Mat_SeqSBAIJ*)B->data;
1579: b->singlemalloc = PETSC_FALSE;
1580: b->free_a = PETSC_TRUE;
1581: b->free_ij = PETSC_TRUE;
1582: PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);
1583: b->j = uj;
1584: b->i = ui;
1585: b->diag = 0;
1586: b->ilen = 0;
1587: b->imax = 0;
1588: b->row = perm;
1589: b->col = perm;
1590: PetscObjectReference((PetscObject)perm);
1591: PetscObjectReference((PetscObject)perm);
1592: b->icol = iperm;
1593: b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
1594: PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);
1595: PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));
1596: b->maxnz = b->nz = ui[am];
1597:
1598: B->factor = FACTOR_CHOLESKY;
1599: B->info.factor_mallocs = reallocs;
1600: B->info.fill_ratio_given = fill;
1601: if (ai[am] != 0) {
1602: B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
1603: } else {
1604: B->info.fill_ratio_needed = 0.0;
1605: }
1606: (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
1607: if (perm_identity){
1608: (*fact)->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1609: (*fact)->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1610: }
1611: return(0);
1612: }