Actual source code: epskrylov.c
 
   slepc-3.7.3 2016-09-29
   
  1: /*
  2:    Common subroutines for all Krylov-type solvers.
  4:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  5:    SLEPc - Scalable Library for Eigenvalue Problem Computations
  6:    Copyright (c) 2002-2016, Universitat Politecnica de Valencia, Spain
  8:    This file is part of SLEPc.
 10:    SLEPc is free software: you can redistribute it and/or modify it under  the
 11:    terms of version 3 of the GNU Lesser General Public License as published by
 12:    the Free Software Foundation.
 14:    SLEPc  is  distributed in the hope that it will be useful, but WITHOUT  ANY
 15:    WARRANTY;  without even the implied warranty of MERCHANTABILITY or  FITNESS
 16:    FOR  A  PARTICULAR PURPOSE. See the GNU Lesser General Public  License  for
 17:    more details.
 19:    You  should have received a copy of the GNU Lesser General  Public  License
 20:    along with SLEPc. If not, see <http://www.gnu.org/licenses/>.
 21:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 22: */
 24: #include <slepc/private/epsimpl.h>
 25: #include <slepc/private/slepcimpl.h>
 26: #include <slepcblaslapack.h>
 30: /*
 31:    EPSBasicArnoldi - Computes an m-step Arnoldi factorization. The first k
 32:    columns are assumed to be locked and therefore they are not modified. On
 33:    exit, the following relation is satisfied:
 35:                     OP * V - V * H = beta*v_m * e_m^T
 37:    where the columns of V are the Arnoldi vectors (which are B-orthonormal),
 38:    H is an upper Hessenberg matrix, e_m is the m-th vector of the canonical basis.
 39:    On exit, beta contains the B-norm of V[m] before normalization.
 40: */
 41: PetscErrorCode EPSBasicArnoldi(EPS eps,PetscBool trans,PetscScalar *H,PetscInt ldh,PetscInt k,PetscInt *M,PetscReal *beta,PetscBool *breakdown)
 42: {
 44:   PetscInt       j,m = *M;
 45:   Vec            vj,vj1;
 48:   BVSetActiveColumns(eps->V,0,m);
 49:   for (j=k;j<m;j++) {
 50:     BVGetColumn(eps->V,j,&vj);
 51:     BVGetColumn(eps->V,j+1,&vj1);
 52:     if (trans) {
 53:       STApplyTranspose(eps->st,vj,vj1);
 54:     } else {
 55:       STApply(eps->st,vj,vj1);
 56:     }
 57:     BVRestoreColumn(eps->V,j,&vj);
 58:     BVRestoreColumn(eps->V,j+1,&vj1);
 59:     BVOrthogonalizeColumn(eps->V,j+1,H+ldh*j,beta,breakdown);
 60:     if (*breakdown) {
 61:       H[j+1+ldh*j] = 0.0;
 62:       *M = j+1;
 63:       break;
 64:     } else {
 65:       H[j+1+ldh*j] = *beta;
 66:       BVScaleColumn(eps->V,j+1,1.0/(*beta));
 67:     }
 68:   }
 69:   return(0);
 70: }
 74: /*
 75:    EPSDelayedArnoldi - This function is equivalent to EPSBasicArnoldi but
 76:    performs the computation in a different way. The main idea is that
 77:    reorthogonalization is delayed to the next Arnoldi step. This version is
 78:    more scalable but in some cases convergence may stagnate.
 79: */
 80: PetscErrorCode EPSDelayedArnoldi(EPS eps,PetscScalar *H,PetscInt ldh,PetscInt k,PetscInt *M,PetscReal *beta,PetscBool *breakdown)
 81: {
 83:   PetscInt       i,j,m=*M;
 84:   Vec            u,t;
 85:   PetscScalar    shh[100],*lhh,dot,dot2;
 86:   PetscReal      norm1=0.0,norm2=1.0;
 87:   Vec            vj,vj1,vj2;
 90:   if (m<=100) lhh = shh;
 91:   else {
 92:     PetscMalloc1(m,&lhh);
 93:   }
 94:   BVCreateVec(eps->V,&u);
 95:   BVCreateVec(eps->V,&t);
 97:   BVSetActiveColumns(eps->V,0,m);
 98:   for (j=k;j<m;j++) {
 99:     BVGetColumn(eps->V,j,&vj);
100:     BVGetColumn(eps->V,j+1,&vj1);
101:     STApply(eps->st,vj,vj1);
102:     BVRestoreColumn(eps->V,j,&vj);
103:     BVRestoreColumn(eps->V,j+1,&vj1);
105:     BVDotColumnBegin(eps->V,j+1,H+ldh*j);
106:     if (j>k) {
107:       BVDotColumnBegin(eps->V,j,lhh);
108:       BVGetColumn(eps->V,j,&vj);
109:       VecDotBegin(vj,vj,&dot);
110:     }
111:     if (j>k+1) {
112:       BVNormVecBegin(eps->V,u,NORM_2,&norm2);
113:       BVGetColumn(eps->V,j-2,&vj2);
114:       VecDotBegin(u,vj2,&dot2);
115:     }
117:     BVDotColumnEnd(eps->V,j+1,H+ldh*j);
118:     if (j>k) {
119:       BVDotColumnEnd(eps->V,j,lhh);
120:       VecDotEnd(vj,vj,&dot);
121:       BVRestoreColumn(eps->V,j,&vj);
122:     }
123:     if (j>k+1) {
124:       BVNormVecEnd(eps->V,u,NORM_2,&norm2);
125:       VecDotEnd(u,vj2,&dot2);
126:       BVRestoreColumn(eps->V,j-2,&vj2);
127:     }
129:     if (j>k) {
130:       norm1 = PetscSqrtReal(PetscRealPart(dot));
131:       for (i=0;i<j;i++)
132:         H[ldh*j+i] = H[ldh*j+i]/norm1;
133:       H[ldh*j+j] = H[ldh*j+j]/dot;
135:       BVCopyVec(eps->V,j,t);
136:       BVScaleColumn(eps->V,j,1.0/norm1);
137:       BVScaleColumn(eps->V,j+1,1.0/norm1);
138:     }
140:     BVMultColumn(eps->V,-1.0,1.0,j+1,H+ldh*j);
142:     if (j>k) {
143:       BVSetActiveColumns(eps->V,0,j);
144:       BVMultVec(eps->V,-1.0,1.0,t,lhh);
145:       BVSetActiveColumns(eps->V,0,m);
146:       for (i=0;i<j;i++)
147:         H[ldh*(j-1)+i] += lhh[i];
148:     }
150:     if (j>k+1) {
151:       BVGetColumn(eps->V,j-1,&vj1);
152:       VecCopy(u,vj1);
153:       BVRestoreColumn(eps->V,j-1,&vj1);
154:       BVScaleColumn(eps->V,j-1,1.0/norm2);
155:       H[ldh*(j-2)+j-1] = norm2;
156:     }
158:     if (j<m-1) {
159:       VecCopy(t,u);
160:     }
161:   }
163:   BVNormVec(eps->V,t,NORM_2,&norm2);
164:   VecScale(t,1.0/norm2);
165:   BVGetColumn(eps->V,m-1,&vj1);
166:   VecCopy(t,vj1);
167:   BVRestoreColumn(eps->V,m-1,&vj1);
168:   H[ldh*(m-2)+m-1] = norm2;
170:   BVDotColumn(eps->V,m,lhh);
172:   BVMultColumn(eps->V,-1.0,1.0,m,lhh);
173:   for (i=0;i<m;i++)
174:     H[ldh*(m-1)+i] += lhh[i];
176:   BVNormColumn(eps->V,m,NORM_2,beta);
177:   BVScaleColumn(eps->V,m,1.0 / *beta);
178:   *breakdown = PETSC_FALSE;
180:   if (m>100) { PetscFree(lhh); }
181:   VecDestroy(&u);
182:   VecDestroy(&t);
183:   return(0);
184: }
188: /*
189:    EPSDelayedArnoldi1 - This function is similar to EPSDelayedArnoldi,
190:    but without reorthogonalization (only delayed normalization).
191: */
192: PetscErrorCode EPSDelayedArnoldi1(EPS eps,PetscScalar *H,PetscInt ldh,PetscInt k,PetscInt *M,PetscReal *beta,PetscBool *breakdown)
193: {
195:   PetscInt       i,j,m=*M;
196:   PetscScalar    dot;
197:   PetscReal      norm=0.0;
198:   Vec            vj,vj1;
201:   BVSetActiveColumns(eps->V,0,m);
202:   for (j=k;j<m;j++) {
203:     BVGetColumn(eps->V,j,&vj);
204:     BVGetColumn(eps->V,j+1,&vj1);
205:     STApply(eps->st,vj,vj1);
206:     BVRestoreColumn(eps->V,j+1,&vj1);
207:     BVDotColumnBegin(eps->V,j+1,H+ldh*j);
208:     if (j>k) {
209:       VecDotBegin(vj,vj,&dot);
210:     }
211:     BVDotColumnEnd(eps->V,j+1,H+ldh*j);
212:     if (j>k) {
213:       VecDotEnd(vj,vj,&dot);
214:     }
215:     BVRestoreColumn(eps->V,j,&vj);
217:     if (j>k) {
218:       norm = PetscSqrtReal(PetscRealPart(dot));
219:       BVScaleColumn(eps->V,j,1.0/norm);
220:       H[ldh*(j-1)+j] = norm;
222:       for (i=0;i<j;i++)
223:         H[ldh*j+i] = H[ldh*j+i]/norm;
224:       H[ldh*j+j] = H[ldh*j+j]/dot;
225:       BVScaleColumn(eps->V,j+1,1.0/norm);
226:       *beta = norm;
227:     }
228:     BVMultColumn(eps->V,-1.0,1.0,j+1,H+ldh*j);
229:   }
231:   *breakdown = PETSC_FALSE;
232:   return(0);
233: }
237: /*
238:    EPSKrylovConvergence - Implements the loop that checks for convergence
239:    in Krylov methods.
241:    Input Parameters:
242:      eps   - the eigensolver; some error estimates are updated in eps->errest
243:      getall - whether all residuals must be computed
244:      kini  - initial value of k (the loop variable)
245:      nits  - number of iterations of the loop
246:      V     - set of basis vectors (used only if trueresidual is activated)
247:      nv    - number of vectors to process (dimension of Q, columns of V)
248:      beta  - norm of f (the residual vector of the Arnoldi/Lanczos factorization)
249:      corrf - correction factor for residual estimates (only in harmonic KS)
251:    Output Parameters:
252:      kout  - the first index where the convergence test failed
253: */
254: PetscErrorCode EPSKrylovConvergence(EPS eps,PetscBool getall,PetscInt kini,PetscInt nits,PetscReal beta,PetscReal corrf,PetscInt *kout)
255: {
257:   PetscInt       k,newk,marker,ld,inside;
258:   PetscScalar    re,im,*Zr,*Zi,*X;
259:   PetscReal      resnorm;
260:   PetscBool      isshift,refined,istrivial;
261:   Vec            x,y,w[3];
264:   RGIsTrivial(eps->rg,&istrivial);
265:   if (eps->trueres) {
266:     BVCreateVec(eps->V,&x);
267:     BVCreateVec(eps->V,&y);
268:     BVCreateVec(eps->V,&w[0]);
269:     BVCreateVec(eps->V,&w[2]);
270: #if !defined(PETSC_USE_COMPLEX)
271:     BVCreateVec(eps->V,&w[1]);
272: #else
273:     w[1] = NULL;
274: #endif
275:   }
276:   DSGetLeadingDimension(eps->ds,&ld);
277:   DSGetRefined(eps->ds,&refined);
278:   PetscObjectTypeCompare((PetscObject)eps->st,STSHIFT,&isshift);
279:   marker = -1;
280:   if (eps->trackall) getall = PETSC_TRUE;
281:   for (k=kini;k<kini+nits;k++) {
282:     /* eigenvalue */
283:     re = eps->eigr[k];
284:     im = eps->eigi[k];
285:     if (!istrivial || eps->trueres || isshift || eps->conv==EPS_CONV_NORM) {
286:       STBackTransform(eps->st,1,&re,&im);
287:     }
288:     if (!istrivial) {
289:       RGCheckInside(eps->rg,1,&re,&im,&inside);
290:       if (marker==-1 && inside<0) marker = k;
291:       if (!(eps->trueres || isshift || eps->conv==EPS_CONV_NORM)) {  /* make sure eps->converged below uses the right value */
292:         re = eps->eigr[k];
293:         im = eps->eigi[k];
294:       }
295:     }
296:     newk = k;
297:     DSVectors(eps->ds,DS_MAT_X,&newk,&resnorm);
298:     if (eps->trueres) {
299:       DSGetArray(eps->ds,DS_MAT_X,&X);
300:       Zr = X+k*ld;
301:       if (newk==k+1) Zi = X+newk*ld;
302:       else Zi = NULL;
303:       EPSComputeRitzVector(eps,Zr,Zi,eps->V,x,y);
304:       DSRestoreArray(eps->ds,DS_MAT_X,&X);
305:       EPSComputeResidualNorm_Private(eps,re,im,x,y,w,&resnorm);
306:     }
307:     else if (!refined) resnorm *= beta*corrf;
308:     /* error estimate */
309:     (*eps->converged)(eps,re,im,resnorm,&eps->errest[k],eps->convergedctx);
310:     if (marker==-1 && eps->errest[k] >= eps->tol) marker = k;
311:     if (newk==k+1) {
312:       eps->errest[k+1] = eps->errest[k];
313:       k++;
314:     }
315:     if (marker!=-1 && !getall) break;
316:   }
317:   if (marker!=-1) k = marker;
318:   *kout = k;
319:   if (eps->trueres) {
320:     VecDestroy(&x);
321:     VecDestroy(&y);
322:     VecDestroy(&w[0]);
323:     VecDestroy(&w[2]);
324: #if !defined(PETSC_USE_COMPLEX)
325:     VecDestroy(&w[1]);
326: #endif
327:   }
328:   return(0);
329: }
333: /*
334:    EPSFullLanczos - Computes an m-step Lanczos factorization with full
335:    reorthogonalization.  At each Lanczos step, the corresponding Lanczos
336:    vector is orthogonalized with respect to all previous Lanczos vectors.
337:    This is equivalent to computing an m-step Arnoldi factorization and
338:    exploting symmetry of the operator.
340:    The first k columns are assumed to be locked and therefore they are
341:    not modified. On exit, the following relation is satisfied:
343:                     OP * V - V * T = beta_m*v_m * e_m^T
345:    where the columns of V are the Lanczos vectors (which are B-orthonormal),
346:    T is a real symmetric tridiagonal matrix, and e_m is the m-th vector of
347:    the canonical basis. The tridiagonal is stored as two arrays: alpha
348:    contains the diagonal elements, beta the off-diagonal. On exit, the last
349:    element of beta contains the B-norm of V[m] before normalization.
350: */
351: PetscErrorCode EPSFullLanczos(EPS eps,PetscReal *alpha,PetscReal *beta,PetscInt k,PetscInt *M,PetscBool *breakdown)
352: {
354:   PetscInt       j,m = *M;
355:   Vec            vj,vj1;
356:   PetscScalar    *hwork,lhwork[100];
359:   if (m > 100) {
360:     PetscMalloc1(m,&hwork);
361:   } else hwork = lhwork;
363:   BVSetActiveColumns(eps->V,0,m);
364:   for (j=k;j<m;j++) {
365:     BVGetColumn(eps->V,j,&vj);
366:     BVGetColumn(eps->V,j+1,&vj1);
367:     STApply(eps->st,vj,vj1);
368:     BVRestoreColumn(eps->V,j,&vj);
369:     BVRestoreColumn(eps->V,j+1,&vj1);
370:     BVOrthogonalizeColumn(eps->V,j+1,hwork,beta+j,breakdown);
371:     alpha[j] = PetscRealPart(hwork[j]);
372:     if (*breakdown) {
373:       *M = j+1;
374:       break;
375:     } else {
376:       BVScaleColumn(eps->V,j+1,1.0/beta[j]);
377:     }
378:   }
379:   if (m > 100) {
380:     PetscFree(hwork);
381:   }
382:   return(0);
383: }
387: PetscErrorCode EPSPseudoLanczos(EPS eps,PetscReal *alpha,PetscReal *beta,PetscReal *omega,PetscInt k,PetscInt *M,PetscBool *breakdown,PetscBool *symmlost,PetscReal *cos,Vec w)
388: {
390:   PetscInt       j,m = *M,i,ld,l;
391:   Vec            vj,vj1;
392:   PetscScalar    *hwork,lhwork[100];
393:   PetscReal      norm,norm1,norm2,t,*f,sym=0.0,fro=0.0;
394:   PetscBLASInt   j_,one=1;
397:   DSGetLeadingDimension(eps->ds,&ld);
398:   DSGetDimensions(eps->ds,NULL,NULL,&l,NULL,NULL);
399:   if (cos) *cos = 1.0;
400:   if (m > 100) {
401:     PetscMalloc1(m,&hwork);
402:   } else hwork = lhwork;
404:   BVSetActiveColumns(eps->V,0,m);
405:   for (j=k;j<m;j++) {
406:     BVGetColumn(eps->V,j,&vj);
407:     BVGetColumn(eps->V,j+1,&vj1);
408:     STApply(eps->st,vj,vj1);
409:     BVRestoreColumn(eps->V,j,&vj);
410:     BVRestoreColumn(eps->V,j+1,&vj1);
411:     BVOrthogonalizeColumn(eps->V,j+1,hwork,&norm,breakdown);
412:     alpha[j] = PetscRealPart(hwork[j]);
413:     beta[j] = PetscAbsReal(norm);
414:     DSGetArrayReal(eps->ds,DS_MAT_T,&f);
415:     if (j==k) { 
416:       for (i=l;i<j-1;i++) hwork[i]-= f[2*ld+i];
417:       for (i=0;i<l;i++) hwork[i] = 0.0;
418:     }
419:     DSRestoreArrayReal(eps->ds,DS_MAT_T,&f);
420:     hwork[j-1] -= beta[j-1];
421:     PetscBLASIntCast(j,&j_);
422:     sym = SlepcAbs(BLASnrm2_(&j_,hwork,&one),sym);
423:     fro = SlepcAbs(fro,SlepcAbs(alpha[j],beta[j]));
424:     if (j>0) fro = SlepcAbs(fro,beta[j-1]);
425:     if (sym/fro>PetscMax(PETSC_SQRT_MACHINE_EPSILON,10*eps->tol)) { *symmlost = PETSC_TRUE; *M=j+1; break; }
426:     omega[j+1] = (norm<0.0)? -1.0: 1.0;
427:     BVScaleColumn(eps->V,j+1,1.0/norm);
428:     /* */
429:     if (cos) {
430:       BVGetColumn(eps->V,j+1,&vj1);
431:       VecNorm(vj1,NORM_2,&norm1);
432:       BVApplyMatrix(eps->V,vj1,w);
433:       BVRestoreColumn(eps->V,j+1,&vj1);
434:       VecNorm(w,NORM_2,&norm2);
435:       t = 1.0/(norm1*norm2);
436:       if (*cos>t) *cos = t;
437:     }
438:   }
439:   if (m > 100) {
440:     PetscFree(hwork);
441:   }
442:   return(0);
443: }