Actual source code: arnoldi.c

  1: /*                       

  3:    SLEPc eigensolver: "arnoldi"

  5:    Method: Explicitly Restarted Arnoldi

  7:    Algorithm:

  9:        Arnoldi method with explicit restart and deflation.

 11:    References:

 13:        [1] "Arnoldi Methods in SLEPc", SLEPc Technical Report STR-4, 
 14:            available at http://www.grycap.upv.es/slepc.

 16:    Last update: Oct 2006

 18: */
 19:  #include src/eps/epsimpl.h
 20:  #include slepcblaslapack.h

 22: typedef struct {
 23:   PetscTruth delayed;
 24: } EPS_ARNOLDI;

 28: PetscErrorCode EPSSetUp_ARNOLDI(EPS eps)
 29: {
 31:   PetscInt       N;

 34:   VecGetSize(eps->vec_initial,&N);
 35:   if (eps->ncv) {
 36:     if (eps->ncv<eps->nev) SETERRQ(1,"The value of ncv must be at least nev");
 37:   }
 38:   else eps->ncv = PetscMin(N,PetscMax(2*eps->nev,eps->nev+15));
 39:   if (!eps->max_it) eps->max_it = PetscMax(100,2*N/eps->ncv);
 40:   if (eps->ishermitian && (eps->which==EPS_LARGEST_IMAGINARY || eps->which==EPS_SMALLEST_IMAGINARY))
 41:     SETERRQ(1,"Wrong value of eps->which");

 43:   EPSAllocateSolution(eps);
 44:   PetscFree(eps->T);
 45:   PetscMalloc(eps->ncv*eps->ncv*sizeof(PetscScalar),&eps->T);
 46:   if (eps->solverclass==EPS_TWO_SIDE) {
 47:     PetscFree(eps->Tl);
 48:     PetscMalloc(eps->ncv*eps->ncv*sizeof(PetscScalar),&eps->Tl);
 49:     EPSDefaultGetWork(eps,2);
 50:   }
 51:   else { EPSDefaultGetWork(eps,1); }
 52:   return(0);
 53: }

 57: /*
 58:    EPSBasicArnoldi - Computes an m-step Arnoldi factorization. The first k
 59:    columns are assumed to be locked and therefore they are not modified. On
 60:    exit, the following relation is satisfied:

 62:                     OP * V - V * H = f * e_m^T

 64:    where the columns of V are the Arnoldi vectors (which are B-orthonormal),
 65:    H is an upper Hessenberg matrix, f is the residual vector and e_m is
 66:    the m-th vector of the canonical basis. The vector f is B-orthogonal to
 67:    the columns of V. On exit, beta contains the B-norm of f and the next 
 68:    Arnoldi vector can be computed as v_{m+1} = f / beta. 
 69: */
 70: PetscErrorCode EPSBasicArnoldi(EPS eps,PetscTruth trans,PetscScalar *H,Vec *V,int k,int *M,Vec f,PetscReal *beta,PetscTruth *breakdown)
 71: {
 73:   int            j,m = *M;
 74:   PetscReal      norm;

 77:   for (j=k;j<m-1;j++) {
 78:     if (trans) { STApplyTranspose(eps->OP,V[j],V[j+1]); }
 79:     else { STApply(eps->OP,V[j],V[j+1]); }
 80:     EPSOrthogonalize(eps,eps->nds,PETSC_NULL,eps->DS,V[j+1],PETSC_NULL,PETSC_NULL,PETSC_NULL);
 81:     EPSOrthogonalize(eps,j+1,PETSC_NULL,V,V[j+1],H+m*j,&norm,breakdown);
 82:     H[(m+1)*j+1] = norm;
 83:     if (*breakdown) {
 84:       *M = j+1;
 85:       *beta = norm;
 86:       return(0);
 87:     } else {
 88:       VecScale(V[j+1],1/norm);
 89:     }
 90:   }
 91:   STApply(eps->OP,V[m-1],f);
 92:   EPSOrthogonalize(eps,eps->nds,PETSC_NULL,eps->DS,f,PETSC_NULL,PETSC_NULL,PETSC_NULL);
 93:   EPSOrthogonalize(eps,m,PETSC_NULL,V,f,H+m*(m-1),beta,PETSC_NULL);
 94:   return(0);
 95: }

 99: /*
100:    EPSDelayedArnoldi - This function is equivalent to EPSBasicArnoldi but
101:    performs the computation in a different way. The main idea is that
102:    reorthogonalization is delayed to the next Arnoldi step. This version is
103:    more scalable but in some cases convergence may stagnate.
104: */
105: PetscErrorCode EPSDelayedArnoldi(EPS eps,PetscScalar *H,Vec *V,int k,int *M,Vec f,PetscReal *beta,PetscTruth *breakdown)
106: {
108:   int            i,j,m=*M;
109:   Vec            w,u,t;
110:   PetscScalar    shh[100],*lhh,dot,dot2;
111:   PetscReal      norm1=0.0,norm2;

114:   if (m<=100) lhh = shh;
115:   else { PetscMalloc(m*sizeof(PetscScalar),&lhh); }
116:   VecDuplicate(f,&w);
117:   VecDuplicate(f,&u);
118:   VecDuplicate(f,&t);

120:   for (j=k;j<m;j++) {
121:     STApply(eps->OP,V[j],f);
122:     EPSOrthogonalize(eps,eps->nds,PETSC_NULL,eps->DS,f,PETSC_NULL,PETSC_NULL,PETSC_NULL);

124:     STMInnerProductBegin(eps->OP,j+1,f,V,H+m*j);
125:     if (j>k) {
126:       STMInnerProductBegin(eps->OP,j,V[j],V,lhh);
127:       STInnerProductBegin(eps->OP,V[j],V[j],&dot);
128:     }
129:     if (j>k+1) {
130:       STNormBegin(eps->OP,u,&norm2);
131:       VecDotBegin(u,V[j-2],&dot2);
132:     }
133: 
134:     STMInnerProductEnd(eps->OP,j+1,f,V,H+m*j);
135:     if (j>k) {
136:       STMInnerProductEnd(eps->OP,j,V[j],V,lhh);
137:       STInnerProductEnd(eps->OP,V[j],V[j],&dot);
138:     }
139:     if (j>k+1) {
140:       STNormEnd(eps->OP,u,&norm2);
141:       VecDotEnd(u,V[j-2],&dot2);
142:       if (PetscAbsScalar(dot2/norm2) > PETSC_MACHINE_EPSILON) {
143:         *breakdown = PETSC_TRUE;
144:         *M = j-1;
145:         *beta = norm2;

147:         if (m>100) { PetscFree(lhh); }
148:         VecDestroy(w);
149:         VecDestroy(u);
150:         VecDestroy(t);
151:         return(0);
152:       }
153:     }
154: 
155:     if (j>k) {
156:       norm1 = sqrt(PetscRealPart(dot));
157:       for (i=0;i<j;i++)
158:         H[m*j+i] = H[m*j+i]/norm1;
159:       H[m*j+j] = H[m*j+j]/dot;
160: 
161:       VecCopy(V[j],t);
162:       VecScale(V[j],1.0/norm1);
163:       VecScale(f,1.0/norm1);
164:     }

166:     VecSet(w,0.0);
167:     VecMAXPY(w,j+1,H+m*j,V);
168:     VecAXPY(f,-1.0,w);

170:     if (j>k) {
171:       VecSet(w,0.0);
172:       VecMAXPY(w,j,lhh,V);
173:       VecAXPY(t,-1.0,w);
174:       for (i=0;i<j;i++)
175:         H[m*(j-1)+i] += lhh[i];
176:     }

178:     if (j>k+1) {
179:       VecCopy(u,V[j-1]);
180:       VecScale(V[j-1],1.0/norm2);
181:       H[m*(j-2)+j-1] = norm2;
182:     }

184:     if (j<m-1) {
185:       VecCopy(f,V[j+1]);
186:       VecCopy(t,u);
187:     }
188:   }

190:   STNorm(eps->OP,t,&norm2);
191:   VecScale(t,1.0/norm2);
192:   VecCopy(t,V[m-1]);
193:   H[m*(m-2)+m-1] = norm2;

195:   STMInnerProduct(eps->OP,m,f,V,lhh);
196: 
197:   VecSet(w,0.0);
198:   VecMAXPY(w,m,lhh,V);
199:   VecAXPY(f,-1.0,w);
200:   for (i=0;i<m;i++)
201:     H[m*(m-1)+i] += lhh[i];

203:   STNorm(eps->OP,f,beta);
204:   VecScale(f,1.0 / *beta);
205:   *breakdown = PETSC_FALSE;
206: 
207:   if (m>100) { PetscFree(lhh); }
208:   VecDestroy(w);
209:   VecDestroy(u);
210:   VecDestroy(t);

212:   return(0);
213: }

217: /*
218:    EPSDelayedArnoldi1 - This function is similar to EPSDelayedArnoldi1,
219:    but without reorthogonalization (only delayed normalization).
220: */
221: PetscErrorCode EPSDelayedArnoldi1(EPS eps,PetscScalar *H,Vec *V,int k,int *M,Vec f,PetscReal *beta,PetscTruth *breakdown)
222: {
224:   int            i,j,m=*M;
225:   Vec            w;
226:   PetscScalar    dot;
227:   PetscReal      norm=0.0;

230:   VecDuplicate(f,&w);

232:   for (j=k;j<m;j++) {
233:     STApply(eps->OP,V[j],f);
234:     EPSOrthogonalize(eps,eps->nds,PETSC_NULL,eps->DS,f,PETSC_NULL,PETSC_NULL,PETSC_NULL);

236:     STMInnerProductBegin(eps->OP,j+1,f,V,H+m*j);
237:     if (j>k) {
238:       STInnerProductBegin(eps->OP,V[j],V[j],&dot);
239:     }
240: 
241:     STMInnerProductEnd(eps->OP,j+1,f,V,H+m*j);
242:     if (j>k) {
243:       STInnerProductEnd(eps->OP,V[j],V[j],&dot);
244:     }
245: 
246:     if (j>k) {
247:       norm = sqrt(PetscRealPart(dot));
248:       VecScale(V[j],1.0/norm);
249:       H[m*(j-1)+j] = norm;

251:       for (i=0;i<j;i++)
252:         H[m*j+i] = H[m*j+i]/norm;
253:       H[m*j+j] = H[m*j+j]/dot;
254:       VecScale(f,1.0/norm);
255:     }

257:     VecSet(w,0.0);
258:     VecMAXPY(w,j+1,H+m*j,V);
259:     VecAXPY(f,-1.0,w);

261:     if (j<m-1) {
262:       VecCopy(f,V[j+1]);
263:     }
264:   }

266:   STNorm(eps->OP,f,beta);
267:   VecScale(f,1.0 / *beta);
268:   *breakdown = PETSC_FALSE;
269: 
270:   VecDestroy(w);
271:   return(0);
272: }

276: /*
277:    EPSArnoldiResiduals - Computes the 2-norm of the residual vectors from
278:    the information provided by the m-step Arnoldi factorization,

280:                     OP * V - V * H = f * e_m^T

282:    For the approximate eigenpair (k_i,V*y_i), the residual norm is computed as
283:    |beta*y(end,i)| where beta is the norm of f and y is the corresponding 
284:    eigenvector of H.
285: */
286: PetscErrorCode ArnoldiResiduals(PetscScalar *H,int ldh,PetscScalar *U,PetscReal beta,int nconv,int ncv,PetscScalar *eigr,PetscScalar *eigi,PetscReal *errest,PetscScalar *work)
287: {
288: #if defined(SLEPC_MISSING_LAPACK_TREVC)
290:   SETERRQ(PETSC_ERR_SUP,"TREVC - Lapack routine is unavailable.");
291: #else
293:   int            i,mout,info;
294:   PetscScalar    *Y=work+4*ncv;
295:   PetscReal      w;
296: #if defined(PETSC_USE_COMPLEX)
297:   PetscReal      *rwork=(PetscReal*)(work+3*ncv);
298: #endif


302:   /* Compute eigenvectors Y of H */
303:   PetscMemcpy(Y,U,ncv*ncv*sizeof(PetscScalar));
304: #if !defined(PETSC_USE_COMPLEX)
305:   LAPACKtrevc_("R","B",PETSC_NULL,&ncv,H,&ldh,PETSC_NULL,&ncv,Y,&ncv,&ncv,&mout,work,&info,1,1);
306: #else
307:   LAPACKtrevc_("R","B",PETSC_NULL,&ncv,H,&ldh,PETSC_NULL,&ncv,Y,&ncv,&ncv,&mout,work,rwork,&info,1,1);
308: #endif
309:   if (info) SETERRQ1(PETSC_ERR_LIB,"Error in Lapack xTREVC %i",info);

311:   /* Compute residual norm estimates as beta*abs(Y(m,:)) */
312:   for (i=nconv;i<ncv;i++) {
313: #if !defined(PETSC_USE_COMPLEX)
314:     if (eigi[i] != 0 && i<ncv-1) {
315:       errest[i] = beta*SlepcAbsEigenvalue(Y[i*ncv+ncv-1],Y[(i+1)*ncv+ncv-1]);
316:       w = SlepcAbsEigenvalue(eigr[i],eigi[i]);
317:       if (w > errest[i])
318:         errest[i] = errest[i] / w;
319:       errest[i+1] = errest[i];
320:       i++;
321:     } else
322: #endif
323:     {
324:       errest[i] = beta*PetscAbsScalar(Y[i*ncv+ncv-1]);
325:       w = PetscAbsScalar(eigr[i]);
326:       if (w > errest[i])
327:         errest[i] = errest[i] / w;
328:     }
329:   }
330:   return(0);
331: #endif
332: }

336: PetscErrorCode EPSSolve_ARNOLDI(EPS eps)
337: {
339:   int            i,k;
340:   Vec            f=eps->work[0];
341:   PetscScalar    *H=eps->T,*U,*work;
342:   PetscReal      beta;
343:   PetscTruth     breakdown;
344:   EPS_ARNOLDI    *arnoldi = (EPS_ARNOLDI *)eps->data;

347:   PetscMemzero(eps->T,eps->ncv*eps->ncv*sizeof(PetscScalar));
348:   PetscMalloc(eps->ncv*eps->ncv*sizeof(PetscScalar),&U);
349:   PetscMalloc((eps->ncv+4)*eps->ncv*sizeof(PetscScalar),&work);
350: 
351:   /* Get the starting Arnoldi vector */
352:   EPSGetStartVector(eps,0,eps->V[0],PETSC_NULL);
353: 
354:   /* Restart loop */
355:   while (eps->reason == EPS_CONVERGED_ITERATING) {
356:     eps->its++;

358:     /* Compute an nv-step Arnoldi factorization */
359:     eps->nv = eps->ncv;
360:     if (!arnoldi->delayed) {
361:       EPSBasicArnoldi(eps,PETSC_FALSE,H,eps->V,eps->nconv,&eps->nv,f,&beta,&breakdown);
362:     } else if (eps->orthog_ref == EPS_ORTH_REFINE_NEVER) {
363:       EPSDelayedArnoldi1(eps,H,eps->V,eps->nconv,&eps->nv,f,&beta,&breakdown);
364:     } else {
365:       EPSDelayedArnoldi(eps,H,eps->V,eps->nconv,&eps->nv,f,&beta,&breakdown);
366:     }

368:     /* Reduce H to (quasi-)triangular form, H <- U H U' */
369:     PetscMemzero(U,eps->nv*eps->nv*sizeof(PetscScalar));
370:     for (i=0;i<eps->nv;i++) { U[i*(eps->nv+1)] = 1.0; }
371:     EPSDenseSchur(eps->nv,eps->nconv,H,eps->ncv,U,eps->eigr,eps->eigi);

373:     /* Sort the remaining columns of the Schur form */
374:     EPSSortDenseSchur(eps->nv,eps->nconv,H,eps->ncv,U,eps->eigr,eps->eigi,eps->which);

376:     /* Compute residual norm estimates */
377:     ArnoldiResiduals(H,eps->ncv,U,beta,eps->nconv,eps->nv,eps->eigr,eps->eigi,eps->errest,work);
378: 
379:     /* Lock converged eigenpairs and update the corresponding vectors,
380:        including the restart vector: V(:,idx) = V*U(:,idx) */
381:     k = eps->nconv;
382:     while (k<eps->nv && eps->errest[k]<eps->tol) k++;
383:     for (i=eps->nconv;i<=k && i<eps->nv;i++) {
384:       VecSet(eps->AV[i],0.0);
385:       VecMAXPY(eps->AV[i],eps->nv,U+eps->nv*i,eps->V);
386:     }
387:     for (i=eps->nconv;i<=k && i<eps->nv;i++) {
388:       VecCopy(eps->AV[i],eps->V[i]);
389:     }
390:     eps->nconv = k;

392:     EPSMonitor(eps,eps->its,eps->nconv,eps->eigr,eps->eigi,eps->errest,eps->nv);
393:     if (breakdown) {
394:       PetscInfo2(eps,"Breakdown in Arnoldi method (it=%i norm=%g)\n",eps->its,beta);
395:       EPSGetStartVector(eps,k,eps->V[k],&breakdown);
396:       if (breakdown) {
397:         eps->reason = EPS_DIVERGED_BREAKDOWN;
398:         PetscInfo(eps,"Unable to generate more start vectors\n");
399:       }
400:     }
401:     if (eps->its >= eps->max_it) eps->reason = EPS_DIVERGED_ITS;
402:     if (eps->nconv >= eps->nev) eps->reason = EPS_CONVERGED_TOL;
403:   }
404: 
405:   PetscFree(U);
406:   PetscFree(work);
407:   return(0);
408: }

412: PetscErrorCode EPSSetFromOptions_ARNOLDI(EPS eps)
413: {
415:   EPS_ARNOLDI    *arnoldi = (EPS_ARNOLDI *)eps->data;

418:   PetscOptionsHead("ARNOLDI options");
419:   PetscOptionsTruth("-eps_arnoldi_delayed","Arnoldi with delayed reorthogonalization","EPSArnoldiSetDelayed",PETSC_FALSE,&arnoldi->delayed,PETSC_NULL);
420:   PetscOptionsTail();
421:   return(0);
422: }

427: PetscErrorCode EPSArnoldiSetDelayed_ARNOLDI(EPS eps,PetscTruth delayed)
428: {
429:   EPS_ARNOLDI    *arnoldi = (EPS_ARNOLDI *)eps->data;

432:   arnoldi->delayed = delayed;
433:   return(0);
434: }

439: /*@
440:    EPSArnoldiSetDelayed - Activates or deactivates delayed reorthogonalization 
441:    in the Arnoldi iteration. 

443:    Collective on EPS

445:    Input Parameters:
446: +  eps - the eigenproblem solver context
447: -  delayed - boolean flag

449:    Options Database Key:
450: .  -eps_arnoldi_delayed - Activates delayed reorthogonalization in Arnoldi
451:    
452:    Note:
453:    Delayed reorthogonalization is an aggressive optimization for the Arnoldi
454:    eigensolver than may provide better scalability, but sometimes makes the
455:    solver converge less than the default algorithm.

457:    Level: advanced

459: .seealso: EPSArnoldiGetDelayed()
460: @*/
461: PetscErrorCode EPSArnoldiSetDelayed(EPS eps,PetscTruth delayed)
462: {
463:   PetscErrorCode ierr, (*f)(EPS,PetscTruth);

467:   PetscObjectQueryFunction((PetscObject)eps,"EPSArnoldiSetDelayed_C",(void (**)())&f);
468:   if (f) {
469:     (*f)(eps,delayed);
470:   }
471:   return(0);
472: }

477: PetscErrorCode EPSArnoldiGetDelayed_ARNOLDI(EPS eps,PetscTruth *delayed)
478: {
479:   EPS_ARNOLDI    *arnoldi = (EPS_ARNOLDI *)eps->data;

482:   *delayed = arnoldi->delayed;
483:   return(0);
484: }

489: /*@C
490:    EPSArnoldiGetDelayed - Gets the type of reorthogonalization used during the Arnoldi
491:    iteration. 

493:    Collective on EPS

495:    Input Parameter:
496: .  eps - the eigenproblem solver context

498:    Input Parameter:
499: .  delayed - boolean flag indicating if delayed reorthogonalization has been enabled

501:    Level: advanced

503: .seealso: EPSArnoldiSetDelayed()
504: @*/
505: PetscErrorCode EPSArnoldiGetDelayed(EPS eps,PetscTruth *delayed)
506: {
507:   PetscErrorCode ierr, (*f)(EPS,PetscTruth*);

511:   PetscObjectQueryFunction((PetscObject)eps,"EPSArnoldiGetDelayed_C",(void (**)())&f);
512:   if (f) {
513:     (*f)(eps,delayed);
514:   }
515:   return(0);
516: }

520: PetscErrorCode EPSView_ARNOLDI(EPS eps,PetscViewer viewer)
521: {
523:   PetscTruth     isascii;
524:   EPS_ARNOLDI    *arnoldi = (EPS_ARNOLDI *)eps->data;

527:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);
528:   if (!isascii) {
529:     SETERRQ1(1,"Viewer type %s not supported for EPSARNOLDI",((PetscObject)viewer)->type_name);
530:   }
531:   if (arnoldi->delayed) {
532:     PetscViewerASCIIPrintf(viewer,"using delayed reorthogonalization\n");
533:   }
534:   return(0);
535: }

537: EXTERN PetscErrorCode EPSSolve_TS_ARNOLDI(EPS);

542: PetscErrorCode EPSCreate_ARNOLDI(EPS eps)
543: {
545:   EPS_ARNOLDI    *arnoldi;
546: 
548:   PetscNew(EPS_ARNOLDI,&arnoldi);
549:   PetscLogObjectMemory(eps,sizeof(EPS_ARNOLDI));
550:   eps->data                      = (void *)arnoldi;
551:   eps->ops->solve                = EPSSolve_ARNOLDI;
552:   eps->ops->solvets              = EPSSolve_TS_ARNOLDI;
553:   eps->ops->setup                = EPSSetUp_ARNOLDI;
554:   eps->ops->setfromoptions       = EPSSetFromOptions_ARNOLDI;
555:   eps->ops->destroy              = EPSDestroy_Default;
556:   eps->ops->view                 = EPSView_ARNOLDI;
557:   eps->ops->backtransform        = EPSBackTransform_Default;
558:   eps->ops->computevectors       = EPSComputeVectors_Schur;
559:   arnoldi->delayed               = PETSC_FALSE;
560:   PetscObjectComposeFunctionDynamic((PetscObject)eps,"EPSArnoldiSetDelayed_C","EPSArnoldiSetDelayed_ARNOLDI",EPSArnoldiSetDelayed_ARNOLDI);
561:   PetscObjectComposeFunctionDynamic((PetscObject)eps,"EPSArnoldiGetDelayed_C","EPSArnoldiGetDelayed_ARNOLDI",EPSArnoldiGetDelayed_ARNOLDI);
562:   return(0);
563: }