Actual source code: arnoldi.c
1: /*
3: SLEPc eigensolver: "arnoldi"
5: Method: Explicitly Restarted Arnoldi
7: Algorithm:
9: Arnoldi method with explicit restart and deflation.
11: References:
13: [1] "Arnoldi Methods in SLEPc", SLEPc Technical Report STR-4,
14: available at http://www.grycap.upv.es/slepc.
16: Last update: Oct 2006
18: */
19: #include src/eps/epsimpl.h
20: #include slepcblaslapack.h
22: typedef struct {
23: PetscTruth delayed;
24: } EPS_ARNOLDI;
28: PetscErrorCode EPSSetUp_ARNOLDI(EPS eps)
29: {
31: PetscInt N;
34: VecGetSize(eps->vec_initial,&N);
35: if (eps->ncv) {
36: if (eps->ncv<eps->nev) SETERRQ(1,"The value of ncv must be at least nev");
37: }
38: else eps->ncv = PetscMin(N,PetscMax(2*eps->nev,eps->nev+15));
39: if (!eps->max_it) eps->max_it = PetscMax(100,2*N/eps->ncv);
40: if (eps->ishermitian && (eps->which==EPS_LARGEST_IMAGINARY || eps->which==EPS_SMALLEST_IMAGINARY))
41: SETERRQ(1,"Wrong value of eps->which");
43: EPSAllocateSolution(eps);
44: PetscFree(eps->T);
45: PetscMalloc(eps->ncv*eps->ncv*sizeof(PetscScalar),&eps->T);
46: if (eps->solverclass==EPS_TWO_SIDE) {
47: PetscFree(eps->Tl);
48: PetscMalloc(eps->ncv*eps->ncv*sizeof(PetscScalar),&eps->Tl);
49: EPSDefaultGetWork(eps,2);
50: }
51: else { EPSDefaultGetWork(eps,1); }
52: return(0);
53: }
57: /*
58: EPSBasicArnoldi - Computes an m-step Arnoldi factorization. The first k
59: columns are assumed to be locked and therefore they are not modified. On
60: exit, the following relation is satisfied:
62: OP * V - V * H = f * e_m^T
64: where the columns of V are the Arnoldi vectors (which are B-orthonormal),
65: H is an upper Hessenberg matrix, f is the residual vector and e_m is
66: the m-th vector of the canonical basis. The vector f is B-orthogonal to
67: the columns of V. On exit, beta contains the B-norm of f and the next
68: Arnoldi vector can be computed as v_{m+1} = f / beta.
69: */
70: PetscErrorCode EPSBasicArnoldi(EPS eps,PetscTruth trans,PetscScalar *H,Vec *V,int k,int *M,Vec f,PetscReal *beta,PetscTruth *breakdown)
71: {
73: int j,m = *M;
74: PetscReal norm;
77: for (j=k;j<m-1;j++) {
78: if (trans) { STApplyTranspose(eps->OP,V[j],V[j+1]); }
79: else { STApply(eps->OP,V[j],V[j+1]); }
80: EPSOrthogonalize(eps,eps->nds,PETSC_NULL,eps->DS,V[j+1],PETSC_NULL,PETSC_NULL,PETSC_NULL);
81: EPSOrthogonalize(eps,j+1,PETSC_NULL,V,V[j+1],H+m*j,&norm,breakdown);
82: H[(m+1)*j+1] = norm;
83: if (*breakdown) {
84: *M = j+1;
85: *beta = norm;
86: return(0);
87: } else {
88: VecScale(V[j+1],1/norm);
89: }
90: }
91: STApply(eps->OP,V[m-1],f);
92: EPSOrthogonalize(eps,eps->nds,PETSC_NULL,eps->DS,f,PETSC_NULL,PETSC_NULL,PETSC_NULL);
93: EPSOrthogonalize(eps,m,PETSC_NULL,V,f,H+m*(m-1),beta,PETSC_NULL);
94: return(0);
95: }
99: /*
100: EPSDelayedArnoldi - This function is equivalent to EPSBasicArnoldi but
101: performs the computation in a different way. The main idea is that
102: reorthogonalization is delayed to the next Arnoldi step. This version is
103: more scalable but in some cases convergence may stagnate.
104: */
105: PetscErrorCode EPSDelayedArnoldi(EPS eps,PetscScalar *H,Vec *V,int k,int *M,Vec f,PetscReal *beta,PetscTruth *breakdown)
106: {
108: int i,j,m=*M;
109: Vec w,u,t;
110: PetscScalar shh[100],*lhh,dot,dot2;
111: PetscReal norm1=0.0,norm2;
114: if (m<=100) lhh = shh;
115: else { PetscMalloc(m*sizeof(PetscScalar),&lhh); }
116: VecDuplicate(f,&w);
117: VecDuplicate(f,&u);
118: VecDuplicate(f,&t);
120: for (j=k;j<m;j++) {
121: STApply(eps->OP,V[j],f);
122: EPSOrthogonalize(eps,eps->nds,PETSC_NULL,eps->DS,f,PETSC_NULL,PETSC_NULL,PETSC_NULL);
124: STMInnerProductBegin(eps->OP,j+1,f,V,H+m*j);
125: if (j>k) {
126: STMInnerProductBegin(eps->OP,j,V[j],V,lhh);
127: STInnerProductBegin(eps->OP,V[j],V[j],&dot);
128: }
129: if (j>k+1) {
130: STNormBegin(eps->OP,u,&norm2);
131: VecDotBegin(u,V[j-2],&dot2);
132: }
133:
134: STMInnerProductEnd(eps->OP,j+1,f,V,H+m*j);
135: if (j>k) {
136: STMInnerProductEnd(eps->OP,j,V[j],V,lhh);
137: STInnerProductEnd(eps->OP,V[j],V[j],&dot);
138: }
139: if (j>k+1) {
140: STNormEnd(eps->OP,u,&norm2);
141: VecDotEnd(u,V[j-2],&dot2);
142: if (PetscAbsScalar(dot2/norm2) > PETSC_MACHINE_EPSILON) {
143: *breakdown = PETSC_TRUE;
144: *M = j-1;
145: *beta = norm2;
147: if (m>100) { PetscFree(lhh); }
148: VecDestroy(w);
149: VecDestroy(u);
150: VecDestroy(t);
151: return(0);
152: }
153: }
154:
155: if (j>k) {
156: norm1 = sqrt(PetscRealPart(dot));
157: for (i=0;i<j;i++)
158: H[m*j+i] = H[m*j+i]/norm1;
159: H[m*j+j] = H[m*j+j]/dot;
160:
161: VecCopy(V[j],t);
162: VecScale(V[j],1.0/norm1);
163: VecScale(f,1.0/norm1);
164: }
166: VecSet(w,0.0);
167: VecMAXPY(w,j+1,H+m*j,V);
168: VecAXPY(f,-1.0,w);
170: if (j>k) {
171: VecSet(w,0.0);
172: VecMAXPY(w,j,lhh,V);
173: VecAXPY(t,-1.0,w);
174: for (i=0;i<j;i++)
175: H[m*(j-1)+i] += lhh[i];
176: }
178: if (j>k+1) {
179: VecCopy(u,V[j-1]);
180: VecScale(V[j-1],1.0/norm2);
181: H[m*(j-2)+j-1] = norm2;
182: }
184: if (j<m-1) {
185: VecCopy(f,V[j+1]);
186: VecCopy(t,u);
187: }
188: }
190: STNorm(eps->OP,t,&norm2);
191: VecScale(t,1.0/norm2);
192: VecCopy(t,V[m-1]);
193: H[m*(m-2)+m-1] = norm2;
195: STMInnerProduct(eps->OP,m,f,V,lhh);
196:
197: VecSet(w,0.0);
198: VecMAXPY(w,m,lhh,V);
199: VecAXPY(f,-1.0,w);
200: for (i=0;i<m;i++)
201: H[m*(m-1)+i] += lhh[i];
203: STNorm(eps->OP,f,beta);
204: VecScale(f,1.0 / *beta);
205: *breakdown = PETSC_FALSE;
206:
207: if (m>100) { PetscFree(lhh); }
208: VecDestroy(w);
209: VecDestroy(u);
210: VecDestroy(t);
212: return(0);
213: }
217: /*
218: EPSDelayedArnoldi1 - This function is similar to EPSDelayedArnoldi1,
219: but without reorthogonalization (only delayed normalization).
220: */
221: PetscErrorCode EPSDelayedArnoldi1(EPS eps,PetscScalar *H,Vec *V,int k,int *M,Vec f,PetscReal *beta,PetscTruth *breakdown)
222: {
224: int i,j,m=*M;
225: Vec w;
226: PetscScalar dot;
227: PetscReal norm=0.0;
230: VecDuplicate(f,&w);
232: for (j=k;j<m;j++) {
233: STApply(eps->OP,V[j],f);
234: EPSOrthogonalize(eps,eps->nds,PETSC_NULL,eps->DS,f,PETSC_NULL,PETSC_NULL,PETSC_NULL);
236: STMInnerProductBegin(eps->OP,j+1,f,V,H+m*j);
237: if (j>k) {
238: STInnerProductBegin(eps->OP,V[j],V[j],&dot);
239: }
240:
241: STMInnerProductEnd(eps->OP,j+1,f,V,H+m*j);
242: if (j>k) {
243: STInnerProductEnd(eps->OP,V[j],V[j],&dot);
244: }
245:
246: if (j>k) {
247: norm = sqrt(PetscRealPart(dot));
248: VecScale(V[j],1.0/norm);
249: H[m*(j-1)+j] = norm;
251: for (i=0;i<j;i++)
252: H[m*j+i] = H[m*j+i]/norm;
253: H[m*j+j] = H[m*j+j]/dot;
254: VecScale(f,1.0/norm);
255: }
257: VecSet(w,0.0);
258: VecMAXPY(w,j+1,H+m*j,V);
259: VecAXPY(f,-1.0,w);
261: if (j<m-1) {
262: VecCopy(f,V[j+1]);
263: }
264: }
266: STNorm(eps->OP,f,beta);
267: VecScale(f,1.0 / *beta);
268: *breakdown = PETSC_FALSE;
269:
270: VecDestroy(w);
271: return(0);
272: }
276: /*
277: EPSArnoldiResiduals - Computes the 2-norm of the residual vectors from
278: the information provided by the m-step Arnoldi factorization,
280: OP * V - V * H = f * e_m^T
282: For the approximate eigenpair (k_i,V*y_i), the residual norm is computed as
283: |beta*y(end,i)| where beta is the norm of f and y is the corresponding
284: eigenvector of H.
285: */
286: PetscErrorCode ArnoldiResiduals(PetscScalar *H,int ldh,PetscScalar *U,PetscReal beta,int nconv,int ncv,PetscScalar *eigr,PetscScalar *eigi,PetscReal *errest,PetscScalar *work)
287: {
288: #if defined(SLEPC_MISSING_LAPACK_TREVC)
290: SETERRQ(PETSC_ERR_SUP,"TREVC - Lapack routine is unavailable.");
291: #else
293: int i,mout,info;
294: PetscScalar *Y=work+4*ncv;
295: PetscReal w;
296: #if defined(PETSC_USE_COMPLEX)
297: PetscReal *rwork=(PetscReal*)(work+3*ncv);
298: #endif
302: /* Compute eigenvectors Y of H */
303: PetscMemcpy(Y,U,ncv*ncv*sizeof(PetscScalar));
304: #if !defined(PETSC_USE_COMPLEX)
305: LAPACKtrevc_("R","B",PETSC_NULL,&ncv,H,&ldh,PETSC_NULL,&ncv,Y,&ncv,&ncv,&mout,work,&info,1,1);
306: #else
307: LAPACKtrevc_("R","B",PETSC_NULL,&ncv,H,&ldh,PETSC_NULL,&ncv,Y,&ncv,&ncv,&mout,work,rwork,&info,1,1);
308: #endif
309: if (info) SETERRQ1(PETSC_ERR_LIB,"Error in Lapack xTREVC %i",info);
311: /* Compute residual norm estimates as beta*abs(Y(m,:)) */
312: for (i=nconv;i<ncv;i++) {
313: #if !defined(PETSC_USE_COMPLEX)
314: if (eigi[i] != 0 && i<ncv-1) {
315: errest[i] = beta*SlepcAbsEigenvalue(Y[i*ncv+ncv-1],Y[(i+1)*ncv+ncv-1]);
316: w = SlepcAbsEigenvalue(eigr[i],eigi[i]);
317: if (w > errest[i])
318: errest[i] = errest[i] / w;
319: errest[i+1] = errest[i];
320: i++;
321: } else
322: #endif
323: {
324: errest[i] = beta*PetscAbsScalar(Y[i*ncv+ncv-1]);
325: w = PetscAbsScalar(eigr[i]);
326: if (w > errest[i])
327: errest[i] = errest[i] / w;
328: }
329: }
330: return(0);
331: #endif
332: }
336: PetscErrorCode EPSSolve_ARNOLDI(EPS eps)
337: {
339: int i,k;
340: Vec f=eps->work[0];
341: PetscScalar *H=eps->T,*U,*work;
342: PetscReal beta;
343: PetscTruth breakdown;
344: EPS_ARNOLDI *arnoldi = (EPS_ARNOLDI *)eps->data;
347: PetscMemzero(eps->T,eps->ncv*eps->ncv*sizeof(PetscScalar));
348: PetscMalloc(eps->ncv*eps->ncv*sizeof(PetscScalar),&U);
349: PetscMalloc((eps->ncv+4)*eps->ncv*sizeof(PetscScalar),&work);
350:
351: /* Get the starting Arnoldi vector */
352: EPSGetStartVector(eps,0,eps->V[0],PETSC_NULL);
353:
354: /* Restart loop */
355: while (eps->reason == EPS_CONVERGED_ITERATING) {
356: eps->its++;
358: /* Compute an nv-step Arnoldi factorization */
359: eps->nv = eps->ncv;
360: if (!arnoldi->delayed) {
361: EPSBasicArnoldi(eps,PETSC_FALSE,H,eps->V,eps->nconv,&eps->nv,f,&beta,&breakdown);
362: } else if (eps->orthog_ref == EPS_ORTH_REFINE_NEVER) {
363: EPSDelayedArnoldi1(eps,H,eps->V,eps->nconv,&eps->nv,f,&beta,&breakdown);
364: } else {
365: EPSDelayedArnoldi(eps,H,eps->V,eps->nconv,&eps->nv,f,&beta,&breakdown);
366: }
368: /* Reduce H to (quasi-)triangular form, H <- U H U' */
369: PetscMemzero(U,eps->nv*eps->nv*sizeof(PetscScalar));
370: for (i=0;i<eps->nv;i++) { U[i*(eps->nv+1)] = 1.0; }
371: EPSDenseSchur(eps->nv,eps->nconv,H,eps->ncv,U,eps->eigr,eps->eigi);
373: /* Sort the remaining columns of the Schur form */
374: EPSSortDenseSchur(eps->nv,eps->nconv,H,eps->ncv,U,eps->eigr,eps->eigi,eps->which);
376: /* Compute residual norm estimates */
377: ArnoldiResiduals(H,eps->ncv,U,beta,eps->nconv,eps->nv,eps->eigr,eps->eigi,eps->errest,work);
378:
379: /* Lock converged eigenpairs and update the corresponding vectors,
380: including the restart vector: V(:,idx) = V*U(:,idx) */
381: k = eps->nconv;
382: while (k<eps->nv && eps->errest[k]<eps->tol) k++;
383: for (i=eps->nconv;i<=k && i<eps->nv;i++) {
384: VecSet(eps->AV[i],0.0);
385: VecMAXPY(eps->AV[i],eps->nv,U+eps->nv*i,eps->V);
386: }
387: for (i=eps->nconv;i<=k && i<eps->nv;i++) {
388: VecCopy(eps->AV[i],eps->V[i]);
389: }
390: eps->nconv = k;
392: EPSMonitor(eps,eps->its,eps->nconv,eps->eigr,eps->eigi,eps->errest,eps->nv);
393: if (breakdown) {
394: PetscInfo2(eps,"Breakdown in Arnoldi method (it=%i norm=%g)\n",eps->its,beta);
395: EPSGetStartVector(eps,k,eps->V[k],&breakdown);
396: if (breakdown) {
397: eps->reason = EPS_DIVERGED_BREAKDOWN;
398: PetscInfo(eps,"Unable to generate more start vectors\n");
399: }
400: }
401: if (eps->its >= eps->max_it) eps->reason = EPS_DIVERGED_ITS;
402: if (eps->nconv >= eps->nev) eps->reason = EPS_CONVERGED_TOL;
403: }
404:
405: PetscFree(U);
406: PetscFree(work);
407: return(0);
408: }
412: PetscErrorCode EPSSetFromOptions_ARNOLDI(EPS eps)
413: {
415: EPS_ARNOLDI *arnoldi = (EPS_ARNOLDI *)eps->data;
418: PetscOptionsHead("ARNOLDI options");
419: PetscOptionsTruth("-eps_arnoldi_delayed","Arnoldi with delayed reorthogonalization","EPSArnoldiSetDelayed",PETSC_FALSE,&arnoldi->delayed,PETSC_NULL);
420: PetscOptionsTail();
421: return(0);
422: }
427: PetscErrorCode EPSArnoldiSetDelayed_ARNOLDI(EPS eps,PetscTruth delayed)
428: {
429: EPS_ARNOLDI *arnoldi = (EPS_ARNOLDI *)eps->data;
432: arnoldi->delayed = delayed;
433: return(0);
434: }
439: /*@
440: EPSArnoldiSetDelayed - Activates or deactivates delayed reorthogonalization
441: in the Arnoldi iteration.
443: Collective on EPS
445: Input Parameters:
446: + eps - the eigenproblem solver context
447: - delayed - boolean flag
449: Options Database Key:
450: . -eps_arnoldi_delayed - Activates delayed reorthogonalization in Arnoldi
451:
452: Note:
453: Delayed reorthogonalization is an aggressive optimization for the Arnoldi
454: eigensolver than may provide better scalability, but sometimes makes the
455: solver converge less than the default algorithm.
457: Level: advanced
459: .seealso: EPSArnoldiGetDelayed()
460: @*/
461: PetscErrorCode EPSArnoldiSetDelayed(EPS eps,PetscTruth delayed)
462: {
463: PetscErrorCode ierr, (*f)(EPS,PetscTruth);
467: PetscObjectQueryFunction((PetscObject)eps,"EPSArnoldiSetDelayed_C",(void (**)())&f);
468: if (f) {
469: (*f)(eps,delayed);
470: }
471: return(0);
472: }
477: PetscErrorCode EPSArnoldiGetDelayed_ARNOLDI(EPS eps,PetscTruth *delayed)
478: {
479: EPS_ARNOLDI *arnoldi = (EPS_ARNOLDI *)eps->data;
482: *delayed = arnoldi->delayed;
483: return(0);
484: }
489: /*@C
490: EPSArnoldiGetDelayed - Gets the type of reorthogonalization used during the Arnoldi
491: iteration.
493: Collective on EPS
495: Input Parameter:
496: . eps - the eigenproblem solver context
498: Input Parameter:
499: . delayed - boolean flag indicating if delayed reorthogonalization has been enabled
501: Level: advanced
503: .seealso: EPSArnoldiSetDelayed()
504: @*/
505: PetscErrorCode EPSArnoldiGetDelayed(EPS eps,PetscTruth *delayed)
506: {
507: PetscErrorCode ierr, (*f)(EPS,PetscTruth*);
511: PetscObjectQueryFunction((PetscObject)eps,"EPSArnoldiGetDelayed_C",(void (**)())&f);
512: if (f) {
513: (*f)(eps,delayed);
514: }
515: return(0);
516: }
520: PetscErrorCode EPSView_ARNOLDI(EPS eps,PetscViewer viewer)
521: {
523: PetscTruth isascii;
524: EPS_ARNOLDI *arnoldi = (EPS_ARNOLDI *)eps->data;
527: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);
528: if (!isascii) {
529: SETERRQ1(1,"Viewer type %s not supported for EPSARNOLDI",((PetscObject)viewer)->type_name);
530: }
531: if (arnoldi->delayed) {
532: PetscViewerASCIIPrintf(viewer,"using delayed reorthogonalization\n");
533: }
534: return(0);
535: }
537: EXTERN PetscErrorCode EPSSolve_TS_ARNOLDI(EPS);
542: PetscErrorCode EPSCreate_ARNOLDI(EPS eps)
543: {
545: EPS_ARNOLDI *arnoldi;
546:
548: PetscNew(EPS_ARNOLDI,&arnoldi);
549: PetscLogObjectMemory(eps,sizeof(EPS_ARNOLDI));
550: eps->data = (void *)arnoldi;
551: eps->ops->solve = EPSSolve_ARNOLDI;
552: eps->ops->solvets = EPSSolve_TS_ARNOLDI;
553: eps->ops->setup = EPSSetUp_ARNOLDI;
554: eps->ops->setfromoptions = EPSSetFromOptions_ARNOLDI;
555: eps->ops->destroy = EPSDestroy_Default;
556: eps->ops->view = EPSView_ARNOLDI;
557: eps->ops->backtransform = EPSBackTransform_Default;
558: eps->ops->computevectors = EPSComputeVectors_Schur;
559: arnoldi->delayed = PETSC_FALSE;
560: PetscObjectComposeFunctionDynamic((PetscObject)eps,"EPSArnoldiSetDelayed_C","EPSArnoldiSetDelayed_ARNOLDI",EPSArnoldiSetDelayed_ARNOLDI);
561: PetscObjectComposeFunctionDynamic((PetscObject)eps,"EPSArnoldiGetDelayed_C","EPSArnoldiGetDelayed_ARNOLDI",EPSArnoldiGetDelayed_ARNOLDI);
562: return(0);
563: }