Actual source code: ks-slice.c
slepc-3.18.3 2023-03-24
1: /*
2: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3: SLEPc - Scalable Library for Eigenvalue Problem Computations
4: Copyright (c) 2002-, Universitat Politecnica de Valencia, Spain
6: This file is part of SLEPc.
7: SLEPc is distributed under a 2-clause BSD license (see LICENSE).
8: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
9: */
10: /*
11: SLEPc eigensolver: "krylovschur"
13: Method: Krylov-Schur with spectrum slicing for symmetric eigenproblems
15: References:
17: [1] R.G. Grimes et al., "A shifted block Lanczos algorithm for
18: solving sparse symmetric generalized eigenproblems", SIAM J.
19: Matrix Anal. Appl. 15(1):228-272, 1994.
21: [2] C. Campos and J.E. Roman, "Spectrum slicing strategies based
22: on restarted Lanczos methods", Numer. Algor. 60(2):279-295,
23: 2012.
24: */
26: #include <slepc/private/epsimpl.h>
27: #include "krylovschur.h"
29: static PetscBool cited = PETSC_FALSE;
30: static const char citation[] =
31: "@Article{slepc-slice,\n"
32: " author = \"C. Campos and J. E. Roman\",\n"
33: " title = \"Strategies for spectrum slicing based on restarted {Lanczos} methods\",\n"
34: " journal = \"Numer. Algorithms\",\n"
35: " volume = \"60\",\n"
36: " number = \"2\",\n"
37: " pages = \"279--295\",\n"
38: " year = \"2012,\"\n"
39: " doi = \"https://doi.org/10.1007/s11075-012-9564-z\"\n"
40: "}\n";
42: #define SLICE_PTOL PETSC_SQRT_MACHINE_EPSILON
44: static PetscErrorCode EPSSliceResetSR(EPS eps)
45: {
46: EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
47: EPS_SR sr=ctx->sr;
48: EPS_shift s;
50: if (sr) {
51: if (ctx->npart>1) {
52: BVDestroy(&sr->V);
53: PetscFree4(sr->eigr,sr->eigi,sr->errest,sr->perm);
54: }
55: /* Reviewing list of shifts to free memory */
56: s = sr->s0;
57: if (s) {
58: while (s->neighb[1]) {
59: s = s->neighb[1];
60: PetscFree(s->neighb[0]);
61: }
62: PetscFree(s);
63: }
64: PetscFree(sr);
65: }
66: ctx->sr = NULL;
67: return 0;
68: }
70: PetscErrorCode EPSReset_KrylovSchur_Slice(EPS eps)
71: {
72: EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
74: if (!ctx->global) return 0;
75: /* Reset auxiliary EPS */
76: EPSSliceResetSR(ctx->eps);
77: EPSReset(ctx->eps);
78: EPSSliceResetSR(eps);
79: PetscFree(ctx->inertias);
80: PetscFree(ctx->shifts);
81: return 0;
82: }
84: PetscErrorCode EPSDestroy_KrylovSchur_Slice(EPS eps)
85: {
86: EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
88: if (!ctx->global) return 0;
89: /* Destroy auxiliary EPS */
90: EPSReset_KrylovSchur_Slice(eps);
91: EPSDestroy(&ctx->eps);
92: if (ctx->npart>1) {
93: PetscSubcommDestroy(&ctx->subc);
94: if (ctx->commset) {
95: MPI_Comm_free(&ctx->commrank);
96: ctx->commset = PETSC_FALSE;
97: }
98: ISDestroy(&ctx->isrow);
99: ISDestroy(&ctx->iscol);
100: MatDestroyMatrices(1,&ctx->submata);
101: MatDestroyMatrices(1,&ctx->submatb);
102: }
103: PetscFree(ctx->subintervals);
104: PetscFree(ctx->nconv_loc);
105: return 0;
106: }
108: /*
109: EPSSliceAllocateSolution - Allocate memory storage for common variables such
110: as eigenvalues and eigenvectors. The argument extra is used for methods
111: that require a working basis slightly larger than ncv.
112: */
113: static PetscErrorCode EPSSliceAllocateSolution(EPS eps,PetscInt extra)
114: {
115: EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
116: PetscReal eta;
117: PetscInt k;
118: BVType type;
119: BVOrthogType orthog_type;
120: BVOrthogRefineType orthog_ref;
121: BVOrthogBlockType ob_type;
122: Mat matrix;
123: Vec t;
124: EPS_SR sr = ctx->sr;
126: /* allocate space for eigenvalues and friends */
127: k = PetscMax(1,sr->numEigs);
128: PetscFree4(sr->eigr,sr->eigi,sr->errest,sr->perm);
129: PetscMalloc4(k,&sr->eigr,k,&sr->eigi,k,&sr->errest,k,&sr->perm);
131: /* allocate sr->V and transfer options from eps->V */
132: BVDestroy(&sr->V);
133: BVCreate(PetscObjectComm((PetscObject)eps),&sr->V);
134: if (!eps->V) EPSGetBV(eps,&eps->V);
135: if (!((PetscObject)(eps->V))->type_name) BVSetType(sr->V,BVSVEC);
136: else {
137: BVGetType(eps->V,&type);
138: BVSetType(sr->V,type);
139: }
140: STMatCreateVecsEmpty(eps->st,&t,NULL);
141: BVSetSizesFromVec(sr->V,t,k);
142: VecDestroy(&t);
143: EPS_SetInnerProduct(eps);
144: BVGetMatrix(eps->V,&matrix,NULL);
145: BVSetMatrix(sr->V,matrix,PETSC_FALSE);
146: BVGetOrthogonalization(eps->V,&orthog_type,&orthog_ref,&eta,&ob_type);
147: BVSetOrthogonalization(sr->V,orthog_type,orthog_ref,eta,ob_type);
148: return 0;
149: }
151: static PetscErrorCode EPSSliceGetEPS(EPS eps)
152: {
153: EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data,*ctx_local;
154: BV V;
155: BVType type;
156: PetscReal eta;
157: BVOrthogType orthog_type;
158: BVOrthogRefineType orthog_ref;
159: BVOrthogBlockType ob_type;
160: PetscInt i;
161: PetscReal h,a,b;
162: PetscRandom rand;
163: EPS_SR sr=ctx->sr;
165: if (!ctx->eps) EPSKrylovSchurGetChildEPS(eps,&ctx->eps);
167: /* Determine subintervals */
168: if (ctx->npart==1) {
169: a = eps->inta; b = eps->intb;
170: } else {
171: if (!ctx->subintset) { /* uniform distribution if no set by user */
173: h = (eps->intb-eps->inta)/ctx->npart;
174: a = eps->inta+ctx->subc->color*h;
175: b = (ctx->subc->color==ctx->npart-1)?eps->intb:eps->inta+(ctx->subc->color+1)*h;
176: PetscFree(ctx->subintervals);
177: PetscMalloc1(ctx->npart+1,&ctx->subintervals);
178: for (i=0;i<ctx->npart;i++) ctx->subintervals[i] = eps->inta+h*i;
179: ctx->subintervals[ctx->npart] = eps->intb;
180: } else {
181: a = ctx->subintervals[ctx->subc->color];
182: b = ctx->subintervals[ctx->subc->color+1];
183: }
184: }
185: EPSSetInterval(ctx->eps,a,b);
186: EPSSetConvergenceTest(ctx->eps,eps->conv);
187: EPSSetDimensions(ctx->eps,ctx->nev,ctx->ncv,ctx->mpd);
188: EPSKrylovSchurSetLocking(ctx->eps,ctx->lock);
190: ctx_local = (EPS_KRYLOVSCHUR*)ctx->eps->data;
191: ctx_local->detect = ctx->detect;
193: /* transfer options from eps->V */
194: EPSGetBV(ctx->eps,&V);
195: BVGetRandomContext(V,&rand); /* make sure the random context is available when duplicating */
196: if (!eps->V) EPSGetBV(eps,&eps->V);
197: if (!((PetscObject)(eps->V))->type_name) BVSetType(V,BVSVEC);
198: else {
199: BVGetType(eps->V,&type);
200: BVSetType(V,type);
201: }
202: BVGetOrthogonalization(eps->V,&orthog_type,&orthog_ref,&eta,&ob_type);
203: BVSetOrthogonalization(V,orthog_type,orthog_ref,eta,ob_type);
205: ctx->eps->which = eps->which;
206: ctx->eps->max_it = eps->max_it;
207: ctx->eps->tol = eps->tol;
208: ctx->eps->purify = eps->purify;
209: if (eps->tol==PETSC_DEFAULT) eps->tol = SLEPC_DEFAULT_TOL;
210: EPSSetProblemType(ctx->eps,eps->problem_type);
211: EPSSetUp(ctx->eps);
212: ctx->eps->nconv = 0;
213: ctx->eps->its = 0;
214: for (i=0;i<ctx->eps->ncv;i++) {
215: ctx->eps->eigr[i] = 0.0;
216: ctx->eps->eigi[i] = 0.0;
217: ctx->eps->errest[i] = 0.0;
218: }
219: return 0;
220: }
222: static PetscErrorCode EPSSliceGetInertia(EPS eps,PetscReal shift,PetscInt *inertia,PetscInt *zeros)
223: {
224: KSP ksp,kspr;
225: PC pc;
226: Mat F;
227: PetscReal nzshift=shift;
228: PetscBool flg;
230: if (shift >= PETSC_MAX_REAL) { /* Right-open interval */
231: if (inertia) *inertia = eps->n;
232: } else if (shift <= PETSC_MIN_REAL) {
233: if (inertia) *inertia = 0;
234: if (zeros) *zeros = 0;
235: } else {
236: /* If the shift is zero, perturb it to a very small positive value.
237: The goal is that the nonzero pattern is the same in all cases and reuse
238: the symbolic factorizations */
239: nzshift = (shift==0.0)? 10.0/PETSC_MAX_REAL: shift;
240: STSetShift(eps->st,nzshift);
241: STGetKSP(eps->st,&ksp);
242: KSPGetPC(ksp,&pc);
243: PetscObjectTypeCompare((PetscObject)pc,PCREDUNDANT,&flg);
244: if (flg) {
245: PCRedundantGetKSP(pc,&kspr);
246: KSPGetPC(kspr,&pc);
247: }
248: PCFactorGetMatrix(pc,&F);
249: MatGetInertia(F,inertia,zeros,NULL);
250: }
251: if (inertia) PetscInfo(eps,"Computed inertia at shift %g: %" PetscInt_FMT "\n",(double)nzshift,*inertia);
252: return 0;
253: }
255: /*
256: Dummy backtransform operation
257: */
258: static PetscErrorCode EPSBackTransform_Skip(EPS eps)
259: {
260: return 0;
261: }
263: PetscErrorCode EPSSetUp_KrylovSchur_Slice(EPS eps)
264: {
265: EPS_KRYLOVSCHUR *ctx = (EPS_KRYLOVSCHUR*)eps->data,*ctx_glob;
266: EPS_SR sr,sr_loc,sr_glob;
267: PetscInt nEigs,dssz=1,i,zeros=0,off=0,method,hiteig=0;
268: PetscMPIInt nproc,rank=0,aux;
269: PetscReal r;
270: MPI_Request req;
271: Mat A,B=NULL;
272: DSParallelType ptype;
273: MPI_Comm child;
275: if (ctx->global) {
276: EPSCheckHermitianDefiniteCondition(eps,PETSC_TRUE," with spectrum slicing");
277: EPSCheckSinvertCayleyCondition(eps,PETSC_TRUE," with spectrum slicing");
281: EPSCheckUnsupportedCondition(eps,EPS_FEATURE_ARBITRARY | EPS_FEATURE_REGION | EPS_FEATURE_STOPPING,PETSC_TRUE," with spectrum slicing");
282: EPSCheckIgnoredCondition(eps,EPS_FEATURE_BALANCE,PETSC_TRUE," with spectrum slicing");
283: if (eps->tol==PETSC_DEFAULT) {
284: #if defined(PETSC_USE_REAL_SINGLE)
285: eps->tol = SLEPC_DEFAULT_TOL;
286: #else
287: /* use tighter tolerance */
288: eps->tol = SLEPC_DEFAULT_TOL*1e-2;
289: #endif
290: }
291: if (eps->max_it==PETSC_DEFAULT) eps->max_it = 100;
292: if (ctx->nev==1) ctx->nev = PetscMin(40,eps->n); /* nev not set, use default value */
294: }
295: eps->ops->backtransform = EPSBackTransform_Skip;
297: /* create spectrum slicing context and initialize it */
298: EPSSliceResetSR(eps);
299: PetscNew(&sr);
300: ctx->sr = sr;
301: sr->itsKs = 0;
302: sr->nleap = 0;
303: sr->nMAXCompl = eps->nev/4;
304: sr->iterCompl = eps->max_it/4;
305: sr->sPres = NULL;
306: sr->nS = 0;
308: if (ctx->npart==1 || ctx->global) {
309: /* check presence of ends and finding direction */
310: if ((eps->inta > PETSC_MIN_REAL && !(ctx->subintervals && ctx->subintervals[0]==ctx->subintervals[1])) || eps->intb >= PETSC_MAX_REAL) {
311: sr->int0 = eps->inta;
312: sr->int1 = eps->intb;
313: sr->dir = 1;
314: if (eps->intb >= PETSC_MAX_REAL) { /* Right-open interval */
315: sr->hasEnd = PETSC_FALSE;
316: } else sr->hasEnd = PETSC_TRUE;
317: } else {
318: sr->int0 = eps->intb;
319: sr->int1 = eps->inta;
320: sr->dir = -1;
321: sr->hasEnd = PetscNot(eps->inta <= PETSC_MIN_REAL);
322: }
323: }
324: if (ctx->global) {
325: EPSSetDimensions_Default(eps,ctx->nev,&ctx->ncv,&ctx->mpd);
326: /* create subintervals and initialize auxiliary eps for slicing runs */
327: EPSKrylovSchurGetChildEPS(eps,&ctx->eps);
328: /* prevent computation of factorization in global eps */
329: STSetTransform(eps->st,PETSC_FALSE);
330: EPSSliceGetEPS(eps);
331: sr_loc = ((EPS_KRYLOVSCHUR*)ctx->eps->data)->sr;
332: if (ctx->npart>1) {
333: PetscSubcommGetChild(ctx->subc,&child);
334: if ((sr->dir>0&&ctx->subc->color==0)||(sr->dir<0&&ctx->subc->color==ctx->npart-1)) sr->inertia0 = sr_loc->inertia0;
335: MPI_Comm_rank(child,&rank);
336: if (!rank) MPI_Bcast(&sr->inertia0,1,MPIU_INT,(sr->dir>0)?0:ctx->npart-1,ctx->commrank);
337: MPI_Bcast(&sr->inertia0,1,MPIU_INT,0,child);
338: PetscFree(ctx->nconv_loc);
339: PetscMalloc1(ctx->npart,&ctx->nconv_loc);
340: MPI_Comm_size(((PetscObject)eps)->comm,&nproc);
341: if (sr->dir<0) off = 1;
342: if (nproc%ctx->npart==0) { /* subcommunicators with the same size */
343: PetscMPIIntCast(sr_loc->numEigs,&aux);
344: MPI_Allgather(&aux,1,MPI_INT,ctx->nconv_loc,1,MPI_INT,ctx->commrank);
345: MPI_Allgather(sr_loc->dir==sr->dir?&sr_loc->int0:&sr_loc->int1,1,MPIU_REAL,ctx->subintervals+off,1,MPIU_REAL,ctx->commrank);
346: } else {
347: MPI_Comm_rank(child,&rank);
348: if (!rank) {
349: PetscMPIIntCast(sr_loc->numEigs,&aux);
350: MPI_Allgather(&aux,1,MPI_INT,ctx->nconv_loc,1,MPI_INT,ctx->commrank);
351: MPI_Allgather(sr_loc->dir==sr->dir?&sr_loc->int0:&sr_loc->int1,1,MPIU_REAL,ctx->subintervals+off,1,MPIU_REAL,ctx->commrank);
352: }
353: PetscMPIIntCast(ctx->npart,&aux);
354: MPI_Bcast(ctx->nconv_loc,aux,MPI_INT,0,child);
355: MPI_Bcast(ctx->subintervals+off,aux,MPIU_REAL,0,child);
356: }
357: nEigs = 0;
358: for (i=0;i<ctx->npart;i++) nEigs += ctx->nconv_loc[i];
359: } else {
360: nEigs = sr_loc->numEigs;
361: sr->inertia0 = sr_loc->inertia0;
362: sr->dir = sr_loc->dir;
363: }
364: sr->inertia1 = sr->inertia0+sr->dir*nEigs;
365: sr->numEigs = nEigs;
366: eps->nev = nEigs;
367: eps->ncv = nEigs;
368: eps->mpd = nEigs;
369: } else {
370: ctx_glob = (EPS_KRYLOVSCHUR*)ctx->eps->data;
371: sr_glob = ctx_glob->sr;
372: if (ctx->npart>1) {
373: sr->dir = sr_glob->dir;
374: sr->int0 = (sr->dir==1)?eps->inta:eps->intb;
375: sr->int1 = (sr->dir==1)?eps->intb:eps->inta;
376: if ((sr->dir>0&&ctx->subc->color==ctx->npart-1)||(sr->dir<0&&ctx->subc->color==0)) sr->hasEnd = sr_glob->hasEnd;
377: else sr->hasEnd = PETSC_TRUE;
378: }
379: /* sets first shift */
380: STSetShift(eps->st,(sr->int0==0.0)?10.0/PETSC_MAX_REAL:sr->int0);
381: STSetUp(eps->st);
383: /* compute inertia0 */
384: EPSSliceGetInertia(eps,sr->int0,&sr->inertia0,ctx->detect?&zeros:NULL);
385: /* undocumented option to control what to do when an eigenvalue is found:
386: - error out if it's the endpoint of the user-provided interval (or sub-interval)
387: - if it's an endpoint computed internally:
388: + if hiteig=0 error out
389: + else if hiteig=1 the subgroup that hit the eigenvalue does nothing
390: + otherwise the subgroup that hit the eigenvalue perturbs the shift and recomputes inertia
391: */
392: PetscOptionsGetInt(NULL,NULL,"-eps_krylovschur_hiteigenvalue",&hiteig,NULL);
393: if (zeros) { /* error in factorization */
396: if (hiteig==1) { /* idle subgroup */
397: sr->inertia0 = -1;
398: } else { /* perturb shift */
399: sr->int0 *= (1.0+SLICE_PTOL);
400: EPSSliceGetInertia(eps,sr->int0,&sr->inertia0,&zeros);
402: }
403: }
404: if (ctx->npart>1) {
405: PetscSubcommGetChild(ctx->subc,&child);
406: /* inertia1 is received from neighbour */
407: MPI_Comm_rank(child,&rank);
408: if (!rank) {
409: if (sr->inertia0!=-1 && ((sr->dir>0 && ctx->subc->color>0) || (sr->dir<0 && ctx->subc->color<ctx->npart-1))) { /* send inertia0 to neighbour0 */
410: MPI_Isend(&(sr->inertia0),1,MPIU_INT,ctx->subc->color-sr->dir,0,ctx->commrank,&req);
411: MPI_Isend(&(sr->int0),1,MPIU_REAL,ctx->subc->color-sr->dir,0,ctx->commrank,&req);
412: }
413: if ((sr->dir>0 && ctx->subc->color<ctx->npart-1)|| (sr->dir<0 && ctx->subc->color>0)) { /* receive inertia1 from neighbour1 */
414: MPI_Recv(&(sr->inertia1),1,MPIU_INT,ctx->subc->color+sr->dir,0,ctx->commrank,MPI_STATUS_IGNORE);
415: MPI_Recv(&(sr->int1),1,MPIU_REAL,ctx->subc->color+sr->dir,0,ctx->commrank,MPI_STATUS_IGNORE);
416: }
417: if (sr->inertia0==-1 && !(sr->dir>0 && ctx->subc->color==ctx->npart-1) && !(sr->dir<0 && ctx->subc->color==0)) {
418: sr->inertia0 = sr->inertia1; sr->int0 = sr->int1;
419: MPI_Isend(&(sr->inertia0),1,MPIU_INT,ctx->subc->color-sr->dir,0,ctx->commrank,&req);
420: MPI_Isend(&(sr->int0),1,MPIU_REAL,ctx->subc->color-sr->dir,0,ctx->commrank,&req);
421: }
422: }
423: if ((sr->dir>0 && ctx->subc->color<ctx->npart-1)||(sr->dir<0 && ctx->subc->color>0)) {
424: MPI_Bcast(&sr->inertia1,1,MPIU_INT,0,child);
425: MPI_Bcast(&sr->int1,1,MPIU_REAL,0,child);
426: } else sr_glob->inertia1 = sr->inertia1;
427: }
429: /* last process in eps comm computes inertia1 */
430: if (ctx->npart==1 || ((sr->dir>0 && ctx->subc->color==ctx->npart-1) || (sr->dir<0 && ctx->subc->color==0))) {
431: EPSSliceGetInertia(eps,sr->int1,&sr->inertia1,ctx->detect?&zeros:NULL);
433: if (!rank && sr->inertia0==-1) {
434: sr->inertia0 = sr->inertia1; sr->int0 = sr->int1;
435: MPI_Isend(&(sr->inertia0),1,MPIU_INT,ctx->subc->color-sr->dir,0,ctx->commrank,&req);
436: MPI_Isend(&(sr->int0),1,MPIU_REAL,ctx->subc->color-sr->dir,0,ctx->commrank,&req);
437: }
438: if (sr->hasEnd) {
439: sr->dir = -sr->dir; r = sr->int0; sr->int0 = sr->int1; sr->int1 = r;
440: i = sr->inertia0; sr->inertia0 = sr->inertia1; sr->inertia1 = i;
441: }
442: }
444: /* number of eigenvalues in interval */
445: sr->numEigs = (sr->dir)*(sr->inertia1 - sr->inertia0);
446: if (ctx->npart>1) {
447: /* memory allocate for subinterval eigenpairs */
448: EPSSliceAllocateSolution(eps,1);
449: }
450: dssz = eps->ncv+1;
451: DSGetParallel(ctx->eps->ds,&ptype);
452: DSSetParallel(eps->ds,ptype);
453: DSGetMethod(ctx->eps->ds,&method);
454: DSSetMethod(eps->ds,method);
455: }
456: DSSetType(eps->ds,DSHEP);
457: DSSetCompact(eps->ds,PETSC_TRUE);
458: DSAllocate(eps->ds,dssz);
459: /* keep state of subcomm matrices to check that the user does not modify them */
460: EPSGetOperators(eps,&A,&B);
461: PetscObjectStateGet((PetscObject)A,&ctx->Astate);
462: PetscObjectGetId((PetscObject)A,&ctx->Aid);
463: if (B) {
464: PetscObjectStateGet((PetscObject)B,&ctx->Bstate);
465: PetscObjectGetId((PetscObject)B,&ctx->Bid);
466: } else {
467: ctx->Bstate=0;
468: ctx->Bid=0;
469: }
470: return 0;
471: }
473: static PetscErrorCode EPSSliceGatherEigenVectors(EPS eps)
474: {
475: Vec v,vg,v_loc;
476: IS is1,is2;
477: VecScatter vec_sc;
478: EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
479: PetscInt nloc,m0,n0,i,si,idx,*idx1,*idx2,j;
480: PetscScalar *array;
481: EPS_SR sr_loc;
482: BV V_loc;
484: sr_loc = ((EPS_KRYLOVSCHUR*)ctx->eps->data)->sr;
485: V_loc = sr_loc->V;
487: /* Gather parallel eigenvectors */
488: BVGetColumn(eps->V,0,&v);
489: VecGetOwnershipRange(v,&n0,&m0);
490: BVRestoreColumn(eps->V,0,&v);
491: BVGetColumn(ctx->eps->V,0,&v);
492: VecGetLocalSize(v,&nloc);
493: BVRestoreColumn(ctx->eps->V,0,&v);
494: PetscMalloc2(m0-n0,&idx1,m0-n0,&idx2);
495: VecCreateMPI(PetscObjectComm((PetscObject)eps),nloc,PETSC_DECIDE,&vg);
496: idx = -1;
497: for (si=0;si<ctx->npart;si++) {
498: j = 0;
499: for (i=n0;i<m0;i++) {
500: idx1[j] = i;
501: idx2[j++] = i+eps->n*si;
502: }
503: ISCreateGeneral(PetscObjectComm((PetscObject)eps),(m0-n0),idx1,PETSC_COPY_VALUES,&is1);
504: ISCreateGeneral(PetscObjectComm((PetscObject)eps),(m0-n0),idx2,PETSC_COPY_VALUES,&is2);
505: BVGetColumn(eps->V,0,&v);
506: VecScatterCreate(v,is1,vg,is2,&vec_sc);
507: BVRestoreColumn(eps->V,0,&v);
508: ISDestroy(&is1);
509: ISDestroy(&is2);
510: for (i=0;i<ctx->nconv_loc[si];i++) {
511: BVGetColumn(eps->V,++idx,&v);
512: if (ctx->subc->color==si) {
513: BVGetColumn(V_loc,i,&v_loc);
514: VecGetArray(v_loc,&array);
515: VecPlaceArray(vg,array);
516: }
517: VecScatterBegin(vec_sc,vg,v,INSERT_VALUES,SCATTER_REVERSE);
518: VecScatterEnd(vec_sc,vg,v,INSERT_VALUES,SCATTER_REVERSE);
519: if (ctx->subc->color==si) {
520: VecResetArray(vg);
521: VecRestoreArray(v_loc,&array);
522: BVRestoreColumn(V_loc,i,&v_loc);
523: }
524: BVRestoreColumn(eps->V,idx,&v);
525: }
526: VecScatterDestroy(&vec_sc);
527: }
528: PetscFree2(idx1,idx2);
529: VecDestroy(&vg);
530: return 0;
531: }
533: /*
534: EPSComputeVectors_Slice - Recover Eigenvectors from subcomunicators
535: */
536: PetscErrorCode EPSComputeVectors_Slice(EPS eps)
537: {
538: EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
540: if (ctx->global && ctx->npart>1) {
541: EPSComputeVectors(ctx->eps);
542: EPSSliceGatherEigenVectors(eps);
543: }
544: return 0;
545: }
547: #define SWAP(a,b,t) {t=a;a=b;b=t;}
549: static PetscErrorCode EPSSliceGetInertias(EPS eps,PetscInt *n,PetscReal **shifts,PetscInt **inertias)
550: {
551: EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
552: PetscInt i=0,j,tmpi;
553: PetscReal v,tmpr;
554: EPS_shift s;
558: if (!ctx->sr->s0) { /* EPSSolve not called yet */
559: *n = 2;
560: } else {
561: *n = 1;
562: s = ctx->sr->s0;
563: while (s) {
564: (*n)++;
565: s = s->neighb[1];
566: }
567: }
568: PetscMalloc1(*n,shifts);
569: PetscMalloc1(*n,inertias);
570: if (!ctx->sr->s0) { /* EPSSolve not called yet */
571: (*shifts)[0] = ctx->sr->int0;
572: (*shifts)[1] = ctx->sr->int1;
573: (*inertias)[0] = ctx->sr->inertia0;
574: (*inertias)[1] = ctx->sr->inertia1;
575: } else {
576: s = ctx->sr->s0;
577: while (s) {
578: (*shifts)[i] = s->value;
579: (*inertias)[i++] = s->inertia;
580: s = s->neighb[1];
581: }
582: (*shifts)[i] = ctx->sr->int1;
583: (*inertias)[i] = ctx->sr->inertia1;
584: }
585: /* remove possible duplicate in last position */
586: if ((*shifts)[(*n)-1]==(*shifts)[(*n)-2]) (*n)--;
587: /* sort result */
588: for (i=0;i<*n;i++) {
589: v = (*shifts)[i];
590: for (j=i+1;j<*n;j++) {
591: if (v > (*shifts)[j]) {
592: SWAP((*shifts)[i],(*shifts)[j],tmpr);
593: SWAP((*inertias)[i],(*inertias)[j],tmpi);
594: v = (*shifts)[i];
595: }
596: }
597: }
598: return 0;
599: }
601: static PetscErrorCode EPSSliceGatherSolution(EPS eps)
602: {
603: PetscMPIInt rank,nproc,*disp,*ns_loc,aux;
604: EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
605: PetscInt i,idx,j,*perm_loc,off=0,*inertias_loc,ns;
606: PetscScalar *eigr_loc;
607: EPS_SR sr_loc;
608: PetscReal *shifts_loc;
609: MPI_Comm child;
611: eps->nconv = 0;
612: for (i=0;i<ctx->npart;i++) eps->nconv += ctx->nconv_loc[i];
613: sr_loc = ((EPS_KRYLOVSCHUR*)ctx->eps->data)->sr;
615: /* Gather the shifts used and the inertias computed */
616: EPSSliceGetInertias(ctx->eps,&ns,&shifts_loc,&inertias_loc);
617: if (ctx->sr->dir>0 && shifts_loc[ns-1]==sr_loc->int1 && ctx->subc->color<ctx->npart-1) ns--;
618: if (ctx->sr->dir<0 && shifts_loc[ns-1]==sr_loc->int0 && ctx->subc->color>0) {
619: ns--;
620: for (i=0;i<ns;i++) {
621: inertias_loc[i] = inertias_loc[i+1];
622: shifts_loc[i] = shifts_loc[i+1];
623: }
624: }
625: PetscMalloc1(ctx->npart,&ns_loc);
626: PetscSubcommGetChild(ctx->subc,&child);
627: MPI_Comm_rank(child,&rank);
628: PetscMPIIntCast(ns,&aux);
629: if (!rank) MPI_Allgather(&aux,1,MPI_INT,ns_loc,1,MPI_INT,ctx->commrank);
630: PetscMPIIntCast(ctx->npart,&aux);
631: MPI_Bcast(ns_loc,aux,MPI_INT,0,child);
632: ctx->nshifts = 0;
633: for (i=0;i<ctx->npart;i++) ctx->nshifts += ns_loc[i];
634: PetscFree(ctx->inertias);
635: PetscFree(ctx->shifts);
636: PetscMalloc1(ctx->nshifts,&ctx->inertias);
637: PetscMalloc1(ctx->nshifts,&ctx->shifts);
639: /* Gather eigenvalues (same ranks have fully set of eigenvalues)*/
640: eigr_loc = sr_loc->eigr;
641: perm_loc = sr_loc->perm;
642: MPI_Comm_size(((PetscObject)eps)->comm,&nproc);
643: PetscMalloc1(ctx->npart,&disp);
644: disp[0] = 0;
645: for (i=1;i<ctx->npart;i++) disp[i] = disp[i-1]+ctx->nconv_loc[i-1];
646: if (nproc%ctx->npart==0) { /* subcommunicators with the same size */
647: PetscMPIIntCast(sr_loc->numEigs,&aux);
648: MPI_Allgatherv(eigr_loc,aux,MPIU_SCALAR,eps->eigr,ctx->nconv_loc,disp,MPIU_SCALAR,ctx->commrank); /* eigenvalues */
649: MPI_Allgatherv(perm_loc,aux,MPIU_INT,eps->perm,ctx->nconv_loc,disp,MPIU_INT,ctx->commrank); /* perm */
650: for (i=1;i<ctx->npart;i++) disp[i] = disp[i-1]+ns_loc[i-1];
651: PetscMPIIntCast(ns,&aux);
652: MPI_Allgatherv(shifts_loc,aux,MPIU_REAL,ctx->shifts,ns_loc,disp,MPIU_REAL,ctx->commrank); /* shifts */
653: MPI_Allgatherv(inertias_loc,aux,MPIU_INT,ctx->inertias,ns_loc,disp,MPIU_INT,ctx->commrank); /* inertias */
654: MPIU_Allreduce(&sr_loc->itsKs,&eps->its,1,MPIU_INT,MPI_SUM,ctx->commrank);
655: } else { /* subcommunicators with different size */
656: if (!rank) {
657: PetscMPIIntCast(sr_loc->numEigs,&aux);
658: MPI_Allgatherv(eigr_loc,aux,MPIU_SCALAR,eps->eigr,ctx->nconv_loc,disp,MPIU_SCALAR,ctx->commrank); /* eigenvalues */
659: MPI_Allgatherv(perm_loc,aux,MPIU_INT,eps->perm,ctx->nconv_loc,disp,MPIU_INT,ctx->commrank); /* perm */
660: for (i=1;i<ctx->npart;i++) disp[i] = disp[i-1]+ns_loc[i-1];
661: PetscMPIIntCast(ns,&aux);
662: MPI_Allgatherv(shifts_loc,aux,MPIU_REAL,ctx->shifts,ns_loc,disp,MPIU_REAL,ctx->commrank); /* shifts */
663: MPI_Allgatherv(inertias_loc,aux,MPIU_INT,ctx->inertias,ns_loc,disp,MPIU_INT,ctx->commrank); /* inertias */
664: MPIU_Allreduce(&sr_loc->itsKs,&eps->its,1,MPIU_INT,MPI_SUM,ctx->commrank);
665: }
666: PetscMPIIntCast(eps->nconv,&aux);
667: MPI_Bcast(eps->eigr,aux,MPIU_SCALAR,0,child);
668: MPI_Bcast(eps->perm,aux,MPIU_INT,0,child);
669: MPI_Bcast(ctx->shifts,ctx->nshifts,MPIU_REAL,0,child);
670: PetscMPIIntCast(ctx->nshifts,&aux);
671: MPI_Bcast(ctx->inertias,aux,MPIU_INT,0,child);
672: MPI_Bcast(&eps->its,1,MPIU_INT,0,child);
673: }
674: /* Update global array eps->perm */
675: idx = ctx->nconv_loc[0];
676: for (i=1;i<ctx->npart;i++) {
677: off += ctx->nconv_loc[i-1];
678: for (j=0;j<ctx->nconv_loc[i];j++) eps->perm[idx++] += off;
679: }
681: /* Gather parallel eigenvectors */
682: PetscFree(ns_loc);
683: PetscFree(disp);
684: PetscFree(shifts_loc);
685: PetscFree(inertias_loc);
686: return 0;
687: }
689: /*
690: Fills the fields of a shift structure
691: */
692: static PetscErrorCode EPSCreateShift(EPS eps,PetscReal val,EPS_shift neighb0,EPS_shift neighb1)
693: {
694: EPS_shift s,*pending2;
695: PetscInt i;
696: EPS_SR sr;
697: EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
699: sr = ctx->sr;
700: if ((neighb0 && val==neighb0->value) || (neighb1 && val==neighb1->value)) {
701: sr->nPend++;
702: return 0;
703: }
704: PetscNew(&s);
705: s->value = val;
706: s->neighb[0] = neighb0;
707: if (neighb0) neighb0->neighb[1] = s;
708: s->neighb[1] = neighb1;
709: if (neighb1) neighb1->neighb[0] = s;
710: s->comp[0] = PETSC_FALSE;
711: s->comp[1] = PETSC_FALSE;
712: s->index = -1;
713: s->neigs = 0;
714: s->nconv[0] = s->nconv[1] = 0;
715: s->nsch[0] = s->nsch[1]=0;
716: /* Inserts in the stack of pending shifts */
717: /* If needed, the array is resized */
718: if (sr->nPend >= sr->maxPend) {
719: sr->maxPend *= 2;
720: PetscMalloc1(sr->maxPend,&pending2);
721: for (i=0;i<sr->nPend;i++) pending2[i] = sr->pending[i];
722: PetscFree(sr->pending);
723: sr->pending = pending2;
724: }
725: sr->pending[sr->nPend++]=s;
726: return 0;
727: }
729: /* Prepare for Rational Krylov update */
730: static PetscErrorCode EPSPrepareRational(EPS eps)
731: {
732: EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
733: PetscInt dir,i,k,ld,nv;
734: PetscScalar *A;
735: EPS_SR sr = ctx->sr;
736: Vec v;
738: DSGetLeadingDimension(eps->ds,&ld);
739: dir = (sr->sPres->neighb[0] == sr->sPrev)?1:-1;
740: dir*=sr->dir;
741: k = 0;
742: for (i=0;i<sr->nS;i++) {
743: if (dir*PetscRealPart(sr->S[i])>0.0) {
744: sr->S[k] = sr->S[i];
745: sr->S[sr->nS+k] = sr->S[sr->nS+i];
746: BVGetColumn(sr->Vnext,k,&v);
747: BVCopyVec(eps->V,eps->nconv+i,v);
748: BVRestoreColumn(sr->Vnext,k,&v);
749: k++;
750: if (k>=sr->nS/2)break;
751: }
752: }
753: /* Copy to DS */
754: DSGetArray(eps->ds,DS_MAT_A,&A);
755: PetscArrayzero(A,ld*ld);
756: for (i=0;i<k;i++) {
757: A[i*(1+ld)] = sr->S[i];
758: A[k+i*ld] = sr->S[sr->nS+i];
759: }
760: sr->nS = k;
761: DSRestoreArray(eps->ds,DS_MAT_A,&A);
762: DSGetDimensions(eps->ds,&nv,NULL,NULL,NULL);
763: DSSetDimensions(eps->ds,nv,0,k);
764: /* Append u to V */
765: BVGetColumn(sr->Vnext,sr->nS,&v);
766: BVCopyVec(eps->V,sr->nv,v);
767: BVRestoreColumn(sr->Vnext,sr->nS,&v);
768: return 0;
769: }
771: /* Provides next shift to be computed */
772: static PetscErrorCode EPSExtractShift(EPS eps)
773: {
774: PetscInt iner,zeros=0;
775: EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
776: EPS_SR sr;
777: PetscReal newShift,diam,ptol;
778: EPS_shift sPres;
780: sr = ctx->sr;
781: if (sr->nPend > 0) {
782: if (sr->sPres==sr->pending[sr->nPend-1]) {
783: eps->reason = EPS_CONVERGED_ITERATING;
784: eps->its = 0;
785: sr->nPend--;
786: sr->sPres->rep = PETSC_TRUE;
787: return 0;
788: }
789: sr->sPrev = sr->sPres;
790: sr->sPres = sr->pending[--sr->nPend];
791: sPres = sr->sPres;
792: EPSSliceGetInertia(eps,sPres->value,&iner,ctx->detect?&zeros:NULL);
793: if (zeros) {
794: diam = PetscMin(PetscAbsReal(sPres->neighb[0]->value-sPres->value),PetscAbsReal(sPres->neighb[1]->value-sPres->value));
795: ptol = PetscMin(SLICE_PTOL,diam/2);
796: newShift = sPres->value*(1.0+ptol);
797: if (sr->dir*(sPres->neighb[0] && newShift-sPres->neighb[0]->value) < 0) newShift = (sPres->value+sPres->neighb[0]->value)/2;
798: else if (sPres->neighb[1] && sr->dir*(sPres->neighb[1]->value-newShift) < 0) newShift = (sPres->value+sPres->neighb[1]->value)/2;
799: EPSSliceGetInertia(eps,newShift,&iner,&zeros);
801: sPres->value = newShift;
802: }
803: sr->sPres->inertia = iner;
804: eps->target = sr->sPres->value;
805: eps->reason = EPS_CONVERGED_ITERATING;
806: eps->its = 0;
807: } else sr->sPres = NULL;
808: return 0;
809: }
811: /*
812: Symmetric KrylovSchur adapted to spectrum slicing:
813: Allows searching an specific amount of eigenvalues in the subintervals left and right.
814: Returns whether the search has succeeded
815: */
816: static PetscErrorCode EPSKrylovSchur_Slice(EPS eps)
817: {
818: EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
819: PetscInt i,k,l,ld,nv,*iwork,j,count0,count1,iterCompl=0,n0,n1;
820: Mat U,Op,T;
821: PetscScalar *Q,*A;
822: PetscReal *a,*b,beta,lambda;
823: EPS_shift sPres;
824: PetscBool breakdown,complIterating,sch0,sch1;
825: EPS_SR sr = ctx->sr;
827: /* Spectrum slicing data */
828: sPres = sr->sPres;
829: complIterating =PETSC_FALSE;
830: sch1 = sch0 = PETSC_TRUE;
831: DSGetLeadingDimension(eps->ds,&ld);
832: PetscMalloc1(2*ld,&iwork);
833: count0=0;count1=0; /* Found on both sides */
834: if (!sPres->rep && sr->nS > 0 && (sPres->neighb[0] == sr->sPrev || sPres->neighb[1] == sr->sPrev)) {
835: /* Rational Krylov */
836: DSTranslateRKS(eps->ds,sr->sPrev->value-sPres->value);
837: DSGetDimensions(eps->ds,NULL,NULL,&l,NULL);
838: DSSetDimensions(eps->ds,l+1,0,0);
839: BVSetActiveColumns(eps->V,0,l+1);
840: DSGetMat(eps->ds,DS_MAT_Q,&U);
841: BVMultInPlace(eps->V,U,0,l+1);
842: DSRestoreMat(eps->ds,DS_MAT_Q,&U);
843: } else {
844: /* Get the starting Lanczos vector */
845: EPSGetStartVector(eps,0,NULL);
846: l = 0;
847: }
848: /* Restart loop */
849: while (eps->reason == EPS_CONVERGED_ITERATING) {
850: eps->its++; sr->itsKs++;
851: /* Compute an nv-step Lanczos factorization */
852: nv = PetscMin(eps->nconv+eps->mpd,eps->ncv);
853: DSSetDimensions(eps->ds,nv,eps->nconv,eps->nconv+l);
854: DSGetMat(eps->ds,DS_MAT_T,&T);
855: STGetOperator(eps->st,&Op);
856: BVMatLanczos(eps->V,Op,T,eps->nconv+l,&nv,&beta,&breakdown);
857: STRestoreOperator(eps->st,&Op);
858: sr->nv = nv;
859: DSRestoreMat(eps->ds,DS_MAT_T,&T);
860: DSSetDimensions(eps->ds,nv,eps->nconv,eps->nconv+l);
861: if (l==0) DSSetState(eps->ds,DS_STATE_INTERMEDIATE);
862: else DSSetState(eps->ds,DS_STATE_RAW);
863: BVSetActiveColumns(eps->V,eps->nconv,nv);
865: /* Solve projected problem and compute residual norm estimates */
866: if (eps->its == 1 && l > 0) {/* After rational update */
867: DSGetArray(eps->ds,DS_MAT_A,&A);
868: DSGetArrayReal(eps->ds,DS_MAT_T,&a);
869: b = a + ld;
870: k = eps->nconv+l;
871: A[k*ld+k-1] = A[(k-1)*ld+k];
872: A[k*ld+k] = a[k];
873: for (j=k+1; j< nv; j++) {
874: A[j*ld+j] = a[j];
875: A[j*ld+j-1] = b[j-1] ;
876: A[(j-1)*ld+j] = b[j-1];
877: }
878: DSRestoreArray(eps->ds,DS_MAT_A,&A);
879: DSRestoreArrayReal(eps->ds,DS_MAT_T,&a);
880: DSSolve(eps->ds,eps->eigr,NULL);
881: DSSort(eps->ds,eps->eigr,NULL,NULL,NULL,NULL);
882: DSSetCompact(eps->ds,PETSC_TRUE);
883: } else { /* Restart */
884: DSSolve(eps->ds,eps->eigr,NULL);
885: DSSort(eps->ds,eps->eigr,NULL,NULL,NULL,NULL);
886: }
887: DSSynchronize(eps->ds,eps->eigr,NULL);
889: /* Residual */
890: EPSKrylovConvergence(eps,PETSC_TRUE,eps->nconv,nv-eps->nconv,beta,0.0,1.0,&k);
891: /* Checking values obtained for completing */
892: for (i=0;i<k;i++) {
893: sr->back[i]=eps->eigr[i];
894: }
895: STBackTransform(eps->st,k,sr->back,eps->eigi);
896: count0=count1=0;
897: for (i=0;i<k;i++) {
898: lambda = PetscRealPart(sr->back[i]);
899: if (((sr->dir)*(sPres->value - lambda) > 0) && ((sr->dir)*(lambda - sPres->ext[0]) > 0)) count0++;
900: if (((sr->dir)*(lambda - sPres->value) > 0) && ((sr->dir)*(sPres->ext[1] - lambda) > 0)) count1++;
901: }
902: if (k>eps->nev && eps->ncv-k<5) eps->reason = EPS_CONVERGED_TOL;
903: else {
904: /* Checks completion */
905: if ((!sch0||count0 >= sPres->nsch[0]) && (!sch1 ||count1 >= sPres->nsch[1])) {
906: eps->reason = EPS_CONVERGED_TOL;
907: } else {
908: if (!complIterating && eps->its >= eps->max_it) eps->reason = EPS_DIVERGED_ITS;
909: if (complIterating) {
910: if (--iterCompl <= 0) eps->reason = EPS_DIVERGED_ITS;
911: } else if (k >= eps->nev) {
912: n0 = sPres->nsch[0]-count0;
913: n1 = sPres->nsch[1]-count1;
914: if (sr->iterCompl>0 && ((n0>0 && n0<= sr->nMAXCompl)||(n1>0&&n1<=sr->nMAXCompl))) {
915: /* Iterating for completion*/
916: complIterating = PETSC_TRUE;
917: if (n0 >sr->nMAXCompl)sch0 = PETSC_FALSE;
918: if (n1 >sr->nMAXCompl)sch1 = PETSC_FALSE;
919: iterCompl = sr->iterCompl;
920: } else eps->reason = EPS_CONVERGED_TOL;
921: }
922: }
923: }
924: /* Update l */
925: if (eps->reason == EPS_CONVERGED_ITERATING) l = PetscMax(1,(PetscInt)((nv-k)*ctx->keep));
926: else l = nv-k;
927: if (breakdown) l=0;
928: if (!ctx->lock && l>0 && eps->reason == EPS_CONVERGED_ITERATING) { l += k; k = 0; } /* non-locking variant: reset no. of converged pairs */
930: if (eps->reason == EPS_CONVERGED_ITERATING) {
931: if (breakdown) {
932: /* Start a new Lanczos factorization */
933: PetscInfo(eps,"Breakdown in Krylov-Schur method (it=%" PetscInt_FMT " norm=%g)\n",eps->its,(double)beta);
934: EPSGetStartVector(eps,k,&breakdown);
935: if (breakdown) {
936: eps->reason = EPS_DIVERGED_BREAKDOWN;
937: PetscInfo(eps,"Unable to generate more start vectors\n");
938: }
939: } else {
940: /* Prepare the Rayleigh quotient for restart */
941: DSGetArrayReal(eps->ds,DS_MAT_T,&a);
942: DSGetArray(eps->ds,DS_MAT_Q,&Q);
943: b = a + ld;
944: for (i=k;i<k+l;i++) {
945: a[i] = PetscRealPart(eps->eigr[i]);
946: b[i] = PetscRealPart(Q[nv-1+i*ld]*beta);
947: }
948: DSRestoreArrayReal(eps->ds,DS_MAT_T,&a);
949: DSRestoreArray(eps->ds,DS_MAT_Q,&Q);
950: }
951: }
952: /* Update the corresponding vectors V(:,idx) = V*Q(:,idx) */
953: DSGetMat(eps->ds,DS_MAT_Q,&U);
954: BVMultInPlace(eps->V,U,eps->nconv,k+l);
955: DSRestoreMat(eps->ds,DS_MAT_Q,&U);
957: /* Normalize u and append it to V */
958: if (eps->reason == EPS_CONVERGED_ITERATING && !breakdown) BVCopyColumn(eps->V,nv,k+l);
959: eps->nconv = k;
960: if (eps->reason != EPS_CONVERGED_ITERATING) {
961: /* Store approximated values for next shift */
962: DSGetArray(eps->ds,DS_MAT_Q,&Q);
963: sr->nS = l;
964: for (i=0;i<l;i++) {
965: sr->S[i] = eps->eigr[i+k];/* Diagonal elements */
966: sr->S[i+l] = Q[nv-1+(i+k)*ld]*beta; /* Out of diagonal elements */
967: }
968: DSRestoreArray(eps->ds,DS_MAT_Q,&Q);
969: }
970: }
971: /* Check for completion */
972: for (i=0;i< eps->nconv; i++) {
973: if ((sr->dir)*PetscRealPart(eps->eigr[i])>0) sPres->nconv[1]++;
974: else sPres->nconv[0]++;
975: }
976: sPres->comp[0] = PetscNot(count0 < sPres->nsch[0]);
977: sPres->comp[1] = PetscNot(count1 < sPres->nsch[1]);
978: PetscInfo(eps,"Lanczos: %" PetscInt_FMT " evals in [%g,%g]%s and %" PetscInt_FMT " evals in [%g,%g]%s\n",count0,(double)(sr->dir==1?sPres->ext[0]:sPres->value),(double)(sr->dir==1?sPres->value:sPres->ext[0]),sPres->comp[0]?"*":"",count1,(double)(sr->dir==1?sPres->value:sPres->ext[1]),(double)(sr->dir==1?sPres->ext[1]:sPres->value),sPres->comp[1]?"*":"");
980: PetscFree(iwork);
981: return 0;
982: }
984: /*
985: Obtains value of subsequent shift
986: */
987: static PetscErrorCode EPSGetNewShiftValue(EPS eps,PetscInt side,PetscReal *newS)
988: {
989: PetscReal lambda,d_prev;
990: PetscInt i,idxP;
991: EPS_SR sr;
992: EPS_shift sPres,s;
993: EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
995: sr = ctx->sr;
996: sPres = sr->sPres;
997: if (sPres->neighb[side]) {
998: /* Completing a previous interval */
999: *newS = (sPres->value + sPres->neighb[side]->value)/2;
1000: if (PetscAbsReal(sPres->value - *newS)/PetscAbsReal(sPres->value)<=100*PETSC_SQRT_MACHINE_EPSILON) *newS = sPres->value;
1001: } else { /* (Only for side=1). Creating a new interval. */
1002: if (sPres->neigs==0) {/* No value has been accepted*/
1003: if (sPres->neighb[0]) {
1004: /* Multiplying by 10 the previous distance */
1005: *newS = sPres->value + 10*(sr->dir)*PetscAbsReal(sPres->value - sPres->neighb[0]->value);
1006: sr->nleap++;
1007: /* Stops when the interval is open and no values are found in the last 5 shifts (there might be infinite eigenvalues) */
1009: } else { /* First shift */
1011: /* Unaccepted values give information for next shift */
1012: idxP=0;/* Number of values left from shift */
1013: for (i=0;i<eps->nconv;i++) {
1014: lambda = PetscRealPart(eps->eigr[i]);
1015: if ((sr->dir)*(lambda - sPres->value) <0) idxP++;
1016: else break;
1017: }
1018: /* Avoiding subtraction of eigenvalues (might be the same).*/
1019: if (idxP>0) {
1020: d_prev = PetscAbsReal(sPres->value - PetscRealPart(eps->eigr[0]))/(idxP+0.3);
1021: } else {
1022: d_prev = PetscAbsReal(sPres->value - PetscRealPart(eps->eigr[eps->nconv-1]))/(eps->nconv+0.3);
1023: }
1024: *newS = sPres->value + ((sr->dir)*d_prev*eps->nev)/2;
1025: }
1026: } else { /* Accepted values found */
1027: sr->nleap = 0;
1028: /* Average distance of values in previous subinterval */
1029: s = sPres->neighb[0];
1030: while (s && PetscAbs(s->inertia - sPres->inertia)==0) {
1031: s = s->neighb[0];/* Looking for previous shifts with eigenvalues within */
1032: }
1033: if (s) {
1034: d_prev = PetscAbsReal((sPres->value - s->value)/(sPres->inertia - s->inertia));
1035: } else { /* First shift. Average distance obtained with values in this shift */
1036: /* first shift might be too far from first wanted eigenvalue (no values found outside the interval)*/
1037: if ((sr->dir)*(PetscRealPart(sr->eigr[0])-sPres->value)>0 && PetscAbsReal((PetscRealPart(sr->eigr[sr->indexEig-1]) - PetscRealPart(sr->eigr[0]))/PetscRealPart(sr->eigr[0])) > PetscSqrtReal(eps->tol)) {
1038: d_prev = PetscAbsReal((PetscRealPart(sr->eigr[sr->indexEig-1]) - PetscRealPart(sr->eigr[0])))/(sPres->neigs+0.3);
1039: } else {
1040: d_prev = PetscAbsReal(PetscRealPart(sr->eigr[sr->indexEig-1]) - sPres->value)/(sPres->neigs+0.3);
1041: }
1042: }
1043: /* Average distance is used for next shift by adding it to value on the right or to shift */
1044: if ((sr->dir)*(PetscRealPart(sr->eigr[sPres->index + sPres->neigs -1]) - sPres->value)>0) {
1045: *newS = PetscRealPart(sr->eigr[sPres->index + sPres->neigs -1])+ ((sr->dir)*d_prev*(eps->nev))/2;
1046: } else { /* Last accepted value is on the left of shift. Adding to shift */
1047: *newS = sPres->value + ((sr->dir)*d_prev*(eps->nev))/2;
1048: }
1049: }
1050: /* End of interval can not be surpassed */
1051: if ((sr->dir)*(sr->int1 - *newS) < 0) *newS = sr->int1;
1052: }/* of neighb[side]==null */
1053: return 0;
1054: }
1056: /*
1057: Function for sorting an array of real values
1058: */
1059: static PetscErrorCode sortRealEigenvalues(PetscScalar *r,PetscInt *perm,PetscInt nr,PetscBool prev,PetscInt dir)
1060: {
1061: PetscReal re;
1062: PetscInt i,j,tmp;
1064: if (!prev) for (i=0;i<nr;i++) perm[i] = i;
1065: /* Insertion sort */
1066: for (i=1;i<nr;i++) {
1067: re = PetscRealPart(r[perm[i]]);
1068: j = i-1;
1069: while (j>=0 && dir*(re - PetscRealPart(r[perm[j]])) <= 0) {
1070: tmp = perm[j]; perm[j] = perm[j+1]; perm[j+1] = tmp; j--;
1071: }
1072: }
1073: return 0;
1074: }
1076: /* Stores the pairs obtained since the last shift in the global arrays */
1077: static PetscErrorCode EPSStoreEigenpairs(EPS eps)
1078: {
1079: EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
1080: PetscReal lambda,err,norm;
1081: PetscInt i,count;
1082: PetscBool iscayley;
1083: EPS_SR sr = ctx->sr;
1084: EPS_shift sPres;
1085: Vec v,w;
1087: sPres = sr->sPres;
1088: sPres->index = sr->indexEig;
1089: count = sr->indexEig;
1090: /* Back-transform */
1091: STBackTransform(eps->st,eps->nconv,eps->eigr,eps->eigi);
1092: PetscObjectTypeCompare((PetscObject)eps->st,STCAYLEY,&iscayley);
1093: /* Sort eigenvalues */
1094: sortRealEigenvalues(eps->eigr,eps->perm,eps->nconv,PETSC_FALSE,sr->dir);
1095: /* Values stored in global array */
1096: for (i=0;i<eps->nconv;i++) {
1097: lambda = PetscRealPart(eps->eigr[eps->perm[i]]);
1098: err = eps->errest[eps->perm[i]];
1100: if ((sr->dir)*(lambda - sPres->ext[0]) > 0 && (sr->dir)*(sPres->ext[1] - lambda) > 0) {/* Valid value */
1102: sr->eigr[count] = lambda;
1103: sr->errest[count] = err;
1104: /* Explicit purification */
1105: BVGetColumn(eps->V,eps->perm[i],&w);
1106: if (eps->purify) {
1107: BVGetColumn(sr->V,count,&v);
1108: STApply(eps->st,w,v);
1109: BVRestoreColumn(sr->V,count,&v);
1110: } else BVInsertVec(sr->V,count,w);
1111: BVRestoreColumn(eps->V,eps->perm[i],&w);
1112: BVNormColumn(sr->V,count,NORM_2,&norm);
1113: BVScaleColumn(sr->V,count,1.0/norm);
1114: count++;
1115: }
1116: }
1117: sPres->neigs = count - sr->indexEig;
1118: sr->indexEig = count;
1119: /* Global ordering array updating */
1120: sortRealEigenvalues(sr->eigr,sr->perm,count,PETSC_TRUE,sr->dir);
1121: return 0;
1122: }
1124: static PetscErrorCode EPSLookForDeflation(EPS eps)
1125: {
1126: PetscReal val;
1127: PetscInt i,count0=0,count1=0;
1128: EPS_shift sPres;
1129: PetscInt ini,fin,k,idx0,idx1;
1130: EPS_SR sr;
1131: Vec v;
1132: EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
1134: sr = ctx->sr;
1135: sPres = sr->sPres;
1137: if (sPres->neighb[0]) ini = (sr->dir)*(sPres->neighb[0]->inertia - sr->inertia0);
1138: else ini = 0;
1139: fin = sr->indexEig;
1140: /* Selection of ends for searching new values */
1141: if (!sPres->neighb[0]) sPres->ext[0] = sr->int0;/* First shift */
1142: else sPres->ext[0] = sPres->neighb[0]->value;
1143: if (!sPres->neighb[1]) {
1144: if (sr->hasEnd) sPres->ext[1] = sr->int1;
1145: else sPres->ext[1] = (sr->dir > 0)?PETSC_MAX_REAL:PETSC_MIN_REAL;
1146: } else sPres->ext[1] = sPres->neighb[1]->value;
1147: /* Selection of values between right and left ends */
1148: for (i=ini;i<fin;i++) {
1149: val=PetscRealPart(sr->eigr[sr->perm[i]]);
1150: /* Values to the right of left shift */
1151: if ((sr->dir)*(val - sPres->ext[1]) < 0) {
1152: if ((sr->dir)*(val - sPres->value) < 0) count0++;
1153: else count1++;
1154: } else break;
1155: }
1156: /* The number of values on each side are found */
1157: if (sPres->neighb[0]) {
1158: sPres->nsch[0] = (sr->dir)*(sPres->inertia - sPres->neighb[0]->inertia)-count0;
1160: } else sPres->nsch[0] = 0;
1162: if (sPres->neighb[1]) {
1163: sPres->nsch[1] = (sr->dir)*(sPres->neighb[1]->inertia - sPres->inertia) - count1;
1165: } else sPres->nsch[1] = (sr->dir)*(sr->inertia1 - sPres->inertia);
1167: /* Completing vector of indexes for deflation */
1168: idx0 = ini;
1169: idx1 = ini+count0+count1;
1170: k=0;
1171: for (i=idx0;i<idx1;i++) sr->idxDef[k++]=sr->perm[i];
1172: BVDuplicateResize(eps->V,k+eps->ncv+1,&sr->Vnext);
1173: BVSetNumConstraints(sr->Vnext,k);
1174: for (i=0;i<k;i++) {
1175: BVGetColumn(sr->Vnext,-i-1,&v);
1176: BVCopyVec(sr->V,sr->idxDef[i],v);
1177: BVRestoreColumn(sr->Vnext,-i-1,&v);
1178: }
1180: /* For rational Krylov */
1181: if (sr->nS>0 && (sr->sPrev == sr->sPres->neighb[0] || sr->sPrev == sr->sPres->neighb[1])) EPSPrepareRational(eps);
1182: eps->nconv = 0;
1183: /* Get rid of temporary Vnext */
1184: BVDestroy(&eps->V);
1185: eps->V = sr->Vnext;
1186: sr->Vnext = NULL;
1187: return 0;
1188: }
1190: PetscErrorCode EPSSolve_KrylovSchur_Slice(EPS eps)
1191: {
1192: PetscInt i,lds,ti;
1193: PetscReal newS;
1194: EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
1195: EPS_SR sr=ctx->sr;
1196: Mat A,B=NULL;
1197: PetscObjectState Astate,Bstate=0;
1198: PetscObjectId Aid,Bid=0;
1200: PetscCitationsRegister(citation,&cited);
1201: if (ctx->global) {
1202: EPSSolve_KrylovSchur_Slice(ctx->eps);
1203: ctx->eps->state = EPS_STATE_SOLVED;
1204: eps->reason = EPS_CONVERGED_TOL;
1205: if (ctx->npart>1) {
1206: /* Gather solution from subsolvers */
1207: EPSSliceGatherSolution(eps);
1208: } else {
1209: eps->nconv = sr->numEigs;
1210: eps->its = ctx->eps->its;
1211: PetscFree(ctx->inertias);
1212: PetscFree(ctx->shifts);
1213: EPSSliceGetInertias(ctx->eps,&ctx->nshifts,&ctx->shifts,&ctx->inertias);
1214: }
1215: } else {
1216: if (ctx->npart==1) {
1217: sr->eigr = ctx->eps->eigr;
1218: sr->eigi = ctx->eps->eigi;
1219: sr->perm = ctx->eps->perm;
1220: sr->errest = ctx->eps->errest;
1221: sr->V = ctx->eps->V;
1222: }
1223: /* Check that the user did not modify subcomm matrices */
1224: EPSGetOperators(eps,&A,&B);
1225: PetscObjectStateGet((PetscObject)A,&Astate);
1226: PetscObjectGetId((PetscObject)A,&Aid);
1227: if (B) {
1228: PetscObjectStateGet((PetscObject)B,&Bstate);
1229: PetscObjectGetId((PetscObject)B,&Bid);
1230: }
1232: /* Only with eigenvalues present in the interval ...*/
1233: if (sr->numEigs==0) {
1234: eps->reason = EPS_CONVERGED_TOL;
1235: return 0;
1236: }
1237: /* Array of pending shifts */
1238: sr->maxPend = 100; /* Initial size */
1239: sr->nPend = 0;
1240: PetscMalloc1(sr->maxPend,&sr->pending);
1241: EPSCreateShift(eps,sr->int0,NULL,NULL);
1242: /* extract first shift */
1243: sr->sPrev = NULL;
1244: sr->sPres = sr->pending[--sr->nPend];
1245: sr->sPres->inertia = sr->inertia0;
1246: eps->target = sr->sPres->value;
1247: sr->s0 = sr->sPres;
1248: sr->indexEig = 0;
1249: /* Memory reservation for auxiliary variables */
1250: lds = PetscMin(eps->mpd,eps->ncv);
1251: PetscCalloc1(lds*lds,&sr->S);
1252: PetscMalloc1(eps->ncv,&sr->back);
1253: for (i=0;i<sr->numEigs;i++) {
1254: sr->eigr[i] = 0.0;
1255: sr->eigi[i] = 0.0;
1256: sr->errest[i] = 0.0;
1257: sr->perm[i] = i;
1258: }
1259: /* Vectors for deflation */
1260: PetscMalloc1(sr->numEigs,&sr->idxDef);
1261: sr->indexEig = 0;
1262: /* Main loop */
1263: while (sr->sPres) {
1264: /* Search for deflation */
1265: EPSLookForDeflation(eps);
1266: /* KrylovSchur */
1267: EPSKrylovSchur_Slice(eps);
1269: EPSStoreEigenpairs(eps);
1270: /* Select new shift */
1271: if (!sr->sPres->comp[1]) {
1272: EPSGetNewShiftValue(eps,1,&newS);
1273: EPSCreateShift(eps,newS,sr->sPres,sr->sPres->neighb[1]);
1274: }
1275: if (!sr->sPres->comp[0]) {
1276: /* Completing earlier interval */
1277: EPSGetNewShiftValue(eps,0,&newS);
1278: EPSCreateShift(eps,newS,sr->sPres->neighb[0],sr->sPres);
1279: }
1280: /* Preparing for a new search of values */
1281: EPSExtractShift(eps);
1282: }
1284: /* Updating eps values prior to exit */
1285: PetscFree(sr->S);
1286: PetscFree(sr->idxDef);
1287: PetscFree(sr->pending);
1288: PetscFree(sr->back);
1289: BVDuplicateResize(eps->V,eps->ncv+1,&sr->Vnext);
1290: BVSetNumConstraints(sr->Vnext,0);
1291: BVDestroy(&eps->V);
1292: eps->V = sr->Vnext;
1293: eps->nconv = sr->indexEig;
1294: eps->reason = EPS_CONVERGED_TOL;
1295: eps->its = sr->itsKs;
1296: eps->nds = 0;
1297: if (sr->dir<0) {
1298: for (i=0;i<eps->nconv/2;i++) {
1299: ti = sr->perm[i]; sr->perm[i] = sr->perm[eps->nconv-1-i]; sr->perm[eps->nconv-1-i] = ti;
1300: }
1301: }
1302: }
1303: return 0;
1304: }