Actual source code: fnexp.c
 
   slepc-3.7.3 2016-09-29
   
  1: /*
  2:    Exponential function  exp(x)
  4:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  5:    SLEPc - Scalable Library for Eigenvalue Problem Computations
  6:    Copyright (c) 2002-2016, Universitat Politecnica de Valencia, Spain
  8:    This file is part of SLEPc.
 10:    SLEPc is free software: you can redistribute it and/or modify it under  the
 11:    terms of version 3 of the GNU Lesser General Public License as published by
 12:    the Free Software Foundation.
 14:    SLEPc  is  distributed in the hope that it will be useful, but WITHOUT  ANY
 15:    WARRANTY;  without even the implied warranty of MERCHANTABILITY or  FITNESS
 16:    FOR  A  PARTICULAR PURPOSE. See the GNU Lesser General Public  License  for
 17:    more details.
 19:    You  should have received a copy of the GNU Lesser General  Public  License
 20:    along with SLEPc. If not, see <http://www.gnu.org/licenses/>.
 21:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 22: */
 24: #include <slepc/private/fnimpl.h>      /*I "slepcfn.h" I*/
 25: #include <slepcblaslapack.h>
 29: PetscErrorCode FNEvaluateFunction_Exp(FN fn,PetscScalar x,PetscScalar *y)
 30: {
 32:   *y = PetscExpScalar(x);
 33:   return(0);
 34: }
 38: PetscErrorCode FNEvaluateDerivative_Exp(FN fn,PetscScalar x,PetscScalar *y)
 39: {
 41:   *y = PetscExpScalar(x);
 42:   return(0);
 43: }
 45: #define MAX_PADE 6
 46: #define SWAP(a,b,t) {t=a;a=b;b=t;}
 50: PetscErrorCode FNEvaluateFunctionMat_Exp(FN fn,Mat A,Mat B)
 51: {
 52: #if defined(PETSC_MISSING_LAPACK_GESV) || defined(SLEPC_MISSING_LAPACK_LANGE)
 54:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GESV/LANGE - Lapack routines are unavailable");
 55: #else
 57:   PetscBLASInt   n,ld,ld2,*ipiv,info,inc=1;
 58:   PetscInt       m,j,k,sexp;
 59:   PetscBool      odd;
 60:   const PetscInt p=MAX_PADE;
 61:   PetscReal      c[MAX_PADE+1],s,*rwork;
 62:   PetscScalar    scale,mone=-1.0,one=1.0,two=2.0,zero=0.0;
 63:   PetscScalar    *Aa,*Ba,*As,*A2,*Q,*P,*W,*aux;
 66:   MatDenseGetArray(A,&Aa);
 67:   MatDenseGetArray(B,&Ba);
 68:   MatGetSize(A,&m,NULL);
 69:   PetscBLASIntCast(m,&n);
 70:   ld  = n;
 71:   ld2 = ld*ld;
 72:   P   = Ba;
 73:   PetscMalloc6(m*m,&Q,m*m,&W,m*m,&As,m*m,&A2,ld,&rwork,ld,&ipiv);
 74:   PetscMemcpy(As,Aa,ld2*sizeof(PetscScalar));
 76:   /* Pade' coefficients */
 77:   c[0] = 1.0;
 78:   for (k=1;k<=p;k++) c[k] = c[k-1]*(p+1-k)/(k*(2*p+1-k));
 80:   /* Scaling */
 81:   s = LAPACKlange_("I",&n,&n,As,&ld,rwork);
 82:   if (s>0.5) {
 83:     sexp = PetscMax(0,(int)(PetscLogReal(s)/PetscLogReal(2.0))+2);
 84:     scale = PetscPowRealInt(2.0,-sexp);
 85:     PetscStackCallBLAS("BLASscal",BLASscal_(&ld2,&scale,As,&inc));
 86:   } else sexp = 0;
 88:   /* Horner evaluation */
 89:   PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&n,&n,&n,&one,As,&ld,As,&ld,&zero,A2,&ld));
 90:   PetscMemzero(Q,ld2*sizeof(PetscScalar));
 91:   PetscMemzero(P,ld2*sizeof(PetscScalar));
 92:   for (j=0;j<n;j++) {
 93:     Q[j+j*ld] = c[p];
 94:     P[j+j*ld] = c[p-1];
 95:   }
 97:   odd = PETSC_TRUE;
 98:   for (k=p-1;k>0;k--) {
 99:     if (odd) {
100:       PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&n,&n,&n,&one,Q,&ld,A2,&ld,&zero,W,&ld));
101:       SWAP(Q,W,aux);
102:       for (j=0;j<n;j++) Q[j+j*ld] += c[k-1];
103:       odd = PETSC_FALSE;
104:     } else {
105:       PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&n,&n,&n,&one,P,&ld,A2,&ld,&zero,W,&ld));
106:       SWAP(P,W,aux);
107:       for (j=0;j<n;j++) P[j+j*ld] += c[k-1];
108:       odd = PETSC_TRUE;
109:     }
110:   }
111:   if (odd) {
112:     PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&n,&n,&n,&one,Q,&ld,As,&ld,&zero,W,&ld));
113:     SWAP(Q,W,aux);
114:     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&ld2,&mone,P,&inc,Q,&inc));
115:     PetscStackCallBLAS("LAPACKgesv",LAPACKgesv_(&n,&n,Q,&ld,ipiv,P,&ld,&info));
116:     if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xGESV %d",info);
117:     PetscStackCallBLAS("BLASscal",BLASscal_(&ld2,&two,P,&inc));
118:     for (j=0;j<n;j++) P[j+j*ld] += 1.0;
119:     PetscStackCallBLAS("BLASscal",BLASscal_(&ld2,&mone,P,&inc));
120:   } else {
121:     PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&n,&n,&n,&one,P,&ld,As,&ld,&zero,W,&ld));
122:     SWAP(P,W,aux);
123:     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&ld2,&mone,P,&inc,Q,&inc));
124:     PetscStackCallBLAS("LAPACKgesv",LAPACKgesv_(&n,&n,Q,&ld,ipiv,P,&ld,&info));
125:     if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xGESV %d",info);
126:     PetscStackCallBLAS("BLASscal",BLASscal_(&ld2,&two,P,&inc));
127:     for (j=0;j<n;j++) P[j+j*ld] += 1.0;
128:   }
130:   for (k=1;k<=sexp;k++) {
131:     PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&n,&n,&n,&one,P,&ld,P,&ld,&zero,W,&ld));
132:     PetscMemcpy(P,W,ld2*sizeof(PetscScalar));
133:   }
134:   if (P!=Ba) { PetscMemcpy(Ba,P,ld2*sizeof(PetscScalar)); }
136:   PetscFree6(Q,W,As,A2,rwork,ipiv);
137:   MatDenseRestoreArray(A,&Aa);
138:   MatDenseRestoreArray(B,&Ba);
139:   return(0);
140: #endif
141: }
145: PetscErrorCode FNView_Exp(FN fn,PetscViewer viewer)
146: {
148:   PetscBool      isascii;
149:   char           str[50];
152:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&isascii);
153:   if (isascii) {
154:     if (fn->beta==(PetscScalar)1.0) {
155:       if (fn->alpha==(PetscScalar)1.0) {
156:         PetscViewerASCIIPrintf(viewer,"  Exponential: exp(x)\n");
157:       } else {
158:         SlepcSNPrintfScalar(str,50,fn->alpha,PETSC_TRUE);
159:         PetscViewerASCIIPrintf(viewer,"  Exponential: exp(%s*x)\n",str);
160:       }
161:     } else {
162:       SlepcSNPrintfScalar(str,50,fn->beta,PETSC_TRUE);
163:       if (fn->alpha==(PetscScalar)1.0) {
164:         PetscViewerASCIIPrintf(viewer,"  Exponential: %s*exp(x)\n",str);
165:       } else {
166:         PetscViewerASCIIPrintf(viewer,"  Exponential: %s",str);
167:         PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);
168:         SlepcSNPrintfScalar(str,50,fn->alpha,PETSC_TRUE);
169:         PetscViewerASCIIPrintf(viewer,"*exp(%s*x)\n",str);
170:         PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);
171:       }
172:     }
173:   }
174:   return(0);
175: }
179: PETSC_EXTERN PetscErrorCode FNCreate_Exp(FN fn)
180: {
182:   fn->ops->evaluatefunction    = FNEvaluateFunction_Exp;
183:   fn->ops->evaluatederivative  = FNEvaluateDerivative_Exp;
184:   fn->ops->evaluatefunctionmat = FNEvaluateFunctionMat_Exp;
185:   fn->ops->view                = FNView_Exp;
186:   return(0);
187: }