/* MIT License Copyright (c) 2021 Parallel Applications Modelling Group - GMAP GMAP website: https://gmap.pucrs.br Pontifical Catholic University of Rio Grande do Sul (PUCRS) Av. Ipiranga, 6681, Porto Alegre - Brazil, 90619-900 Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ------------------------------------------------------------------------------ The original NPB 3.4.1 version was written in Fortran and belongs to: http://www.nas.nasa.gov/Software/NPB/ Authors of the Fortran code: S. Weeratunga V. Venkatakrishnan E. Barszcz M. Yarrow H. Jin ------------------------------------------------------------------------------ The serial C++ version is a translation of the original NPB 3.4.1 Serial C++ version: https://github.com/GMAP/NPB-CPP/tree/master/NPB-SER Authors of the C++ code: Dalvan Griebler Gabriell Araujo Júnior Löff ------------------------------------------------------------------------------ The OpenMP version is a parallel implementation of the serial C++ version OpenMP version: https://github.com/GMAP/NPB-CPP/tree/master/NPB-OMP Authors of the OpenMP code: Júnior Löff */ #include "../common/npb-CPP.hpp" #include "npbparams.hpp" /* * --------------------------------------------------------------------- * driver for the performance evaluation of the solver for * five coupled parabolic/elliptic partial differential equations * --------------------------------------------------------------------- * parameters which can be overridden in runtime config file * isiz1,isiz2,isiz3 give the maximum size * ipr = 1 to print out verbose information * omega = 2.0 is correct for all classes * tolrsd is tolerance levels for steady state residuals * --------------------------------------------------------------------- * field variables and residuals * to improve cache performance, second two dimensions padded by 1 * for even number sizes only. * note: corresponding array (called "v") in routines blts, buts, * and l2norm are similarly padded * --------------------------------------------------------------------- */ #define IPR_DEFAULT 1 #define OMEGA_DEFAULT 1.2 #define TOLRSD1_DEF 1.0e-08 #define TOLRSD2_DEF 1.0e-08 #define TOLRSD3_DEF 1.0e-08 #define TOLRSD4_DEF 1.0e-08 #define TOLRSD5_DEF 1.0e-08 #define C1 1.40e+00 #define C2 0.40e+00 #define C3 1.00e-01 #define C4 1.00e+00 #define C5 1.40e+00 #define T_TOTAL 1 #define T_RHSX 2 #define T_RHSY 3 #define T_RHSZ 4 #define T_RHS 5 #define T_JACLD 6 #define T_BLTS 7 #define T_JACU 8 #define T_BUTS 9 #define T_ADD 10 #define T_L2NORM 11 #define T_LAST 11 /* global variables */ #if defined(DO_NOT_ALLOCATE_ARRAYS_WITH_DYNAMIC_MEMORY_AND_AS_SINGLE_DIMENSION) static double u[ISIZ3][ISIZ2 / 2 * 2 + 1][ISIZ1 / 2 * 2 + 1][5]; static double rsd[ISIZ3][ISIZ2 / 2 * 2 + 1][ISIZ1 / 2 * 2 + 1][5]; static double frct[ISIZ3][ISIZ2 / 2 * 2 + 1][ISIZ1 / 2 * 2 + 1][5]; static double flux[ISIZ1][5]; static double qs[ISIZ3][ISIZ2 / 2 * 2 + 1][ISIZ1 / 2 * 2 + 1]; static double rho_i[ISIZ3][ISIZ2 / 2 * 2 + 1][ISIZ1 / 2 * 2 + 1]; static double a[ISIZ2][ISIZ1 / 2 * 2 + 1][5][5]; static double b[ISIZ2][ISIZ1 / 2 * 2 + 1][5][5]; static double c[ISIZ2][ISIZ1 / 2 * 2 + 1][5][5]; static double d[ISIZ2][ISIZ1 / 2 * 2 + 1][5][5]; static double ce[13][5]; #else static double (*u)[ISIZ2 / 2 * 2 + 1][ISIZ1 / 2 * 2 + 1][5] = (double (*)[ISIZ2 / 2 * 2 + 1][ISIZ1 / 2 * 2 + 1][5]) malloc(sizeof(double) * ((ISIZ3) * (ISIZ2 / 2 * 2 + 1) * (ISIZ1 / 2 * 2 + 1) * (5))); static double (*rsd)[ISIZ2 / 2 * 2 + 1][ISIZ1 / 2 * 2 + 1][5] = (double (*)[ISIZ2 / 2 * 2 + 1][ISIZ1 / 2 * 2 + 1][5]) malloc(sizeof(double) * ((ISIZ3) * (ISIZ2 / 2 * 2 + 1) * (ISIZ1 / 2 * 2 + 1) * (5))); static double (*frct)[ISIZ2 / 2 * 2 + 1][ISIZ1 / 2 * 2 + 1][5] = (double (*)[ISIZ2 / 2 * 2 + 1][ISIZ1 / 2 * 2 + 1][5]) malloc(sizeof(double) * ((ISIZ3) * (ISIZ2 / 2 * 2 + 1) * (ISIZ1 / 2 * 2 + 1) * (5))); static double (*flux)[5] = (double (*)[5]) malloc(sizeof(double) * ((ISIZ1) * (5))); static double (*qs)[ISIZ2 / 2 * 2 + 1][ISIZ1 / 2 * 2 + 1] = (double (*)[ISIZ2 / 2 * 2 + 1][ISIZ1 / 2 * 2 + 1]) malloc(sizeof(double) * ((ISIZ3) * (ISIZ2 / 2 * 2 + 1) * (ISIZ1 / 2 * 2 + 1))); static double (*rho_i)[ISIZ2 / 2 * 2 + 1][ISIZ1 / 2 * 2 + 1] = (double (*)[ISIZ2 / 2 * 2 + 1][ISIZ1 / 2 * 2 + 1]) malloc(sizeof(double) * ((ISIZ3) * (ISIZ2 / 2 * 2 + 1) * (ISIZ1 / 2 * 2 + 1))); static double (*a)[ISIZ1 / 2 * 2 + 1][5][5] = (double (*)[ISIZ1 / 2 * 2 + 1][5][5]) malloc(sizeof(double) * ((ISIZ2) * (ISIZ1 / 2 * 2 + 1) * (5) * (5))); static double (*b)[ISIZ1 / 2 * 2 + 1][5][5] = (double (*)[ISIZ1 / 2 * 2 + 1][5][5]) malloc(sizeof(double) * ((ISIZ2) * (ISIZ1 / 2 * 2 + 1) * (5) * (5))); static double (*c)[ISIZ1 / 2 * 2 + 1][5][5] = (double (*)[ISIZ1 / 2 * 2 + 1][5][5]) malloc(sizeof(double) * ((ISIZ2) * (ISIZ1 / 2 * 2 + 1) * (5) * (5))); static double (*d)[ISIZ1 / 2 * 2 + 1][5][5] = (double (*)[ISIZ1 / 2 * 2 + 1][5][5]) malloc(sizeof(double) * ((ISIZ2) * (ISIZ1 / 2 * 2 + 1) * (5) * (5))); static double (*ce)[5] = (double (*)[5]) malloc(sizeof(double) * ((13) * (5))); #endif /* grid */ static double dxi, deta, dzeta; static double tx1, tx2, tx3; static double ty1, ty2, ty3; static double tz1, tz2, tz3; static int nx, ny, nz; static int nx0, ny0, nz0; static int ist, iend; static int jst, jend; static int ii1, ii2; static int ji1, ji2; static int ki1, ki2; /* dissipation */ static double dx1, dx2, dx3, dx4, dx5; static double dy1, dy2, dy3, dy4, dy5; static double dz1, dz2, dz3, dz4, dz5; static double dssp; /* output control parameters */ static int ipr, inorm; /* newton-raphson iteration control parameters */ static double dt, omega, tolrsd[5], rsdnm[5], errnm[5], frc; static int itmax; /* timer */ static double maxtime; static boolean timeron; /* function prototypes */ void blts(int nx, int ny, int nz, int k, double omega, double v[][ISIZ2 / 2 * 2 + 1][ISIZ1 / 2 * 2 + 1][5], double ldz[][ISIZ1 / 2 * 2 + 1][5][5], double ldy[][ISIZ1 / 2 * 2 + 1][5][5], double ldx[][ISIZ1 / 2 * 2 + 1][5][5], double d[][ISIZ1 / 2 * 2 + 1][5][5], int ist, int iend, int jst, int jend, int nx0, int ny0); void buts(int nx, int ny, int nz, int k, double omega, double v[][ISIZ2 / 2 * 2 + 1][ISIZ1 / 2 * 2 + 1][5], void* pointer_tv, double d[][ISIZ1 / 2 * 2 + 1][5][5], double udx[][ISIZ1 / 2 * 2 + 1][5][5], double udy[][ISIZ1 / 2 * 2 + 1][5][5], double udz[][ISIZ1 / 2 * 2 + 1][5][5], int ist, int iend, int jst, int jend, int nx0, int ny0); void domain(); void erhs(); void error(); void exact(int i, int j, int k, double u000ijk[]); void jacld(int k); void jacu(int k); void l2norm(int nx0, int ny0, int nz0, int ist, int iend, int jst, int jend, double v[][ISIZ2 / 2 * 2 + 1][ISIZ1 / 2 * 2 + 1][5], double sum[5]); void pintgr(); void read_input(); void rhs(); void setbv(); void setcoeff(); void setiv(); void ssor(int niter); void verify(double xcr[], double xce[], double xci, char* class_npb, boolean* verified); static boolean flag[ISIZ1 / 2 * 2 + 1]; static boolean flag2[ISIZ1 / 2 * 2 + 1]; /* lu */ int main(int, char*[]) { #if defined(DO_NOT_ALLOCATE_ARRAYS_WITH_DYNAMIC_MEMORY_AND_AS_SINGLE_DIMENSION) printf( " DO_NOT_ALLOCATE_ARRAYS_WITH_DYNAMIC_MEMORY_AND_AS_SINGLE_DIMENSION mode on\n"); #endif char class_npb; boolean verified; double mflops; double t, tmax, trecs[T_LAST + 1]; int i; char* t_names[T_LAST + 1]; /* * --------------------------------------------------------------------- * setup info for timers * --------------------------------------------------------------------- */ FILE* fp; if((fp = fopen("timer.flag", "r")) != NULL) { timeron = TRUE; t_names[T_TOTAL] = (char*) "total"; t_names[T_RHSX] = (char*) "rhsx"; t_names[T_RHSY] = (char*) "rhsy"; t_names[T_RHSZ] = (char*) "rhsz"; t_names[T_RHS] = (char*) "rhs"; t_names[T_JACLD] = (char*) "jacld"; t_names[T_BLTS] = (char*) "blts"; t_names[T_JACU] = (char*) "jacu"; t_names[T_BUTS] = (char*) "buts"; t_names[T_ADD] = (char*) "add"; t_names[T_L2NORM] = (char*) "l2norm"; fclose(fp); } else { timeron = FALSE; } /* * --------------------------------------------------------------------- * read input data * --------------------------------------------------------------------- */ read_input(); /* * --------------------------------------------------------------------- * set up domain sizes * --------------------------------------------------------------------- */ domain(); /* * --------------------------------------------------------------------- * set up coefficients * --------------------------------------------------------------------- */ setcoeff(); #pragma omp parallel { /* * --------------------------------------------------------------------- * set the boundary values for dependent variables * --------------------------------------------------------------------- */ setbv(); /* * --------------------------------------------------------------------- * set the initial values for dependent variables * --------------------------------------------------------------------- */ setiv(); /* * --------------------------------------------------------------------- * compute the forcing term based on prescribed exact solution * --------------------------------------------------------------------- */ erhs(); } /* end parallel */ /* * --------------------------------------------------------------------- * perform one SSOR iteration to touch all pages * --------------------------------------------------------------------- */ ssor(1); #pragma omp parallel { /* * --------------------------------------------------------------------- * reset the boundary and initial values * --------------------------------------------------------------------- */ setbv(); setiv(); } /* * --------------------------------------------------------------------- * perform the SSOR iterations * --------------------------------------------------------------------- */ ssor(itmax); /* * --------------------------------------------------------------------- * compute the solution error * --------------------------------------------------------------------- */ error(); /* * --------------------------------------------------------------------- * compute the surface integral * --------------------------------------------------------------------- */ pintgr(); /* * --------------------------------------------------------------------- * verification test * --------------------------------------------------------------------- */ verify(rsdnm, errnm, frc, &class_npb, &verified); mflops = (double) itmax * (1984.77 * (double) nx0 * (double) ny0 * (double) nz0 - 10923.3 * pow(((double) (nx0 + ny0 + nz0) / 3.0), 2.0) + 27770.9 * (double) (nx0 + ny0 + nz0) / 3.0 - 144010.0) / (maxtime * 1000000.0); setenv("OMP_NUM_THREADS", "1", 0); c_print_results((char*) "LU", class_npb, nx0, ny0, nz0, itmax, maxtime, mflops, (char*) " floating point", verified, (char*) NPBVERSION, (char*) COMPILETIME, (char*) COMPILERVERSION, (char*) LIBVERSION, std::getenv("OMP_NUM_THREADS"), (char*) CS1, (char*) CS2, (char*) CS3, (char*) CS4, (char*) CS5, (char*) CS6, (char*) "(none)"); /* * --------------------------------------------------------------------- * more timers * --------------------------------------------------------------------- */ if(timeron) { for(i = 1; i <= T_LAST; i++) { trecs[i] = timer_read(i); } tmax = maxtime; if(tmax == 0.0) { tmax = 1.0; } printf(" SECTION Time (secs)\n"); for(i = 1; i <= T_LAST; i++) { printf(" %-8s:%9.3f (%6.2f%%)\n", t_names[i], trecs[i], trecs[i] * 100. / tmax); if(i == T_RHS) { t = trecs[T_RHSX] + trecs[T_RHSY] + trecs[T_RHSZ]; printf(" --> %8s:%9.3f (%6.2f%%)\n", "sub-rhs", t, t * 100. / tmax); t = trecs[i] - t; printf(" --> %8s:%9.3f (%6.2f%%)\n", "rest-rhs", t, t * 100. / tmax); } } } return 0; } /* * --------------------------------------------------------------------- * compute the regular-sparse, block lower triangular solution: * v <-- ( L-inv ) * v * --------------------------------------------------------------------- * to improve cache performance, second two dimensions padded by 1 * for even number sizes only. only needed in v. * --------------------------------------------------------------------- */ void blts(int /*nx*/, int /*ny*/, int /*nz*/, int k, double omega, double v[][ISIZ2 / 2 * 2 + 1][ISIZ1 / 2 * 2 + 1][5], double ldz[][ISIZ1 / 2 * 2 + 1][5][5], double ldy[][ISIZ1 / 2 * 2 + 1][5][5], double ldx[][ISIZ1 / 2 * 2 + 1][5][5], double d[][ISIZ1 / 2 * 2 + 1][5][5], int ist, int iend, int jst, int jend, int /*nx0*/, int /*ny0*/) { /* * --------------------------------------------------------------------- * local variables * --------------------------------------------------------------------- */ int i, j, m; double tmp, tmp1; double tmat[5][5], tv[5]; #pragma omp for nowait schedule(static) for(j = jst; j < jend; j++) { for(i = ist; i < iend; i++) { for(m = 0; m < 5; m++) { v[k][j][i][m] = v[k][j][i][m] - omega * (ldz[j][i][0][m] * v[k - 1][j][i][0] + ldz[j][i][1][m] * v[k - 1][j][i][1] + ldz[j][i][2][m] * v[k - 1][j][i][2] + ldz[j][i][3][m] * v[k - 1][j][i][3] + ldz[j][i][4][m] * v[k - 1][j][i][4]); } } } #pragma omp for nowait schedule(static) for(j = jst; j < jend; j++) { if(j != jst) { while(flag[j - 1] == 0) { #pragma omp flush ; } } if(j != jend - 1) { while(flag[j] == 1) { #pragma omp flush ; } } for(i = ist; i < iend; i++) { for(m = 0; m < 5; m++) { tv[m] = v[k][j][i][m] - omega * (ldy[j][i][0][m] * v[k][j - 1][i][0] + ldx[j][i][0][m] * v[k][j][i - 1][0] + ldy[j][i][1][m] * v[k][j - 1][i][1] + ldx[j][i][1][m] * v[k][j][i - 1][1] + ldy[j][i][2][m] * v[k][j - 1][i][2] + ldx[j][i][2][m] * v[k][j][i - 1][2] + ldy[j][i][3][m] * v[k][j - 1][i][3] + ldx[j][i][3][m] * v[k][j][i - 1][3] + ldy[j][i][4][m] * v[k][j - 1][i][4] + ldx[j][i][4][m] * v[k][j][i - 1][4]); } /* * --------------------------------------------------------------------- * diagonal block inversion * * forward elimination * --------------------------------------------------------------------- */ for(m = 0; m < 5; m++) { tmat[0][m] = d[j][i][0][m]; tmat[1][m] = d[j][i][1][m]; tmat[2][m] = d[j][i][2][m]; tmat[3][m] = d[j][i][3][m]; tmat[4][m] = d[j][i][4][m]; } /* */ tmp1 = 1.0 / tmat[0][0]; tmp = tmp1 * tmat[0][1]; tmat[1][1] = tmat[1][1] - tmp * tmat[1][0]; tmat[2][1] = tmat[2][1] - tmp * tmat[2][0]; tmat[3][1] = tmat[3][1] - tmp * tmat[3][0]; tmat[4][1] = tmat[4][1] - tmp * tmat[4][0]; tv[1] = tv[1] - tv[0] * tmp; /* */ tmp = tmp1 * tmat[0][2]; tmat[1][2] = tmat[1][2] - tmp * tmat[1][0]; tmat[2][2] = tmat[2][2] - tmp * tmat[2][0]; tmat[3][2] = tmat[3][2] - tmp * tmat[3][0]; tmat[4][2] = tmat[4][2] - tmp * tmat[4][0]; tv[2] = tv[2] - tv[0] * tmp; /* */ tmp = tmp1 * tmat[0][3]; tmat[1][3] = tmat[1][3] - tmp * tmat[1][0]; tmat[2][3] = tmat[2][3] - tmp * tmat[2][0]; tmat[3][3] = tmat[3][3] - tmp * tmat[3][0]; tmat[4][3] = tmat[4][3] - tmp * tmat[4][0]; tv[3] = tv[3] - tv[0] * tmp; /* */ tmp = tmp1 * tmat[0][4]; tmat[1][4] = tmat[1][4] - tmp * tmat[1][0]; tmat[2][4] = tmat[2][4] - tmp * tmat[2][0]; tmat[3][4] = tmat[3][4] - tmp * tmat[3][0]; tmat[4][4] = tmat[4][4] - tmp * tmat[4][0]; tv[4] = tv[4] - tv[0] * tmp; /* */ tmp1 = 1.0 / tmat[1][1]; tmp = tmp1 * tmat[1][2]; tmat[2][2] = tmat[2][2] - tmp * tmat[2][1]; tmat[3][2] = tmat[3][2] - tmp * tmat[3][1]; tmat[4][2] = tmat[4][2] - tmp * tmat[4][1]; tv[2] = tv[2] - tv[1] * tmp; /* */ tmp = tmp1 * tmat[1][3]; tmat[2][3] = tmat[2][3] - tmp * tmat[2][1]; tmat[3][3] = tmat[3][3] - tmp * tmat[3][1]; tmat[4][3] = tmat[4][3] - tmp * tmat[4][1]; tv[3] = tv[3] - tv[1] * tmp; /* */ tmp = tmp1 * tmat[1][4]; tmat[2][4] = tmat[2][4] - tmp * tmat[2][1]; tmat[3][4] = tmat[3][4] - tmp * tmat[3][1]; tmat[4][4] = tmat[4][4] - tmp * tmat[4][1]; tv[4] = tv[4] - tv[1] * tmp; /* */ tmp1 = 1.0 / tmat[2][2]; tmp = tmp1 * tmat[2][3]; tmat[3][3] = tmat[3][3] - tmp * tmat[3][2]; tmat[4][3] = tmat[4][3] - tmp * tmat[4][2]; tv[3] = tv[3] - tv[2] * tmp; /* */ tmp = tmp1 * tmat[2][4]; tmat[3][4] = tmat[3][4] - tmp * tmat[3][2]; tmat[4][4] = tmat[4][4] - tmp * tmat[4][2]; tv[4] = tv[4] - tv[2] * tmp; /* */ tmp1 = 1.0 / tmat[3][3]; tmp = tmp1 * tmat[3][4]; tmat[4][4] = tmat[4][4] - tmp * tmat[4][3]; tv[4] = tv[4] - tv[3] * tmp; /* * --------------------------------------------------------------------- * back substitution * --------------------------------------------------------------------- */ v[k][j][i][4] = tv[4] / tmat[4][4]; tv[3] = tv[3] - tmat[4][3] * v[k][j][i][4]; v[k][j][i][3] = tv[3] / tmat[3][3]; tv[2] = tv[2] - tmat[3][2] * v[k][j][i][3] - tmat[4][2] * v[k][j][i][4]; v[k][j][i][2] = tv[2] / tmat[2][2]; tv[1] = tv[1] - tmat[2][1] * v[k][j][i][2] - tmat[3][1] * v[k][j][i][3] - tmat[4][1] * v[k][j][i][4]; v[k][j][i][1] = tv[1] / tmat[1][1]; tv[0] = tv[0] - tmat[1][0] * v[k][j][i][1] - tmat[2][0] * v[k][j][i][2] - tmat[3][0] * v[k][j][i][3] - tmat[4][0] * v[k][j][i][4]; v[k][j][i][0] = tv[0] / tmat[0][0]; } if(j != jend - 1) flag[j] = 1; if(j != jst) flag[j - 1] = 0; } } /* * --------------------------------------------------------------------- * compute the regular-sparse, block upper triangular solution: * v <-- ( U-inv ) * v * --------------------------------------------------------------------- * to improve cache performance, second two dimensions padded by 1 * for even number sizes only. only needed in v. * --------------------------------------------------------------------- */ void buts(int /*nx*/, int /*ny*/, int /*nz*/, int k, double omega, double v[][ISIZ2 / 2 * 2 + 1][ISIZ1 / 2 * 2 + 1][5], void* pointer_tv, double d[][ISIZ1 / 2 * 2 + 1][5][5], double udx[][ISIZ1 / 2 * 2 + 1][5][5], double udy[][ISIZ1 / 2 * 2 + 1][5][5], double udz[][ISIZ1 / 2 * 2 + 1][5][5], int ist, int iend, int jst, int jend, int /*nx0*/, int /*ny0*/) { /* * --------------------------------------------------------------------- * local variables * --------------------------------------------------------------------- */ double(*tv)[ISIZ1 / 2 * 2 + 1][5] = (double(*)[ISIZ1 / 2 * 2 + 1][5]) pointer_tv; int i, j, m; double tmp, tmp1; double tmat[5][5]; #pragma omp for nowait schedule(static) for(j = jend - 1; j >= jst; j--) { for(i = iend - 1; i >= ist; i--) { for(m = 0; m < 5; m++) { tv[j][i][m] = omega * (udz[j][i][0][m] * v[k + 1][j][i][0] + udz[j][i][1][m] * v[k + 1][j][i][1] + udz[j][i][2][m] * v[k + 1][j][i][2] + udz[j][i][3][m] * v[k + 1][j][i][3] + udz[j][i][4][m] * v[k + 1][j][i][4]); } } } #pragma omp for nowait schedule(static) for(j = jend - 1; j >= jst; j--) { if(j != jend - 1) { while(flag2[j + 1] == 0) { #pragma omp flush ; } } if(j != jst) { while(flag2[j] == 1) { #pragma omp flush ; } } for(i = iend - 1; i >= ist; i--) { for(m = 0; m < 5; m++) { tv[j][i][m] = tv[j][i][m] + omega * (udy[j][i][0][m] * v[k][j + 1][i][0] + udx[j][i][0][m] * v[k][j][i + 1][0] + udy[j][i][1][m] * v[k][j + 1][i][1] + udx[j][i][1][m] * v[k][j][i + 1][1] + udy[j][i][2][m] * v[k][j + 1][i][2] + udx[j][i][2][m] * v[k][j][i + 1][2] + udy[j][i][3][m] * v[k][j + 1][i][3] + udx[j][i][3][m] * v[k][j][i + 1][3] + udy[j][i][4][m] * v[k][j + 1][i][4] + udx[j][i][4][m] * v[k][j][i + 1][4]); } /* * --------------------------------------------------------------------- * diagonal block inversion * --------------------------------------------------------------------- */ for(m = 0; m < 5; m++) { tmat[0][m] = d[j][i][0][m]; tmat[1][m] = d[j][i][1][m]; tmat[2][m] = d[j][i][2][m]; tmat[3][m] = d[j][i][3][m]; tmat[4][m] = d[j][i][4][m]; } /* */ tmp1 = 1.0 / tmat[0][0]; tmp = tmp1 * tmat[0][1]; tmat[1][1] = tmat[1][1] - tmp * tmat[1][0]; tmat[2][1] = tmat[2][1] - tmp * tmat[2][0]; tmat[3][1] = tmat[3][1] - tmp * tmat[3][0]; tmat[4][1] = tmat[4][1] - tmp * tmat[4][0]; tv[j][i][1] = tv[j][i][1] - tv[j][i][0] * tmp; /* */ tmp = tmp1 * tmat[0][2]; tmat[1][2] = tmat[1][2] - tmp * tmat[1][0]; tmat[2][2] = tmat[2][2] - tmp * tmat[2][0]; tmat[3][2] = tmat[3][2] - tmp * tmat[3][0]; tmat[4][2] = tmat[4][2] - tmp * tmat[4][0]; tv[j][i][2] = tv[j][i][2] - tv[j][i][0] * tmp; /* */ tmp = tmp1 * tmat[0][3]; tmat[1][3] = tmat[1][3] - tmp * tmat[1][0]; tmat[2][3] = tmat[2][3] - tmp * tmat[2][0]; tmat[3][3] = tmat[3][3] - tmp * tmat[3][0]; tmat[4][3] = tmat[4][3] - tmp * tmat[4][0]; tv[j][i][3] = tv[j][i][3] - tv[j][i][0] * tmp; /* */ tmp = tmp1 * tmat[0][4]; tmat[1][4] = tmat[1][4] - tmp * tmat[1][0]; tmat[2][4] = tmat[2][4] - tmp * tmat[2][0]; tmat[3][4] = tmat[3][4] - tmp * tmat[3][0]; tmat[4][4] = tmat[4][4] - tmp * tmat[4][0]; tv[j][i][4] = tv[j][i][4] - tv[j][i][0] * tmp; /* */ tmp1 = 1.0 / tmat[1][1]; tmp = tmp1 * tmat[1][2]; tmat[2][2] = tmat[2][2] - tmp * tmat[2][1]; tmat[3][2] = tmat[3][2] - tmp * tmat[3][1]; tmat[4][2] = tmat[4][2] - tmp * tmat[4][1]; tv[j][i][2] = tv[j][i][2] - tv[j][i][1] * tmp; /* */ tmp = tmp1 * tmat[1][3]; tmat[2][3] = tmat[2][3] - tmp * tmat[2][1]; tmat[3][3] = tmat[3][3] - tmp * tmat[3][1]; tmat[4][3] = tmat[4][3] - tmp * tmat[4][1]; tv[j][i][3] = tv[j][i][3] - tv[j][i][1] * tmp; /* */ tmp = tmp1 * tmat[1][4]; tmat[2][4] = tmat[2][4] - tmp * tmat[2][1]; tmat[3][4] = tmat[3][4] - tmp * tmat[3][1]; tmat[4][4] = tmat[4][4] - tmp * tmat[4][1]; tv[j][i][4] = tv[j][i][4] - tv[j][i][1] * tmp; /* */ tmp1 = 1.0 / tmat[2][2]; tmp = tmp1 * tmat[2][3]; tmat[3][3] = tmat[3][3] - tmp * tmat[3][2]; tmat[4][3] = tmat[4][3] - tmp * tmat[4][2]; tv[j][i][3] = tv[j][i][3] - tv[j][i][2] * tmp; /* */ tmp = tmp1 * tmat[2][4]; tmat[3][4] = tmat[3][4] - tmp * tmat[3][2]; tmat[4][4] = tmat[4][4] - tmp * tmat[4][2]; tv[j][i][4] = tv[j][i][4] - tv[j][i][2] * tmp; /* */ tmp1 = 1.0 / tmat[3][3]; tmp = tmp1 * tmat[3][4]; tmat[4][4] = tmat[4][4] - tmp * tmat[4][3]; tv[j][i][4] = tv[j][i][4] - tv[j][i][3] * tmp; /* * --------------------------------------------------------------------- * back substitution * --------------------------------------------------------------------- */ tv[j][i][4] = tv[j][i][4] / tmat[4][4]; tv[j][i][3] = tv[j][i][3] - tmat[4][3] * tv[j][i][4]; tv[j][i][3] = tv[j][i][3] / tmat[3][3]; tv[j][i][2] = tv[j][i][2] - tmat[3][2] * tv[j][i][3] - tmat[4][2] * tv[j][i][4]; tv[j][i][2] = tv[j][i][2] / tmat[2][2]; tv[j][i][1] = tv[j][i][1] - tmat[2][1] * tv[j][i][2] - tmat[3][1] * tv[j][i][3] - tmat[4][1] * tv[j][i][4]; tv[j][i][1] = tv[j][i][1] / tmat[1][1]; tv[j][i][0] = tv[j][i][0] - tmat[1][0] * tv[j][i][1] - tmat[2][0] * tv[j][i][2] - tmat[3][0] * tv[j][i][3] - tmat[4][0] * tv[j][i][4]; tv[j][i][0] = tv[j][i][0] / tmat[0][0]; v[k][j][i][0] = v[k][j][i][0] - tv[j][i][0]; v[k][j][i][1] = v[k][j][i][1] - tv[j][i][1]; v[k][j][i][2] = v[k][j][i][2] - tv[j][i][2]; v[k][j][i][3] = v[k][j][i][3] - tv[j][i][3]; v[k][j][i][4] = v[k][j][i][4] - tv[j][i][4]; } if(j != jend - 1) flag2[j + 1] = 0; if(j != jst) flag2[j] = 1; } } void domain() { /* * --------------------------------------------------------------------- * local variables * --------------------------------------------------------------------- */ nx = nx0; ny = ny0; nz = nz0; /* * --------------------------------------------------------------------- * check the sub-domain size * --------------------------------------------------------------------- */ if((nx < 4) || (ny < 4) || (nz < 4)) { printf(" SUBDOMAIN SIZE IS TOO SMALL - \n" " ADJUST PROBLEM SIZE OR NUMBER OF PROCESSORS\n" " SO THAT NX, NY AND NZ ARE GREATER THAN OR EQUAL\n" " TO 4 THEY ARE CURRENTLY%3d%3d%3d\n", nx, ny, nz); exit(EXIT_FAILURE); } if((nx > ISIZ1) || (ny > ISIZ2) || (nz > ISIZ3)) { printf(" SUBDOMAIN SIZE IS TOO LARGE - \n" " ADJUST PROBLEM SIZE OR NUMBER OF PROCESSORS\n" " SO THAT NX, NY AND NZ ARE LESS THAN OR EQUAL TO \n" " ISIZ1, ISIZ2 AND ISIZ3 RESPECTIVELY. THEY ARE\n" " CURRENTLYi%4d%4d%4d\n", nx, ny, nz); exit(EXIT_FAILURE); } /* * --------------------------------------------------------------------- * set up the start and end in i and j extents for all processors * --------------------------------------------------------------------- */ ist = 1; iend = nx - 1; jst = 1; jend = ny - 1; ii1 = 1; ii2 = nx0 - 1; ji1 = 1; ji2 = ny0 - 2; ki1 = 2; ki2 = nz0 - 1; } /* * --------------------------------------------------------------------- * compute the right hand side based on exact solution * --------------------------------------------------------------------- */ void erhs() { /* * --------------------------------------------------------------------- * local variables * --------------------------------------------------------------------- */ int i, j, k, m; double xi, eta, zeta; double q; double u21, u31, u41; double tmp; double u21i, u31i, u41i, u51i; double u21j, u31j, u41j, u51j; double u21k, u31k, u41k, u51k; double u21im1, u31im1, u41im1, u51im1; double u21jm1, u31jm1, u41jm1, u51jm1; double u21km1, u31km1, u41km1, u51km1; double flux[ISIZ1][5]; #pragma omp for for(k = 0; k < nz; k++) { for(j = 0; j < ny; j++) { for(i = 0; i < nx; i++) { for(m = 0; m < 5; m++) { frct[k][j][i][m] = 0.0; } } } } #pragma omp for for(k = 0; k < nz; k++) { zeta = ((double) k) / (nz - 1); for(j = 0; j < ny; j++) { eta = ((double) j) / (ny0 - 1); for(i = 0; i < nx; i++) { xi = ((double) i) / (nx0 - 1); for(m = 0; m < 5; m++) { rsd[k][j][i][m] = ce[0][m] + (ce[1][m] + (ce[4][m] + (ce[7][m] + ce[10][m] * xi) * xi) * xi) * xi + (ce[2][m] + (ce[5][m] + (ce[8][m] + ce[11][m] * eta) * eta) * eta) * eta + (ce[3][m] + (ce[6][m] + (ce[9][m] + ce[12][m] * zeta) * zeta) * zeta) * zeta; } } } } /* * --------------------------------------------------------------------- * xi-direction flux differences * --------------------------------------------------------------------- */ #pragma omp for for(k = 1; k < nz - 1; k++) { for(j = jst; j < jend; j++) { for(i = 0; i < nx; i++) { flux[i][0] = rsd[k][j][i][1]; u21 = rsd[k][j][i][1] / rsd[k][j][i][0]; q = 0.50 * (rsd[k][j][i][1] * rsd[k][j][i][1] + rsd[k][j][i][2] * rsd[k][j][i][2] + rsd[k][j][i][3] * rsd[k][j][i][3]) / rsd[k][j][i][0]; flux[i][1] = rsd[k][j][i][1] * u21 + C2 * (rsd[k][j][i][4] - q); flux[i][2] = rsd[k][j][i][2] * u21; flux[i][3] = rsd[k][j][i][3] * u21; flux[i][4] = (C1 * rsd[k][j][i][4] - C2 * q) * u21; } for(i = ist; i < iend; i++) { for(m = 0; m < 5; m++) { frct[k][j][i][m] = frct[k][j][i][m] - tx2 * (flux[i + 1][m] - flux[i - 1][m]); } } for(i = ist; i < nx; i++) { tmp = 1.0 / rsd[k][j][i][0]; u21i = tmp * rsd[k][j][i][1]; u31i = tmp * rsd[k][j][i][2]; u41i = tmp * rsd[k][j][i][3]; u51i = tmp * rsd[k][j][i][4]; tmp = 1.0 / rsd[k][j][i - 1][0]; u21im1 = tmp * rsd[k][j][i - 1][1]; u31im1 = tmp * rsd[k][j][i - 1][2]; u41im1 = tmp * rsd[k][j][i - 1][3]; u51im1 = tmp * rsd[k][j][i - 1][4]; flux[i][1] = (4.0 / 3.0) * tx3 * (u21i - u21im1); flux[i][2] = tx3 * (u31i - u31im1); flux[i][3] = tx3 * (u41i - u41im1); flux[i][4] = 0.50 * (1.0 - C1 * C5) * tx3 * ((u21i * u21i + u31i * u31i + u41i * u41i) - (u21im1 * u21im1 + u31im1 * u31im1 + u41im1 * u41im1)) + (1.0 / 6.0) * tx3 * (u21i * u21i - u21im1 * u21im1) + C1 * C5 * tx3 * (u51i - u51im1); } for(i = ist; i < iend; i++) { frct[k][j][i][0] = frct[k][j][i][0] + dx1 * tx1 * (rsd[k][j][i - 1][0] - 2.0 * rsd[k][j][i][0] + rsd[k][j][i + 1][0]); frct[k][j][i][1] = frct[k][j][i][1] + tx3 * C3 * C4 * (flux[i + 1][1] - flux[i][1]) + dx2 * tx1 * (rsd[k][j][i - 1][1] - 2.0 * rsd[k][j][i][1] + rsd[k][j][i + 1][1]); frct[k][j][i][2] = frct[k][j][i][2] + tx3 * C3 * C4 * (flux[i + 1][2] - flux[i][2]) + dx3 * tx1 * (rsd[k][j][i - 1][2] - 2.0 * rsd[k][j][i][2] + rsd[k][j][i + 1][2]); frct[k][j][i][3] = frct[k][j][i][3] + tx3 * C3 * C4 * (flux[i + 1][3] - flux[i][3]) + dx4 * tx1 * (rsd[k][j][i - 1][3] - 2.0 * rsd[k][j][i][3] + rsd[k][j][i + 1][3]); frct[k][j][i][4] = frct[k][j][i][4] + tx3 * C3 * C4 * (flux[i + 1][4] - flux[i][4]) + dx5 * tx1 * (rsd[k][j][i - 1][4] - 2.0 * rsd[k][j][i][4] + rsd[k][j][i + 1][4]); } /* * --------------------------------------------------------------------- * fourth-order dissipation * --------------------------------------------------------------------- */ for(m = 0; m < 5; m++) { frct[k][j][1][m] = frct[k][j][1][m] - dssp * (+5.0 * rsd[k][j][1][m] - 4.0 * rsd[k][j][2][m] + rsd[k][j][3][m]); frct[k][j][2][m] = frct[k][j][2][m] - dssp * (-4.0 * rsd[k][j][1][m] + 6.0 * rsd[k][j][2][m] - 4.0 * rsd[k][j][3][m] + rsd[k][j][4][m]); } for(i = 3; i < nx - 3; i++) { for(m = 0; m < 5; m++) { frct[k][j][i][m] = frct[k][j][i][m] - dssp * (rsd[k][j][i - 2][m] - 4.0 * rsd[k][j][i - 1][m] + 6.0 * rsd[k][j][i][m] - 4.0 * rsd[k][j][i + 1][m] + rsd[k][j][i + 2][m]); } } for(m = 0; m < 5; m++) { frct[k][j][nx - 3][m] = frct[k][j][nx - 3][m] - dssp * (rsd[k][j][nx - 5][m] - 4.0 * rsd[k][j][nx - 4][m] + 6.0 * rsd[k][j][nx - 3][m] - 4.0 * rsd[k][j][nx - 2][m]); frct[k][j][nx - 2][m] = frct[k][j][nx - 2][m] - dssp * (rsd[k][j][nx - 4][m] - 4.0 * rsd[k][j][nx - 3][m] + 5.0 * rsd[k][j][nx - 2][m]); } } } /* * --------------------------------------------------------------------- * eta-direction flux differences * --------------------------------------------------------------------- */ #pragma omp for for(k = 1; k < nz - 1; k++) { for(i = ist; i < iend; i++) { for(j = 0; j < ny; j++) { flux[j][0] = rsd[k][j][i][2]; u31 = rsd[k][j][i][2] / rsd[k][j][i][0]; q = 0.50 * (rsd[k][j][i][1] * rsd[k][j][i][1] + rsd[k][j][i][2] * rsd[k][j][i][2] + rsd[k][j][i][3] * rsd[k][j][i][3]) / rsd[k][j][i][0]; flux[j][1] = rsd[k][j][i][1] * u31; flux[j][2] = rsd[k][j][i][2] * u31 + C2 * (rsd[k][j][i][4] - q); flux[j][3] = rsd[k][j][i][3] * u31; flux[j][4] = (C1 * rsd[k][j][i][4] - C2 * q) * u31; } for(j = jst; j < jend; j++) { for(m = 0; m < 5; m++) { frct[k][j][i][m] = frct[k][j][i][m] - ty2 * (flux[j + 1][m] - flux[j - 1][m]); } } for(j = jst; j < ny; j++) { tmp = 1.0 / rsd[k][j][i][0]; u21j = tmp * rsd[k][j][i][1]; u31j = tmp * rsd[k][j][i][2]; u41j = tmp * rsd[k][j][i][3]; u51j = tmp * rsd[k][j][i][4]; tmp = 1.0 / rsd[k][j - 1][i][0]; u21jm1 = tmp * rsd[k][j - 1][i][1]; u31jm1 = tmp * rsd[k][j - 1][i][2]; u41jm1 = tmp * rsd[k][j - 1][i][3]; u51jm1 = tmp * rsd[k][j - 1][i][4]; flux[j][1] = ty3 * (u21j - u21jm1); flux[j][2] = (4.0 / 3.0) * ty3 * (u31j - u31jm1); flux[j][3] = ty3 * (u41j - u41jm1); flux[j][4] = 0.50 * (1.0 - C1 * C5) * ty3 * ((u21j * u21j + u31j * u31j + u41j * u41j) - (u21jm1 * u21jm1 + u31jm1 * u31jm1 + u41jm1 * u41jm1)) + (1.0 / 6.0) * ty3 * (u31j * u31j - u31jm1 * u31jm1) + C1 * C5 * ty3 * (u51j - u51jm1); } for(j = jst; j < jend; j++) { frct[k][j][i][0] = frct[k][j][i][0] + dy1 * ty1 * (rsd[k][j - 1][i][0] - 2.0 * rsd[k][j][i][0] + rsd[k][j + 1][i][0]); frct[k][j][i][1] = frct[k][j][i][1] + ty3 * C3 * C4 * (flux[j + 1][1] - flux[j][1]) + dy2 * ty1 * (rsd[k][j - 1][i][1] - 2.0 * rsd[k][j][i][1] + rsd[k][j + 1][i][1]); frct[k][j][i][2] = frct[k][j][i][2] + ty3 * C3 * C4 * (flux[j + 1][2] - flux[j][2]) + dy3 * ty1 * (rsd[k][j - 1][i][2] - 2.0 * rsd[k][j][i][2] + rsd[k][j + 1][i][2]); frct[k][j][i][3] = frct[k][j][i][3] + ty3 * C3 * C4 * (flux[j + 1][3] - flux[j][3]) + dy4 * ty1 * (rsd[k][j - 1][i][3] - 2.0 * rsd[k][j][i][3] + rsd[k][j + 1][i][3]); frct[k][j][i][4] = frct[k][j][i][4] + ty3 * C3 * C4 * (flux[j + 1][4] - flux[j][4]) + dy5 * ty1 * (rsd[k][j - 1][i][4] - 2.0 * rsd[k][j][i][4] + rsd[k][j + 1][i][4]); } /* * --------------------------------------------------------------------- * fourth-order dissipation * --------------------------------------------------------------------- */ for(m = 0; m < 5; m++) { frct[k][1][i][m] = frct[k][1][i][m] - dssp * (+5.0 * rsd[k][1][i][m] - 4.0 * rsd[k][2][i][m] + rsd[k][3][i][m]); frct[k][2][i][m] = frct[k][2][i][m] - dssp * (-4.0 * rsd[k][1][i][m] + 6.0 * rsd[k][2][i][m] - 4.0 * rsd[k][3][i][m] + rsd[k][4][i][m]); } for(j = 3; j < ny - 3; j++) { for(m = 0; m < 5; m++) { frct[k][j][i][m] = frct[k][j][i][m] - dssp * (rsd[k][j - 2][i][m] - 4.0 * rsd[k][j - 1][i][m] + 6.0 * rsd[k][j][i][m] - 4.0 * rsd[k][j + 1][i][m] + rsd[k][j + 2][i][m]); } } for(m = 0; m < 5; m++) { frct[k][ny - 3][i][m] = frct[k][ny - 3][i][m] - dssp * (rsd[k][ny - 5][i][m] - 4.0 * rsd[k][ny - 4][i][m] + 6.0 * rsd[k][ny - 3][i][m] - 4.0 * rsd[k][ny - 2][i][m]); frct[k][ny - 2][i][m] = frct[k][ny - 2][i][m] - dssp * (rsd[k][ny - 4][i][m] - 4.0 * rsd[k][ny - 3][i][m] + 5.0 * rsd[k][ny - 2][i][m]); } } } /* * --------------------------------------------------------------------- * zeta-direction flux differences * --------------------------------------------------------------------- */ #pragma omp for for(j = jst; j < jend; j++) { for(i = ist; i < iend; i++) { for(k = 0; k < nz; k++) { flux[k][0] = rsd[k][j][i][3]; u41 = rsd[k][j][i][3] / rsd[k][j][i][0]; q = 0.50 * (rsd[k][j][i][1] * rsd[k][j][i][1] + rsd[k][j][i][2] * rsd[k][j][i][2] + rsd[k][j][i][3] * rsd[k][j][i][3]) / rsd[k][j][i][0]; flux[k][1] = rsd[k][j][i][1] * u41; flux[k][2] = rsd[k][j][i][2] * u41; flux[k][3] = rsd[k][j][i][3] * u41 + C2 * (rsd[k][j][i][4] - q); flux[k][4] = (C1 * rsd[k][j][i][4] - C2 * q) * u41; } for(k = 1; k < nz - 1; k++) { for(m = 0; m < 5; m++) { frct[k][j][i][m] = frct[k][j][i][m] - tz2 * (flux[k + 1][m] - flux[k - 1][m]); } } for(k = 1; k < nz; k++) { tmp = 1.0 / rsd[k][j][i][0]; u21k = tmp * rsd[k][j][i][1]; u31k = tmp * rsd[k][j][i][2]; u41k = tmp * rsd[k][j][i][3]; u51k = tmp * rsd[k][j][i][4]; tmp = 1.0 / rsd[k - 1][j][i][0]; u21km1 = tmp * rsd[k - 1][j][i][1]; u31km1 = tmp * rsd[k - 1][j][i][2]; u41km1 = tmp * rsd[k - 1][j][i][3]; u51km1 = tmp * rsd[k - 1][j][i][4]; flux[k][1] = tz3 * (u21k - u21km1); flux[k][2] = tz3 * (u31k - u31km1); flux[k][3] = (4.0 / 3.0) * tz3 * (u41k - u41km1); flux[k][4] = 0.50 * (1.0 - C1 * C5) * tz3 * ((u21k * u21k + u31k * u31k + u41k * u41k) - (u21km1 * u21km1 + u31km1 * u31km1 + u41km1 * u41km1)) + (1.0 / 6.0) * tz3 * (u41k * u41k - u41km1 * u41km1) + C1 * C5 * tz3 * (u51k - u51km1); } for(k = 1; k < nz - 1; k++) { frct[k][j][i][0] = frct[k][j][i][0] + dz1 * tz1 * (rsd[k + 1][j][i][0] - 2.0 * rsd[k][j][i][0] + rsd[k - 1][j][i][0]); frct[k][j][i][1] = frct[k][j][i][1] + tz3 * C3 * C4 * (flux[k + 1][1] - flux[k][1]) + dz2 * tz1 * (rsd[k + 1][j][i][1] - 2.0 * rsd[k][j][i][1] + rsd[k - 1][j][i][1]); frct[k][j][i][2] = frct[k][j][i][2] + tz3 * C3 * C4 * (flux[k + 1][2] - flux[k][2]) + dz3 * tz1 * (rsd[k + 1][j][i][2] - 2.0 * rsd[k][j][i][2] + rsd[k - 1][j][i][2]); frct[k][j][i][3] = frct[k][j][i][3] + tz3 * C3 * C4 * (flux[k + 1][3] - flux[k][3]) + dz4 * tz1 * (rsd[k + 1][j][i][3] - 2.0 * rsd[k][j][i][3] + rsd[k - 1][j][i][3]); frct[k][j][i][4] = frct[k][j][i][4] + tz3 * C3 * C4 * (flux[k + 1][4] - flux[k][4]) + dz5 * tz1 * (rsd[k + 1][j][i][4] - 2.0 * rsd[k][j][i][4] + rsd[k - 1][j][i][4]); } /* * --------------------------------------------------------------------- * fourth-order dissipation * --------------------------------------------------------------------- */ for(m = 0; m < 5; m++) { frct[1][j][i][m] = frct[1][j][i][m] - dssp * (+5.0 * rsd[1][j][i][m] - 4.0 * rsd[2][j][i][m] + rsd[3][j][i][m]); frct[2][j][i][m] = frct[2][j][i][m] - dssp * (-4.0 * rsd[1][j][i][m] + 6.0 * rsd[2][j][i][m] - 4.0 * rsd[3][j][i][m] + rsd[4][j][i][m]); } for(k = 3; k < nz - 3; k++) { for(m = 0; m < 5; m++) { frct[k][j][i][m] = frct[k][j][i][m] - dssp * (rsd[k - 2][j][i][m] - 4.0 * rsd[k - 1][j][i][m] + 6.0 * rsd[k][j][i][m] - 4.0 * rsd[k + 1][j][i][m] + rsd[k + 2][j][i][m]); } } for(m = 0; m < 5; m++) { frct[nz - 3][j][i][m] = frct[nz - 3][j][i][m] - dssp * (rsd[nz - 5][j][i][m] - 4.0 * rsd[nz - 4][j][i][m] + 6.0 * rsd[nz - 3][j][i][m] - 4.0 * rsd[nz - 2][j][i][m]); frct[nz - 2][j][i][m] = frct[nz - 2][j][i][m] - dssp * (rsd[nz - 4][j][i][m] - 4.0 * rsd[nz - 3][j][i][m] + 5.0 * rsd[nz - 2][j][i][m]); } } } } /* * --------------------------------------------------------------------- * compute the solution error * --------------------------------------------------------------------- */ void error() { /* * --------------------------------------------------------------------- * local variables * --------------------------------------------------------------------- */ int i, j, k, m; double tmp; double u000ijk[5]; for(m = 0; m < 5; m++) { errnm[m] = 0.0; } for(k = 1; k < nz - 1; k++) { for(j = jst; j < jend; j++) { for(i = ist; i < iend; i++) { exact(i, j, k, u000ijk); for(m = 0; m < 5; m++) { tmp = (u000ijk[m] - u[k][j][i][m]); errnm[m] = errnm[m] + tmp * tmp; } } } } for(m = 0; m < 5; m++) { errnm[m] = sqrt(errnm[m] / ((nx0 - 2) * (ny0 - 2) * (nz0 - 2))); } } /* * --------------------------------------------------------------------- * compute the exact solution at (i,j,k) * --------------------------------------------------------------------- */ void exact(int i, int j, int k, double u000ijk[]) { /* * --------------------------------------------------------------------- * local variables * --------------------------------------------------------------------- */ int m; double xi, eta, zeta; xi = ((double) i) / (nx0 - 1); eta = ((double) j) / (ny0 - 1); zeta = ((double) k) / (nz - 1); for(m = 0; m < 5; m++) { u000ijk[m] = ce[0][m] + (ce[1][m] + (ce[4][m] + (ce[7][m] + ce[10][m] * xi) * xi) * xi) * xi + (ce[2][m] + (ce[5][m] + (ce[8][m] + ce[11][m] * eta) * eta) * eta) * eta + (ce[3][m] + (ce[6][m] + (ce[9][m] + ce[12][m] * zeta) * zeta) * zeta) * zeta; } } /* * --------------------------------------------------------------------- * compute the lower triangular part of the jacobian matrix * --------------------------------------------------------------------- */ void jacld(int k) { /* * --------------------------------------------------------------------- * local variables * --------------------------------------------------------------------- */ int i, j; double r43; double c1345; double c34; double tmp1, tmp2, tmp3; r43 = (4.0 / 3.0); c1345 = C1 * C3 * C4 * C5; c34 = C3 * C4; #pragma omp for nowait schedule(static) for(j = jst; j < jend; j++) { for(i = ist; i < iend; i++) { /* * --------------------------------------------------------------------- * form the block daigonal * --------------------------------------------------------------------- */ tmp1 = rho_i[k][j][i]; tmp2 = tmp1 * tmp1; tmp3 = tmp1 * tmp2; d[j][i][0][0] = 1.0 + dt * 2.0 * (tx1 * dx1 + ty1 * dy1 + tz1 * dz1); d[j][i][1][0] = 0.0; d[j][i][2][0] = 0.0; d[j][i][3][0] = 0.0; d[j][i][4][0] = 0.0; d[j][i][0][1] = -dt * 2.0 * (tx1 * r43 + ty1 + tz1) * c34 * tmp2 * u[k][j][i][1]; d[j][i][1][1] = 1.0 + dt * 2.0 * c34 * tmp1 * (tx1 * r43 + ty1 + tz1) + dt * 2.0 * (tx1 * dx2 + ty1 * dy2 + tz1 * dz2); d[j][i][2][1] = 0.0; d[j][i][3][1] = 0.0; d[j][i][4][1] = 0.0; d[j][i][0][2] = -dt * 2.0 * (tx1 + ty1 * r43 + tz1) * c34 * tmp2 * u[k][j][i][2]; d[j][i][1][2] = 0.0; d[j][i][2][2] = 1.0 + dt * 2.0 * c34 * tmp1 * (tx1 + ty1 * r43 + tz1) + dt * 2.0 * (tx1 * dx3 + ty1 * dy3 + tz1 * dz3); d[j][i][3][2] = 0.0; d[j][i][4][2] = 0.0; d[j][i][0][3] = -dt * 2.0 * (tx1 + ty1 + tz1 * r43) * c34 * tmp2 * u[k][j][i][3]; d[j][i][1][3] = 0.0; d[j][i][2][3] = 0.0; d[j][i][3][3] = 1.0 + dt * 2.0 * c34 * tmp1 * (tx1 + ty1 + tz1 * r43) + dt * 2.0 * (tx1 * dx4 + ty1 * dy4 + tz1 * dz4); d[j][i][4][3] = 0.0; d[j][i][0][4] = -dt * 2.0 * (((tx1 * (r43 * c34 - c1345) + ty1 * (c34 - c1345) + tz1 * (c34 - c1345)) * (u[k][j][i][1] * u[k][j][i][1]) + (tx1 * (c34 - c1345) + ty1 * (r43 * c34 - c1345) + tz1 * (c34 - c1345)) * (u[k][j][i][2] * u[k][j][i][2]) + (tx1 * (c34 - c1345) + ty1 * (c34 - c1345) + tz1 * (r43 * c34 - c1345)) * (u[k][j][i][3] * u[k][j][i][3])) * tmp3 + (tx1 + ty1 + tz1) * c1345 * tmp2 * u[k][j][i][4]); d[j][i][1][4] = dt * 2.0 * tmp2 * u[k][j][i][1] * (tx1 * (r43 * c34 - c1345) + ty1 * (c34 - c1345) + tz1 * (c34 - c1345)); d[j][i][2][4] = dt * 2.0 * tmp2 * u[k][j][i][2] * (tx1 * (c34 - c1345) + ty1 * (r43 * c34 - c1345) + tz1 * (c34 - c1345)); d[j][i][3][4] = dt * 2.0 * tmp2 * u[k][j][i][3] * (tx1 * (c34 - c1345) + ty1 * (c34 - c1345) + tz1 * (r43 * c34 - c1345)); d[j][i][4][4] = 1.0 + dt * 2.0 * (tx1 + ty1 + tz1) * c1345 * tmp1 + dt * 2.0 * (tx1 * dx5 + ty1 * dy5 + tz1 * dz5); /* * --------------------------------------------------------------------- * form the first block sub-diagonal * --------------------------------------------------------------------- */ tmp1 = rho_i[k - 1][j][i]; tmp2 = tmp1 * tmp1; tmp3 = tmp1 * tmp2; a[j][i][0][0] = -dt * tz1 * dz1; a[j][i][1][0] = 0.0; a[j][i][2][0] = 0.0; a[j][i][3][0] = -dt * tz2; a[j][i][4][0] = 0.0; a[j][i][0][1] = -dt * tz2 * (-(u[k - 1][j][i][1] * u[k - 1][j][i][3]) * tmp2) - dt * tz1 * (-c34 * tmp2 * u[k - 1][j][i][1]); a[j][i][1][1] = -dt * tz2 * (u[k - 1][j][i][3] * tmp1) - dt * tz1 * c34 * tmp1 - dt * tz1 * dz2; a[j][i][2][1] = 0.0; a[j][i][3][1] = -dt * tz2 * (u[k - 1][j][i][1] * tmp1); a[j][i][4][1] = 0.0; a[j][i][0][2] = -dt * tz2 * (-(u[k - 1][j][i][2] * u[k - 1][j][i][3]) * tmp2) - dt * tz1 * (-c34 * tmp2 * u[k - 1][j][i][2]); a[j][i][1][2] = 0.0; a[j][i][2][2] = -dt * tz2 * (u[k - 1][j][i][3] * tmp1) - dt * tz1 * (c34 * tmp1) - dt * tz1 * dz3; a[j][i][3][2] = -dt * tz2 * (u[k - 1][j][i][2] * tmp1); a[j][i][4][2] = 0.0; a[j][i][0][3] = -dt * tz2 * (-(u[k - 1][j][i][3] * tmp1) * (u[k - 1][j][i][3] * tmp1) + C2 * qs[k - 1][j][i] * tmp1) - dt * tz1 * (-r43 * c34 * tmp2 * u[k - 1][j][i][3]); a[j][i][1][3] = -dt * tz2 * (-C2 * (u[k - 1][j][i][1] * tmp1)); a[j][i][2][3] = -dt * tz2 * (-C2 * (u[k - 1][j][i][2] * tmp1)); a[j][i][3][3] = -dt * tz2 * (2.0 - C2) * (u[k - 1][j][i][3] * tmp1) - dt * tz1 * (r43 * c34 * tmp1) - dt * tz1 * dz4; a[j][i][4][3] = -dt * tz2 * C2; a[j][i][0][4] = -dt * tz2 * ((C2 * 2.0 * qs[k - 1][j][i] - C1 * u[k - 1][j][i][4]) * u[k - 1][j][i][3] * tmp2) - dt * tz1 * (-(c34 - c1345) * tmp3 * (u[k - 1][j][i][1] * u[k - 1][j][i][1]) - (c34 - c1345) * tmp3 * (u[k - 1][j][i][2] * u[k - 1][j][i][2]) - (r43 * c34 - c1345) * tmp3 * (u[k - 1][j][i][3] * u[k - 1][j][i][3]) - c1345 * tmp2 * u[k - 1][j][i][4]); a[j][i][1][4] = -dt * tz2 * (-C2 * (u[k - 1][j][i][1] * u[k - 1][j][i][3]) * tmp2) - dt * tz1 * (c34 - c1345) * tmp2 * u[k - 1][j][i][1]; a[j][i][2][4] = -dt * tz2 * (-C2 * (u[k - 1][j][i][2] * u[k - 1][j][i][3]) * tmp2) - dt * tz1 * (c34 - c1345) * tmp2 * u[k - 1][j][i][2]; a[j][i][3][4] = -dt * tz2 * (C1 * (u[k - 1][j][i][4] * tmp1) - C2 * (qs[k - 1][j][i] * tmp1 + u[k - 1][j][i][3] * u[k - 1][j][i][3] * tmp2)) - dt * tz1 * (r43 * c34 - c1345) * tmp2 * u[k - 1][j][i][3]; a[j][i][4][4] = -dt * tz2 * (C1 * (u[k - 1][j][i][3] * tmp1)) - dt * tz1 * c1345 * tmp1 - dt * tz1 * dz5; /* * --------------------------------------------------------------------- * form the second block sub-diagonal * --------------------------------------------------------------------- */ tmp1 = rho_i[k][j - 1][i]; tmp2 = tmp1 * tmp1; tmp3 = tmp1 * tmp2; b[j][i][0][0] = -dt * ty1 * dy1; b[j][i][1][0] = 0.0; b[j][i][2][0] = -dt * ty2; b[j][i][3][0] = 0.0; b[j][i][4][0] = 0.0; b[j][i][0][1] = -dt * ty2 * (-(u[k][j - 1][i][1] * u[k][j - 1][i][2]) * tmp2) - dt * ty1 * (-c34 * tmp2 * u[k][j - 1][i][1]); b[j][i][1][1] = -dt * ty2 * (u[k][j - 1][i][2] * tmp1) - dt * ty1 * (c34 * tmp1) - dt * ty1 * dy2; b[j][i][2][1] = -dt * ty2 * (u[k][j - 1][i][1] * tmp1); b[j][i][3][1] = 0.0; b[j][i][4][1] = 0.0; b[j][i][0][2] = -dt * ty2 * (-(u[k][j - 1][i][2] * tmp1) * (u[k][j - 1][i][2] * tmp1) + C2 * (qs[k][j - 1][i] * tmp1)) - dt * ty1 * (-r43 * c34 * tmp2 * u[k][j - 1][i][2]); b[j][i][1][2] = -dt * ty2 * (-C2 * (u[k][j - 1][i][1] * tmp1)); b[j][i][2][2] = -dt * ty2 * ((2.0 - C2) * (u[k][j - 1][i][2] * tmp1)) - dt * ty1 * (r43 * c34 * tmp1) - dt * ty1 * dy3; b[j][i][3][2] = -dt * ty2 * (-C2 * (u[k][j - 1][i][3] * tmp1)); b[j][i][4][2] = -dt * ty2 * C2; b[j][i][0][3] = -dt * ty2 * (-(u[k][j - 1][i][2] * u[k][j - 1][i][3]) * tmp2) - dt * ty1 * (-c34 * tmp2 * u[k][j - 1][i][3]); b[j][i][1][3] = 0.0; b[j][i][2][3] = -dt * ty2 * (u[k][j - 1][i][3] * tmp1); b[j][i][3][3] = -dt * ty2 * (u[k][j - 1][i][2] * tmp1) - dt * ty1 * (c34 * tmp1) - dt * ty1 * dy4; b[j][i][4][3] = 0.0; b[j][i][0][4] = -dt * ty2 * ((C2 * 2.0 * qs[k][j - 1][i] - C1 * u[k][j - 1][i][4]) * (u[k][j - 1][i][2] * tmp2)) - dt * ty1 * (-(c34 - c1345) * tmp3 * (u[k][j - 1][i][1] * u[k][j - 1][i][1]) - (r43 * c34 - c1345) * tmp3 * (u[k][j - 1][i][2] * u[k][j - 1][i][2]) - (c34 - c1345) * tmp3 * (u[k][j - 1][i][3] * u[k][j - 1][i][3]) - c1345 * tmp2 * u[k][j - 1][i][4]); b[j][i][1][4] = -dt * ty2 * (-C2 * (u[k][j - 1][i][1] * u[k][j - 1][i][2]) * tmp2) - dt * ty1 * (c34 - c1345) * tmp2 * u[k][j - 1][i][1]; b[j][i][2][4] = -dt * ty2 * (C1 * (u[k][j - 1][i][4] * tmp1) - C2 * (qs[k][j - 1][i] * tmp1 + u[k][j - 1][i][2] * u[k][j - 1][i][2] * tmp2)) - dt * ty1 * (r43 * c34 - c1345) * tmp2 * u[k][j - 1][i][2]; b[j][i][3][4] = -dt * ty2 * (-C2 * (u[k][j - 1][i][2] * u[k][j - 1][i][3]) * tmp2) - dt * ty1 * (c34 - c1345) * tmp2 * u[k][j - 1][i][3]; b[j][i][4][4] = -dt * ty2 * (C1 * (u[k][j - 1][i][2] * tmp1)) - dt * ty1 * c1345 * tmp1 - dt * ty1 * dy5; /* * --------------------------------------------------------------------- * form the third block sub-diagonal * --------------------------------------------------------------------- */ tmp1 = rho_i[k][j][i - 1]; tmp2 = tmp1 * tmp1; tmp3 = tmp1 * tmp2; c[j][i][0][0] = -dt * tx1 * dx1; c[j][i][1][0] = -dt * tx2; c[j][i][2][0] = 0.0; c[j][i][3][0] = 0.0; c[j][i][4][0] = 0.0; c[j][i][0][1] = -dt * tx2 * (-(u[k][j][i - 1][1] * tmp1) * (u[k][j][i - 1][1] * tmp1) + C2 * qs[k][j][i - 1] * tmp1) - dt * tx1 * (-r43 * c34 * tmp2 * u[k][j][i - 1][1]); c[j][i][1][1] = -dt * tx2 * ((2.0 - C2) * (u[k][j][i - 1][1] * tmp1)) - dt * tx1 * (r43 * c34 * tmp1) - dt * tx1 * dx2; c[j][i][2][1] = -dt * tx2 * (-C2 * (u[k][j][i - 1][2] * tmp1)); c[j][i][3][1] = -dt * tx2 * (-C2 * (u[k][j][i - 1][3] * tmp1)); c[j][i][4][1] = -dt * tx2 * C2; c[j][i][0][2] = -dt * tx2 * (-(u[k][j][i - 1][1] * u[k][j][i - 1][2]) * tmp2) - dt * tx1 * (-c34 * tmp2 * u[k][j][i - 1][2]); c[j][i][1][2] = -dt * tx2 * (u[k][j][i - 1][2] * tmp1); c[j][i][2][2] = -dt * tx2 * (u[k][j][i - 1][1] * tmp1) - dt * tx1 * (c34 * tmp1) - dt * tx1 * dx3; c[j][i][3][2] = 0.0; c[j][i][4][2] = 0.0; c[j][i][0][3] = -dt * tx2 * (-(u[k][j][i - 1][1] * u[k][j][i - 1][3]) * tmp2) - dt * tx1 * (-c34 * tmp2 * u[k][j][i - 1][3]); c[j][i][1][3] = -dt * tx2 * (u[k][j][i - 1][3] * tmp1); c[j][i][2][3] = 0.0; c[j][i][3][3] = -dt * tx2 * (u[k][j][i - 1][1] * tmp1) - dt * tx1 * (c34 * tmp1) - dt * tx1 * dx4; c[j][i][4][3] = 0.0; c[j][i][0][4] = -dt * tx2 * ((C2 * 2.0 * qs[k][j][i - 1] - C1 * u[k][j][i - 1][4]) * u[k][j][i - 1][1] * tmp2) - dt * tx1 * (-(r43 * c34 - c1345) * tmp3 * (u[k][j][i - 1][1] * u[k][j][i - 1][1]) - (c34 - c1345) * tmp3 * (u[k][j][i - 1][2] * u[k][j][i - 1][2]) - (c34 - c1345) * tmp3 * (u[k][j][i - 1][3] * u[k][j][i - 1][3]) - c1345 * tmp2 * u[k][j][i - 1][4]); c[j][i][1][4] = -dt * tx2 * (C1 * (u[k][j][i - 1][4] * tmp1) - C2 * (u[k][j][i - 1][1] * u[k][j][i - 1][1] * tmp2 + qs[k][j][i - 1] * tmp1)) - dt * tx1 * (r43 * c34 - c1345) * tmp2 * u[k][j][i - 1][1]; c[j][i][2][4] = -dt * tx2 * (-C2 * (u[k][j][i - 1][2] * u[k][j][i - 1][1]) * tmp2) - dt * tx1 * (c34 - c1345) * tmp2 * u[k][j][i - 1][2]; c[j][i][3][4] = -dt * tx2 * (-C2 * (u[k][j][i - 1][3] * u[k][j][i - 1][1]) * tmp2) - dt * tx1 * (c34 - c1345) * tmp2 * u[k][j][i - 1][3]; c[j][i][4][4] = -dt * tx2 * (C1 * (u[k][j][i - 1][1] * tmp1)) - dt * tx1 * c1345 * tmp1 - dt * tx1 * dx5; } } } /* * --------------------------------------------------------------------- * compute the upper triangular part of the jacobian matrix * --------------------------------------------------------------------- */ void jacu(int k) { /* * --------------------------------------------------------------------- * local variables * --------------------------------------------------------------------- */ int i, j; double r43; double c1345; double c34; double tmp1, tmp2, tmp3; r43 = (4.0 / 3.0); c1345 = C1 * C3 * C4 * C5; c34 = C3 * C4; #pragma omp for nowait schedule(static) for(j = jend - 1; j >= jst; j--) { for(i = iend - 1; i >= ist; i--) { /* * --------------------------------------------------------------------- * form the block daigonal * --------------------------------------------------------------------- */ tmp1 = rho_i[k][j][i]; tmp2 = tmp1 * tmp1; tmp3 = tmp1 * tmp2; d[j][i][0][0] = 1.0 + dt * 2.0 * (tx1 * dx1 + ty1 * dy1 + tz1 * dz1); d[j][i][1][0] = 0.0; d[j][i][2][0] = 0.0; d[j][i][3][0] = 0.0; d[j][i][4][0] = 0.0; d[j][i][0][1] = dt * 2.0 * (-tx1 * r43 - ty1 - tz1) * (c34 * tmp2 * u[k][j][i][1]); d[j][i][1][1] = 1.0 + dt * 2.0 * c34 * tmp1 * (tx1 * r43 + ty1 + tz1) + dt * 2.0 * (tx1 * dx2 + ty1 * dy2 + tz1 * dz2); d[j][i][2][1] = 0.0; d[j][i][3][1] = 0.0; d[j][i][4][1] = 0.0; d[j][i][0][2] = dt * 2.0 * (-tx1 - ty1 * r43 - tz1) * (c34 * tmp2 * u[k][j][i][2]); d[j][i][1][2] = 0.0; d[j][i][2][2] = 1.0 + dt * 2.0 * c34 * tmp1 * (tx1 + ty1 * r43 + tz1) + dt * 2.0 * (tx1 * dx3 + ty1 * dy3 + tz1 * dz3); d[j][i][3][2] = 0.0; d[j][i][4][2] = 0.0; d[j][i][0][3] = dt * 2.0 * (-tx1 - ty1 - tz1 * r43) * (c34 * tmp2 * u[k][j][i][3]); d[j][i][1][3] = 0.0; d[j][i][2][3] = 0.0; d[j][i][3][3] = 1.0 + dt * 2.0 * c34 * tmp1 * (tx1 + ty1 + tz1 * r43) + dt * 2.0 * (tx1 * dx4 + ty1 * dy4 + tz1 * dz4); d[j][i][4][3] = 0.0; d[j][i][0][4] = -dt * 2.0 * (((tx1 * (r43 * c34 - c1345) + ty1 * (c34 - c1345) + tz1 * (c34 - c1345)) * (u[k][j][i][1] * u[k][j][i][1]) + (tx1 * (c34 - c1345) + ty1 * (r43 * c34 - c1345) + tz1 * (c34 - c1345)) * (u[k][j][i][2] * u[k][j][i][2]) + (tx1 * (c34 - c1345) + ty1 * (c34 - c1345) + tz1 * (r43 * c34 - c1345)) * (u[k][j][i][3] * u[k][j][i][3])) * tmp3 + (tx1 + ty1 + tz1) * c1345 * tmp2 * u[k][j][i][4]); d[j][i][1][4] = dt * 2.0 * (tx1 * (r43 * c34 - c1345) + ty1 * (c34 - c1345) + tz1 * (c34 - c1345)) * tmp2 * u[k][j][i][1]; d[j][i][2][4] = dt * 2.0 * (tx1 * (c34 - c1345) + ty1 * (r43 * c34 - c1345) + tz1 * (c34 - c1345)) * tmp2 * u[k][j][i][2]; d[j][i][3][4] = dt * 2.0 * (tx1 * (c34 - c1345) + ty1 * (c34 - c1345) + tz1 * (r43 * c34 - c1345)) * tmp2 * u[k][j][i][3]; d[j][i][4][4] = 1.0 + dt * 2.0 * (tx1 + ty1 + tz1) * c1345 * tmp1 + dt * 2.0 * (tx1 * dx5 + ty1 * dy5 + tz1 * dz5); /* * --------------------------------------------------------------------- * form the first block sub-diagonal * --------------------------------------------------------------------- */ tmp1 = rho_i[k][j][i + 1]; tmp2 = tmp1 * tmp1; tmp3 = tmp1 * tmp2; a[j][i][0][0] = -dt * tx1 * dx1; a[j][i][1][0] = dt * tx2; a[j][i][2][0] = 0.0; a[j][i][3][0] = 0.0; a[j][i][4][0] = 0.0; a[j][i][0][1] = dt * tx2 * (-(u[k][j][i + 1][1] * tmp1) * (u[k][j][i + 1][1] * tmp1) + C2 * qs[k][j][i + 1] * tmp1) - dt * tx1 * (-r43 * c34 * tmp2 * u[k][j][i + 1][1]); a[j][i][1][1] = dt * tx2 * ((2.0 - C2) * (u[k][j][i + 1][1] * tmp1)) - dt * tx1 * (r43 * c34 * tmp1) - dt * tx1 * dx2; a[j][i][2][1] = dt * tx2 * (-C2 * (u[k][j][i + 1][2] * tmp1)); a[j][i][3][1] = dt * tx2 * (-C2 * (u[k][j][i + 1][3] * tmp1)); a[j][i][4][1] = dt * tx2 * C2; a[j][i][0][2] = dt * tx2 * (-(u[k][j][i + 1][1] * u[k][j][i + 1][2]) * tmp2) - dt * tx1 * (-c34 * tmp2 * u[k][j][i + 1][2]); a[j][i][1][2] = dt * tx2 * (u[k][j][i + 1][2] * tmp1); a[j][i][2][2] = dt * tx2 * (u[k][j][i + 1][1] * tmp1) - dt * tx1 * (c34 * tmp1) - dt * tx1 * dx3; a[j][i][3][2] = 0.0; a[j][i][4][2] = 0.0; a[j][i][0][3] = dt * tx2 * (-(u[k][j][i + 1][1] * u[k][j][i + 1][3]) * tmp2) - dt * tx1 * (-c34 * tmp2 * u[k][j][i + 1][3]); a[j][i][1][3] = dt * tx2 * (u[k][j][i + 1][3] * tmp1); a[j][i][2][3] = 0.0; a[j][i][3][3] = dt * tx2 * (u[k][j][i + 1][1] * tmp1) - dt * tx1 * (c34 * tmp1) - dt * tx1 * dx4; a[j][i][4][3] = 0.0; a[j][i][0][4] = dt * tx2 * ((C2 * 2.0 * qs[k][j][i + 1] - C1 * u[k][j][i + 1][4]) * (u[k][j][i + 1][1] * tmp2)) - dt * tx1 * (-(r43 * c34 - c1345) * tmp3 * (u[k][j][i + 1][1] * u[k][j][i + 1][1]) - (c34 - c1345) * tmp3 * (u[k][j][i + 1][2] * u[k][j][i + 1][2]) - (c34 - c1345) * tmp3 * (u[k][j][i + 1][3] * u[k][j][i + 1][3]) - c1345 * tmp2 * u[k][j][i + 1][4]); a[j][i][1][4] = dt * tx2 * (C1 * (u[k][j][i + 1][4] * tmp1) - C2 * (u[k][j][i + 1][1] * u[k][j][i + 1][1] * tmp2 + qs[k][j][i + 1] * tmp1)) - dt * tx1 * (r43 * c34 - c1345) * tmp2 * u[k][j][i + 1][1]; a[j][i][2][4] = dt * tx2 * (-C2 * (u[k][j][i + 1][2] * u[k][j][i + 1][1]) * tmp2) - dt * tx1 * (c34 - c1345) * tmp2 * u[k][j][i + 1][2]; a[j][i][3][4] = dt * tx2 * (-C2 * (u[k][j][i + 1][3] * u[k][j][i + 1][1]) * tmp2) - dt * tx1 * (c34 - c1345) * tmp2 * u[k][j][i + 1][3]; a[j][i][4][4] = dt * tx2 * (C1 * (u[k][j][i + 1][1] * tmp1)) - dt * tx1 * c1345 * tmp1 - dt * tx1 * dx5; /* * --------------------------------------------------------------------- * form the second block sub-diagonal * --------------------------------------------------------------------- */ tmp1 = rho_i[k][j + 1][i]; tmp2 = tmp1 * tmp1; tmp3 = tmp1 * tmp2; b[j][i][0][0] = -dt * ty1 * dy1; b[j][i][1][0] = 0.0; b[j][i][2][0] = dt * ty2; b[j][i][3][0] = 0.0; b[j][i][4][0] = 0.0; b[j][i][0][1] = dt * ty2 * (-(u[k][j + 1][i][1] * u[k][j + 1][i][2]) * tmp2) - dt * ty1 * (-c34 * tmp2 * u[k][j + 1][i][1]); b[j][i][1][1] = dt * ty2 * (u[k][j + 1][i][2] * tmp1) - dt * ty1 * (c34 * tmp1) - dt * ty1 * dy2; b[j][i][2][1] = dt * ty2 * (u[k][j + 1][i][1] * tmp1); b[j][i][3][1] = 0.0; b[j][i][4][1] = 0.0; b[j][i][0][2] = dt * ty2 * (-(u[k][j + 1][i][2] * tmp1) * (u[k][j + 1][i][2] * tmp1) + C2 * (qs[k][j + 1][i] * tmp1)) - dt * ty1 * (-r43 * c34 * tmp2 * u[k][j + 1][i][2]); b[j][i][1][2] = dt * ty2 * (-C2 * (u[k][j + 1][i][1] * tmp1)); b[j][i][2][2] = dt * ty2 * ((2.0 - C2) * (u[k][j + 1][i][2] * tmp1)) - dt * ty1 * (r43 * c34 * tmp1) - dt * ty1 * dy3; b[j][i][3][2] = dt * ty2 * (-C2 * (u[k][j + 1][i][3] * tmp1)); b[j][i][4][2] = dt * ty2 * C2; b[j][i][0][3] = dt * ty2 * (-(u[k][j + 1][i][2] * u[k][j + 1][i][3]) * tmp2) - dt * ty1 * (-c34 * tmp2 * u[k][j + 1][i][3]); b[j][i][1][3] = 0.0; b[j][i][2][3] = dt * ty2 * (u[k][j + 1][i][3] * tmp1); b[j][i][3][3] = dt * ty2 * (u[k][j + 1][i][2] * tmp1) - dt * ty1 * (c34 * tmp1) - dt * ty1 * dy4; b[j][i][4][3] = 0.0; b[j][i][0][4] = dt * ty2 * ((C2 * 2.0 * qs[k][j + 1][i] - C1 * u[k][j + 1][i][4]) * (u[k][j + 1][i][2] * tmp2)) - dt * ty1 * (-(c34 - c1345) * tmp3 * (u[k][j + 1][i][1] * u[k][j + 1][i][1]) - (r43 * c34 - c1345) * tmp3 * (u[k][j + 1][i][2] * u[k][j + 1][i][2]) - (c34 - c1345) * tmp3 * (u[k][j + 1][i][3] * u[k][j + 1][i][3]) - c1345 * tmp2 * u[k][j + 1][i][4]); b[j][i][1][4] = dt * ty2 * (-C2 * (u[k][j + 1][i][1] * u[k][j + 1][i][2]) * tmp2) - dt * ty1 * (c34 - c1345) * tmp2 * u[k][j + 1][i][1]; b[j][i][2][4] = dt * ty2 * (C1 * (u[k][j + 1][i][4] * tmp1) - C2 * (qs[k][j + 1][i] * tmp1 + u[k][j + 1][i][2] * u[k][j + 1][i][2] * tmp2)) - dt * ty1 * (r43 * c34 - c1345) * tmp2 * u[k][j + 1][i][2]; b[j][i][3][4] = dt * ty2 * (-C2 * (u[k][j + 1][i][2] * u[k][j + 1][i][3]) * tmp2) - dt * ty1 * (c34 - c1345) * tmp2 * u[k][j + 1][i][3]; b[j][i][4][4] = dt * ty2 * (C1 * (u[k][j + 1][i][2] * tmp1)) - dt * ty1 * c1345 * tmp1 - dt * ty1 * dy5; /* * --------------------------------------------------------------------- * form the third block sub-diagonal * --------------------------------------------------------------------- */ tmp1 = rho_i[k + 1][j][i]; tmp2 = tmp1 * tmp1; tmp3 = tmp1 * tmp2; c[j][i][0][0] = -dt * tz1 * dz1; c[j][i][1][0] = 0.0; c[j][i][2][0] = 0.0; c[j][i][3][0] = dt * tz2; c[j][i][4][0] = 0.0; c[j][i][0][1] = dt * tz2 * (-(u[k + 1][j][i][1] * u[k + 1][j][i][3]) * tmp2) - dt * tz1 * (-c34 * tmp2 * u[k + 1][j][i][1]); c[j][i][1][1] = dt * tz2 * (u[k + 1][j][i][3] * tmp1) - dt * tz1 * c34 * tmp1 - dt * tz1 * dz2; c[j][i][2][1] = 0.0; c[j][i][3][1] = dt * tz2 * (u[k + 1][j][i][1] * tmp1); c[j][i][4][1] = 0.0; c[j][i][0][2] = dt * tz2 * (-(u[k + 1][j][i][2] * u[k + 1][j][i][3]) * tmp2) - dt * tz1 * (-c34 * tmp2 * u[k + 1][j][i][2]); c[j][i][1][2] = 0.0; c[j][i][2][2] = dt * tz2 * (u[k + 1][j][i][3] * tmp1) - dt * tz1 * (c34 * tmp1) - dt * tz1 * dz3; c[j][i][3][2] = dt * tz2 * (u[k + 1][j][i][2] * tmp1); c[j][i][4][2] = 0.0; c[j][i][0][3] = dt * tz2 * (-(u[k + 1][j][i][3] * tmp1) * (u[k + 1][j][i][3] * tmp1) + C2 * (qs[k + 1][j][i] * tmp1)) - dt * tz1 * (-r43 * c34 * tmp2 * u[k + 1][j][i][3]); c[j][i][1][3] = dt * tz2 * (-C2 * (u[k + 1][j][i][1] * tmp1)); c[j][i][2][3] = dt * tz2 * (-C2 * (u[k + 1][j][i][2] * tmp1)); c[j][i][3][3] = dt * tz2 * (2.0 - C2) * (u[k + 1][j][i][3] * tmp1) - dt * tz1 * (r43 * c34 * tmp1) - dt * tz1 * dz4; c[j][i][4][3] = dt * tz2 * C2; c[j][i][0][4] = dt * tz2 * ((C2 * 2.0 * qs[k + 1][j][i] - C1 * u[k + 1][j][i][4]) * (u[k + 1][j][i][3] * tmp2)) - dt * tz1 * (-(c34 - c1345) * tmp3 * (u[k + 1][j][i][1] * u[k + 1][j][i][1]) - (c34 - c1345) * tmp3 * (u[k + 1][j][i][2] * u[k + 1][j][i][2]) - (r43 * c34 - c1345) * tmp3 * (u[k + 1][j][i][3] * u[k + 1][j][i][3]) - c1345 * tmp2 * u[k + 1][j][i][4]); c[j][i][1][4] = dt * tz2 * (-C2 * (u[k + 1][j][i][1] * u[k + 1][j][i][3]) * tmp2) - dt * tz1 * (c34 - c1345) * tmp2 * u[k + 1][j][i][1]; c[j][i][2][4] = dt * tz2 * (-C2 * (u[k + 1][j][i][2] * u[k + 1][j][i][3]) * tmp2) - dt * tz1 * (c34 - c1345) * tmp2 * u[k + 1][j][i][2]; c[j][i][3][4] = dt * tz2 * (C1 * (u[k + 1][j][i][4] * tmp1) - C2 * (qs[k + 1][j][i] * tmp1 + u[k + 1][j][i][3] * u[k + 1][j][i][3] * tmp2)) - dt * tz1 * (r43 * c34 - c1345) * tmp2 * u[k + 1][j][i][3]; c[j][i][4][4] = dt * tz2 * (C1 * (u[k + 1][j][i][3] * tmp1)) - dt * tz1 * c1345 * tmp1 - dt * tz1 * dz5; } } } /* * --------------------------------------------------------------------- * to compute the l2-norm of vector v. * --------------------------------------------------------------------- * to improve cache performance, second two dimensions padded by 1 * for even number sizes only. Only needed in v. * --------------------------------------------------------------------- */ void l2norm(int nx0, int ny0, int nz0, int ist, int iend, int jst, int jend, double v[][ISIZ2 / 2 * 2 + 1][ISIZ1 / 2 * 2 + 1][5], double sum[]) { /* * --------------------------------------------------------------------- * local variables * --------------------------------------------------------------------- */ int i, j, k, m; double sum0 = 0.0, sum1 = 0.0, sum2 = 0.0, sum3 = 0.0, sum4 = 0.0; #pragma omp single for(m = 0; m < 5; m++) { sum[m] = 0.0; } #pragma omp for nowait for(k = 1; k < nz0 - 1; k++) { for(j = jst; j < jend; j++) { for(i = ist; i < iend; i++) { sum0 = sum0 + v[i][j][k][0] * v[i][j][k][0]; sum1 = sum1 + v[i][j][k][1] * v[i][j][k][1]; sum2 = sum2 + v[i][j][k][2] * v[i][j][k][2]; sum3 = sum3 + v[i][j][k][3] * v[i][j][k][3]; sum4 = sum4 + v[i][j][k][4] * v[i][j][k][4]; } } } #pragma omp critical { sum[0] += sum0; sum[1] += sum1; sum[2] += sum2; sum[3] += sum3; sum[4] += sum4; } #pragma omp barrier #pragma omp single for(m = 0; m < 5; m++) { sum[m] = sqrt(sum[m] / ((nx0 - 2) * (ny0 - 2) * (nz0 - 2))); } } void pintgr() { /* * --------------------------------------------------------------------- * local variables * --------------------------------------------------------------------- */ int i, j, k; int ibeg, ifin, ifin1; int jbeg, jfin, jfin1; double phi1[ISIZ3 + 2][ISIZ2 + 2]; double phi2[ISIZ3 + 2][ISIZ2 + 2]; double frc1, frc2, frc3; /* * --------------------------------------------------------------------- * set up the sub-domains for integeration in each processor * --------------------------------------------------------------------- */ ibeg = ii1; ifin = ii2; jbeg = ji1; jfin = ji2; ifin1 = ifin - 1; jfin1 = jfin - 1; /* * --------------------------------------------------------------------- * initialize * --------------------------------------------------------------------- */ for(i = 0; i <= ISIZ2 + 1; i++) { for(k = 0; k <= ISIZ3 + 1; k++) { phi1[k][i] = 0.0; phi2[k][i] = 0.0; } } for(j = jbeg; j < jfin; j++) { for(i = ibeg; i < ifin; i++) { k = ki1; phi1[j][i] = C2 * (u[k][j][i][4] - 0.50 * (u[k][j][i][1] * u[k][j][i][1] + u[k][j][i][2] * u[k][j][i][2] + u[k][j][i][3] * u[k][j][i][3]) / u[k][j][i][0]); k = ki2 - 1; phi2[j][i] = C2 * (u[k][j][i][4] - 0.50 * (u[k][j][i][1] * u[k][j][i][1] + u[k][j][i][2] * u[k][j][i][2] + u[k][j][i][3] * u[k][j][i][3]) / u[k][j][i][0]); } } frc1 = 0.0; for(j = jbeg; j < jfin1; j++) { for(i = ibeg; i < ifin1; i++) { frc1 = frc1 + (phi1[j][i] + phi1[j][i + 1] + phi1[j + 1][i] + phi1[j + 1][i + 1] + phi2[j][i] + phi2[j][i + 1] + phi2[j + 1][i] + phi2[j + 1][i + 1]); } } frc1 = dxi * deta * frc1; /* * --------------------------------------------------------------------- * initialize * --------------------------------------------------------------------- */ for(i = 0; i <= ISIZ2 + 1; i++) { for(k = 0; k <= ISIZ3 + 1; k++) { phi1[k][i] = 0.0; phi2[k][i] = 0.0; } } if(jbeg == ji1) { for(k = ki1; k < ki2; k++) { for(i = ibeg; i < ifin; i++) { phi1[k][i] = C2 * (u[k][jbeg][i][4] - 0.50 * (u[k][jbeg][i][1] * u[k][jbeg][i][1] + u[k][jbeg][i][2] * u[k][jbeg][i][2] + u[k][jbeg][i][3] * u[k][jbeg][i][3]) / u[k][jbeg][i][0]); } } } if(jfin == ji2) { for(k = ki1; k < ki2; k++) { for(i = ibeg; i < ifin; i++) { phi2[k][i] = C2 * (u[k][jfin - 1][i][4] - 0.50 * (u[k][jfin - 1][i][1] * u[k][jfin - 1][i][1] + u[k][jfin - 1][i][2] * u[k][jfin - 1][i][2] + u[k][jfin - 1][i][3] * u[k][jfin - 1][i][3]) / u[k][jfin - 1][i][0]); } } } frc2 = 0.0; for(k = ki1; k < ki2 - 1; k++) { for(i = ibeg; i < ifin1; i++) { frc2 = frc2 + (phi1[k][i] + phi1[k][i + 1] + phi1[k + 1][i] + phi1[k + 1][i + 1] + phi2[k][i] + phi2[k][i + 1] + phi2[k + 1][i] + phi2[k + 1][i + 1]); } } frc2 = dxi * dzeta * frc2; /* * --------------------------------------------------------------------- * initialize * --------------------------------------------------------------------- */ for(i = 0; i <= ISIZ2 + 1; i++) { for(k = 0; k <= ISIZ3 + 1; k++) { phi1[k][i] = 0.0; phi2[k][i] = 0.0; } } if(ibeg == ii1) { for(k = ki1; k < ki2; k++) { for(j = jbeg; j < jfin; j++) { phi1[k][j] = C2 * (u[k][j][ibeg][4] - 0.50 * (u[k][j][ibeg][1] * u[k][j][ibeg][1] + u[k][j][ibeg][2] * u[k][j][ibeg][2] + u[k][j][ibeg][3] * u[k][j][ibeg][3]) / u[k][j][ibeg][0]); } } } if(ifin == ii2) { for(k = ki1; k < ki2; k++) { for(j = jbeg; j < jfin; j++) { phi2[k][j] = C2 * (u[k][j][ifin - 1][4] - 0.50 * (u[k][j][ifin - 1][1] * u[k][j][ifin - 1][1] + u[k][j][ifin - 1][2] * u[k][j][ifin - 1][2] + u[k][j][ifin - 1][3] * u[k][j][ifin - 1][3]) / u[k][j][ifin - 1][0]); } } } frc3 = 0.0; for(k = ki1; k < ki2 - 1; k++) { for(j = jbeg; j < jfin1; j++) { frc3 = frc3 + (phi1[k][j] + phi1[k][j + 1] + phi1[k + 1][j] + phi1[k + 1][j + 1] + phi2[k][j] + phi2[k][j + 1] + phi2[k + 1][j] + phi2[k + 1][j + 1]); } } frc3 = deta * dzeta * frc3; frc = 0.25 * (frc1 + frc2 + frc3); } void read_input() { /* * --------------------------------------------------------------------- * if input file does not exist, it uses defaults * ipr = 1 for detailed progress output * inorm = how often the norm is printed (once every inorm iterations) * itmax = number of pseudo time steps * dt = time step * omega 1 over-relaxation factor for SSOR * tolrsd = steady state residual tolerance levels * nx, ny, nz = number of grid points in x, y, z directions * --------------------------------------------------------------------- */ FILE* fp; int avoid_warning = 0; if((fp = fopen("inputlu.data", "r")) != nullptr) { printf("Reading from input file inputlu.data\n"); while(fgetc(fp) != '\n') ; while(fgetc(fp) != '\n') ; avoid_warning = fscanf(fp, "%d%d", &ipr, &inorm); while(fgetc(fp) != '\n') ; while(fgetc(fp) != '\n') ; while(fgetc(fp) != '\n') ; avoid_warning = fscanf(fp, "%d", &itmax); while(fgetc(fp) != '\n') ; while(fgetc(fp) != '\n') ; while(fgetc(fp) != '\n') ; avoid_warning = fscanf(fp, "%lf", &dt); while(fgetc(fp) != '\n') ; while(fgetc(fp) != '\n') ; while(fgetc(fp) != '\n') ; avoid_warning = fscanf(fp, "%lf", &omega); while(fgetc(fp) != '\n') ; while(fgetc(fp) != '\n') ; while(fgetc(fp) != '\n') ; avoid_warning = fscanf(fp, "%lf%lf%lf%lf%lf", &tolrsd[0], &tolrsd[1], &tolrsd[2], &tolrsd[3], &tolrsd[4]); while(fgetc(fp) != '\n') ; while(fgetc(fp) != '\n') ; avoid_warning = fscanf(fp, "%d%d%d", &nx0, &ny0, &nz0); fclose(fp); } else { ipr = IPR_DEFAULT; inorm = INORM_DEFAULT; itmax = ITMAX_DEFAULT; dt = DT_DEFAULT; omega = OMEGA_DEFAULT; tolrsd[0] = TOLRSD1_DEF; tolrsd[1] = TOLRSD2_DEF; tolrsd[2] = TOLRSD3_DEF; tolrsd[3] = TOLRSD4_DEF; tolrsd[4] = TOLRSD5_DEF; nx0 = ISIZ1; ny0 = ISIZ2; nz0 = ISIZ3; } (void) avoid_warning; /* * --------------------------------------------------------------------- * check problem size * --------------------------------------------------------------------- */ if((nx0 < 4) || (ny0 < 4) || (nz0 < 4)) { printf(" PROBLEM SIZE IS TOO SMALL - \n" " SET EACH OF NX, NY AND NZ AT LEAST EQUAL TO 5\n"); exit(EXIT_FAILURE); } if((nx0 > ISIZ1) || (ny0 > ISIZ2) || (nz0 > ISIZ3)) { printf(" PROBLEM SIZE IS TOO LARGE - \n" " NX, NY AND NZ SHOULD BE EQUAL TO \n" " ISIZ1, ISIZ2 AND ISIZ3 RESPECTIVELY\n"); exit(EXIT_FAILURE); } printf("\n\n NAS Parallel Benchmarks 4.1 Parallel C++ version with OpenMP - LU " "Benchmark\n\n"); printf(" Size: %4dx%4dx%4d\n", nx0, ny0, nz0); printf(" Iterations: %4d\n", itmax); printf("\n"); } /* * --------------------------------------------------------------------- * compute the right hand sides * --------------------------------------------------------------------- */ void rhs() { /* * --------------------------------------------------------------------- * local variables * --------------------------------------------------------------------- */ int i, j, k, m; double q; double tmp, utmp[ISIZ3][6], rtmp[ISIZ3][5]; double u21, u31, u41; double u21i, u31i, u41i, u51i; double u21j, u31j, u41j, u51j; double u21k, u31k, u41k, u51k; double u21im1, u31im1, u41im1, u51im1; double u21jm1, u31jm1, u41jm1, u51jm1; double u21km1, u31km1, u41km1, u51km1; double flux[ISIZ1][5]; if(timeron) { timer_start(T_RHS); } #pragma omp for for(k = 0; k < nz; k++) { for(j = 0; j < ny; j++) { for(i = 0; i < nx; i++) { for(m = 0; m < 5; m++) { rsd[k][j][i][m] = -frct[k][j][i][m]; } tmp = 1.0 / u[k][j][i][0]; rho_i[k][j][i] = tmp; qs[k][j][i] = 0.50 * (u[k][j][i][1] * u[k][j][i][1] + u[k][j][i][2] * u[k][j][i][2] + u[k][j][i][3] * u[k][j][i][3]) * tmp; } } } if(timeron) { timer_start(T_RHSX); } /* * --------------------------------------------------------------------- * xi-direction flux differences * --------------------------------------------------------------------- */ #pragma omp for for(k = 1; k < nz - 1; k++) { for(j = jst; j < jend; j++) { for(i = 0; i < nx; i++) { flux[i][0] = u[k][j][i][1]; u21 = u[k][j][i][1] * rho_i[k][j][i]; q = qs[k][j][i]; flux[i][1] = u[k][j][i][1] * u21 + C2 * (u[k][j][i][4] - q); flux[i][2] = u[k][j][i][2] * u21; flux[i][3] = u[k][j][i][3] * u21; flux[i][4] = (C1 * u[k][j][i][4] - C2 * q) * u21; } for(i = ist; i < iend; i++) { for(m = 0; m < 5; m++) { rsd[k][j][i][m] = rsd[k][j][i][m] - tx2 * (flux[i + 1][m] - flux[i - 1][m]); } } for(i = ist; i < nx; i++) { tmp = rho_i[k][j][i]; u21i = tmp * u[k][j][i][1]; u31i = tmp * u[k][j][i][2]; u41i = tmp * u[k][j][i][3]; u51i = tmp * u[k][j][i][4]; tmp = rho_i[k][j][i - 1]; u21im1 = tmp * u[k][j][i - 1][1]; u31im1 = tmp * u[k][j][i - 1][2]; u41im1 = tmp * u[k][j][i - 1][3]; u51im1 = tmp * u[k][j][i - 1][4]; flux[i][1] = (4.0 / 3.0) * tx3 * (u21i - u21im1); flux[i][2] = tx3 * (u31i - u31im1); flux[i][3] = tx3 * (u41i - u41im1); flux[i][4] = 0.50 * (1.0 - C1 * C5) * tx3 * ((u21i * u21i + u31i * u31i + u41i * u41i) - (u21im1 * u21im1 + u31im1 * u31im1 + u41im1 * u41im1)) + (1.0 / 6.0) * tx3 * (u21i * u21i - u21im1 * u21im1) + C1 * C5 * tx3 * (u51i - u51im1); } for(i = ist; i < iend; i++) { rsd[k][j][i][0] = rsd[k][j][i][0] + dx1 * tx1 * (u[k][j][i - 1][0] - 2.0 * u[k][j][i][0] + u[k][j][i + 1][0]); rsd[k][j][i][1] = rsd[k][j][i][1] + tx3 * C3 * C4 * (flux[i + 1][1] - flux[i][1]) + dx2 * tx1 * (u[k][j][i - 1][1] - 2.0 * u[k][j][i][1] + u[k][j][i + 1][1]); rsd[k][j][i][2] = rsd[k][j][i][2] + tx3 * C3 * C4 * (flux[i + 1][2] - flux[i][2]) + dx3 * tx1 * (u[k][j][i - 1][2] - 2.0 * u[k][j][i][2] + u[k][j][i + 1][2]); rsd[k][j][i][3] = rsd[k][j][i][3] + tx3 * C3 * C4 * (flux[i + 1][3] - flux[i][3]) + dx4 * tx1 * (u[k][j][i - 1][3] - 2.0 * u[k][j][i][3] + u[k][j][i + 1][3]); rsd[k][j][i][4] = rsd[k][j][i][4] + tx3 * C3 * C4 * (flux[i + 1][4] - flux[i][4]) + dx5 * tx1 * (u[k][j][i - 1][4] - 2.0 * u[k][j][i][4] + u[k][j][i + 1][4]); } /* * --------------------------------------------------------------------- * fourth-order dissipation * --------------------------------------------------------------------- */ for(m = 0; m < 5; m++) { rsd[k][j][1][m] = rsd[k][j][1][m] - dssp * (+5.0 * u[k][j][1][m] - 4.0 * u[k][j][2][m] + u[k][j][3][m]); rsd[k][j][2][m] = rsd[k][j][2][m] - dssp * (-4.0 * u[k][j][1][m] + 6.0 * u[k][j][2][m] - 4.0 * u[k][j][3][m] + u[k][j][4][m]); } for(i = 3; i < nx - 3; i++) { for(m = 0; m < 5; m++) { rsd[k][j][i][m] = rsd[k][j][i][m] - dssp * (u[k][j][i - 2][m] - 4.0 * u[k][j][i - 1][m] + 6.0 * u[k][j][i][m] - 4.0 * u[k][j][i + 1][m] + u[k][j][i + 2][m]); } } for(m = 0; m < 5; m++) { rsd[k][j][nx - 3][m] = rsd[k][j][nx - 3][m] - dssp * (u[k][j][nx - 5][m] - 4.0 * u[k][j][nx - 4][m] + 6.0 * u[k][j][nx - 3][m] - 4.0 * u[k][j][nx - 2][m]); rsd[k][j][nx - 2][m] = rsd[k][j][nx - 2][m] - dssp * (u[k][j][nx - 4][m] - 4.0 * u[k][j][nx - 3][m] + 5.0 * u[k][j][nx - 2][m]); } } } if(timeron) { timer_stop(T_RHSX); } if(timeron) { timer_start(T_RHSY); } /* * --------------------------------------------------------------------- * eta-direction flux differences * --------------------------------------------------------------------- */ #pragma omp for for(k = 1; k < nz - 1; k++) { for(i = ist; i < iend; i++) { for(j = 0; j < ny; j++) { flux[j][0] = u[k][j][i][2]; u31 = u[k][j][i][2] * rho_i[k][j][i]; q = qs[k][j][i]; flux[j][1] = u[k][j][i][1] * u31; flux[j][2] = u[k][j][i][2] * u31 + C2 * (u[k][j][i][4] - q); flux[j][3] = u[k][j][i][3] * u31; flux[j][4] = (C1 * u[k][j][i][4] - C2 * q) * u31; } for(j = jst; j < jend; j++) { for(m = 0; m < 5; m++) { rsd[k][j][i][m] = rsd[k][j][i][m] - ty2 * (flux[j + 1][m] - flux[j - 1][m]); } } for(j = jst; j < ny; j++) { tmp = rho_i[k][j][i]; u21j = tmp * u[k][j][i][1]; u31j = tmp * u[k][j][i][2]; u41j = tmp * u[k][j][i][3]; u51j = tmp * u[k][j][i][4]; tmp = rho_i[k][j - 1][i]; u21jm1 = tmp * u[k][j - 1][i][1]; u31jm1 = tmp * u[k][j - 1][i][2]; u41jm1 = tmp * u[k][j - 1][i][3]; u51jm1 = tmp * u[k][j - 1][i][4]; flux[j][1] = ty3 * (u21j - u21jm1); flux[j][2] = (4.0 / 3.0) * ty3 * (u31j - u31jm1); flux[j][3] = ty3 * (u41j - u41jm1); flux[j][4] = 0.50 * (1.0 - C1 * C5) * ty3 * ((u21j * u21j + u31j * u31j + u41j * u41j) - (u21jm1 * u21jm1 + u31jm1 * u31jm1 + u41jm1 * u41jm1)) + (1.0 / 6.0) * ty3 * (u31j * u31j - u31jm1 * u31jm1) + C1 * C5 * ty3 * (u51j - u51jm1); } for(j = jst; j < jend; j++) { rsd[k][j][i][0] = rsd[k][j][i][0] + dy1 * ty1 * (u[k][j - 1][i][0] - 2.0 * u[k][j][i][0] + u[k][j + 1][i][0]); rsd[k][j][i][1] = rsd[k][j][i][1] + ty3 * C3 * C4 * (flux[j + 1][1] - flux[j][1]) + dy2 * ty1 * (u[k][j - 1][i][1] - 2.0 * u[k][j][i][1] + u[k][j + 1][i][1]); rsd[k][j][i][2] = rsd[k][j][i][2] + ty3 * C3 * C4 * (flux[j + 1][2] - flux[j][2]) + dy3 * ty1 * (u[k][j - 1][i][2] - 2.0 * u[k][j][i][2] + u[k][j + 1][i][2]); rsd[k][j][i][3] = rsd[k][j][i][3] + ty3 * C3 * C4 * (flux[j + 1][3] - flux[j][3]) + dy4 * ty1 * (u[k][j - 1][i][3] - 2.0 * u[k][j][i][3] + u[k][j + 1][i][3]); rsd[k][j][i][4] = rsd[k][j][i][4] + ty3 * C3 * C4 * (flux[j + 1][4] - flux[j][4]) + dy5 * ty1 * (u[k][j - 1][i][4] - 2.0 * u[k][j][i][4] + u[k][j + 1][i][4]); } } /* * --------------------------------------------------------------------- * fourth-order dissipation * --------------------------------------------------------------------- */ for(i = ist; i < iend; i++) { for(m = 0; m < 5; m++) { rsd[k][1][i][m] = rsd[k][1][i][m] - dssp * (+5.0 * u[k][1][i][m] - 4.0 * u[k][2][i][m] + u[k][3][i][m]); rsd[k][2][i][m] = rsd[k][2][i][m] - dssp * (-4.0 * u[k][1][i][m] + 6.0 * u[k][2][i][m] - 4.0 * u[k][3][i][m] + u[k][4][i][m]); } } for(j = 3; j < ny - 3; j++) { for(i = ist; i < iend; i++) { for(m = 0; m < 5; m++) { rsd[k][j][i][m] = rsd[k][j][i][m] - dssp * (u[k][j - 2][i][m] - 4.0 * u[k][j - 1][i][m] + 6.0 * u[k][j][i][m] - 4.0 * u[k][j + 1][i][m] + u[k][j + 2][i][m]); } } } for(i = ist; i < iend; i++) { for(m = 0; m < 5; m++) { rsd[k][ny - 3][i][m] = rsd[k][ny - 3][i][m] - dssp * (u[k][ny - 5][i][m] - 4.0 * u[k][ny - 4][i][m] + 6.0 * u[k][ny - 3][i][m] - 4.0 * u[k][ny - 2][i][m]); rsd[k][ny - 2][i][m] = rsd[k][ny - 2][i][m] - dssp * (u[k][ny - 4][i][m] - 4.0 * u[k][ny - 3][i][m] + 5.0 * u[k][ny - 2][i][m]); } } } if(timeron) { timer_stop(T_RHSY); } if(timeron) { timer_start(T_RHSZ); } /* * --------------------------------------------------------------------- * zeta-direction flux differences * --------------------------------------------------------------------- */ #pragma omp for for(j = jst; j < jend; j++) { for(i = ist; i < iend; i++) { for(k = 0; k < nz; k++) { utmp[k][0] = u[k][j][i][0]; utmp[k][1] = u[k][j][i][1]; utmp[k][2] = u[k][j][i][2]; utmp[k][3] = u[k][j][i][3]; utmp[k][4] = u[k][j][i][4]; utmp[k][5] = rho_i[k][j][i]; } for(k = 0; k < nz; k++) { flux[k][0] = utmp[k][3]; u41 = utmp[k][3] * utmp[k][5]; q = qs[k][j][i]; flux[k][1] = utmp[k][1] * u41; flux[k][2] = utmp[k][2] * u41; flux[k][3] = utmp[k][3] * u41 + C2 * (utmp[k][4] - q); flux[k][4] = (C1 * utmp[k][4] - C2 * q) * u41; } for(k = 1; k < nz - 1; k++) { for(m = 0; m < 5; m++) { rtmp[k][m] = rsd[k][j][i][m] - tz2 * (flux[k + 1][m] - flux[k - 1][m]); } } for(k = 1; k < nz; k++) { tmp = utmp[k][5]; u21k = tmp * utmp[k][1]; u31k = tmp * utmp[k][2]; u41k = tmp * utmp[k][3]; u51k = tmp * utmp[k][4]; tmp = utmp[k - 1][5]; u21km1 = tmp * utmp[k - 1][1]; u31km1 = tmp * utmp[k - 1][2]; u41km1 = tmp * utmp[k - 1][3]; u51km1 = tmp * utmp[k - 1][4]; flux[k][1] = tz3 * (u21k - u21km1); flux[k][2] = tz3 * (u31k - u31km1); flux[k][3] = (4.0 / 3.0) * tz3 * (u41k - u41km1); flux[k][4] = 0.50 * (1.0 - C1 * C5) * tz3 * ((u21k * u21k + u31k * u31k + u41k * u41k) - (u21km1 * u21km1 + u31km1 * u31km1 + u41km1 * u41km1)) + (1.0 / 6.0) * tz3 * (u41k * u41k - u41km1 * u41km1) + C1 * C5 * tz3 * (u51k - u51km1); } for(k = 1; k < nz - 1; k++) { rtmp[k][0] = rtmp[k][0] + dz1 * tz1 * (utmp[k - 1][0] - 2.0 * utmp[k][0] + utmp[k + 1][0]); rtmp[k][1] = rtmp[k][1] + tz3 * C3 * C4 * (flux[k + 1][1] - flux[k][1]) + dz2 * tz1 * (utmp[k - 1][1] - 2.0 * utmp[k][1] + utmp[k + 1][1]); rtmp[k][2] = rtmp[k][2] + tz3 * C3 * C4 * (flux[k + 1][2] - flux[k][2]) + dz3 * tz1 * (utmp[k - 1][2] - 2.0 * utmp[k][2] + utmp[k + 1][2]); rtmp[k][3] = rtmp[k][3] + tz3 * C3 * C4 * (flux[k + 1][3] - flux[k][3]) + dz4 * tz1 * (utmp[k - 1][3] - 2.0 * utmp[k][3] + utmp[k + 1][3]); rtmp[k][4] = rtmp[k][4] + tz3 * C3 * C4 * (flux[k + 1][4] - flux[k][4]) + dz5 * tz1 * (utmp[k - 1][4] - 2.0 * utmp[k][4] + utmp[k + 1][4]); } /* * --------------------------------------------------------------------- * fourth-order dissipation * --------------------------------------------------------------------- */ for(m = 0; m < 5; m++) { rsd[1][j][i][m] = rtmp[1][m] - dssp * (+5.0 * utmp[1][m] - 4.0 * utmp[2][m] + utmp[3][m]); rsd[2][j][i][m] = rtmp[2][m] - dssp * (-4.0 * utmp[1][m] + 6.0 * utmp[2][m] - 4.0 * utmp[3][m] + utmp[4][m]); } for(k = 3; k < nz - 3; k++) { for(m = 0; m < 5; m++) { rsd[k][j][i][m] = rtmp[k][m] - dssp * (utmp[k - 2][m] - 4.0 * utmp[k - 1][m] + 6.0 * utmp[k][m] - 4.0 * utmp[k + 1][m] + utmp[k + 2][m]); } } for(m = 0; m < 5; m++) { rsd[nz - 3][j][i][m] = rtmp[nz - 3][m] - dssp * (utmp[nz - 5][m] - 4.0 * utmp[nz - 4][m] + 6.0 * utmp[nz - 3][m] - 4.0 * utmp[nz - 2][m]); rsd[nz - 2][j][i][m] = rtmp[nz - 2][m] - dssp * (utmp[nz - 4][m] - 4.0 * utmp[nz - 3][m] + 5.0 * utmp[nz - 2][m]); } } } if(timeron) { timer_stop(T_RHSZ); } if(timeron) { timer_stop(T_RHS); } } /* * --------------------------------------------------------------------- * set the boundary values of dependent variables * --------------------------------------------------------------------- */ void setbv() { /* * --------------------------------------------------------------------- * local variables * --------------------------------------------------------------------- */ int i, j, k, m; double temp1[5], temp2[5]; /* * --------------------------------------------------------------------- * set the dependent variable values along the top and bottom faces * --------------------------------------------------------------------- */ #pragma omp for for(j = 0; j < ny; j++) { for(i = 0; i < nx; i++) { exact(i, j, 0, temp1); exact(i, j, nz - 1, temp2); for(m = 0; m < 5; m++) { u[0][j][i][m] = temp1[m]; u[nz - 1][j][i][m] = temp2[m]; } } } /* * --------------------------------------------------------------------- * set the dependent variable values along north and south faces * --------------------------------------------------------------------- */ #pragma omp for for(k = 0; k < nz; k++) { for(i = 0; i < nx; i++) { exact(i, 0, k, temp1); exact(i, ny - 1, k, temp2); for(m = 0; m < 5; m++) { u[k][0][i][m] = temp1[m]; u[k][ny - 1][i][m] = temp2[m]; } } } /* * --------------------------------------------------------------------- * set the dependent variable values along east and west faces * --------------------------------------------------------------------- */ #pragma omp for for(k = 0; k < nz; k++) { for(j = 0; j < ny; j++) { exact(0, j, k, temp1); exact(nx - 1, j, k, temp2); for(m = 0; m < 5; m++) { u[k][j][0][m] = temp1[m]; u[k][j][nx - 1][m] = temp2[m]; } } } } void setcoeff() { /* * --------------------------------------------------------------------- * local variables * --------------------------------------------------------------------- * set up coefficients * --------------------------------------------------------------------- */ dxi = 1.0 / (nx0 - 1); deta = 1.0 / (ny0 - 1); dzeta = 1.0 / (nz0 - 1); tx1 = 1.0 / (dxi * dxi); tx2 = 1.0 / (2.0 * dxi); tx3 = 1.0 / dxi; ty1 = 1.0 / (deta * deta); ty2 = 1.0 / (2.0 * deta); ty3 = 1.0 / deta; tz1 = 1.0 / (dzeta * dzeta); tz2 = 1.0 / (2.0 * dzeta); tz3 = 1.0 / dzeta; /* * --------------------------------------------------------------------- * diffusion coefficients * --------------------------------------------------------------------- */ dx1 = 0.75; dx2 = dx1; dx3 = dx1; dx4 = dx1; dx5 = dx1; dy1 = 0.75; dy2 = dy1; dy3 = dy1; dy4 = dy1; dy5 = dy1; dz1 = 1.00; dz2 = dz1; dz3 = dz1; dz4 = dz1; dz5 = dz1; /* * --------------------------------------------------------------------- * fourth difference dissipation * --------------------------------------------------------------------- */ dssp = (max(max(dx1, dy1), dz1)) / 4.0; /* * --------------------------------------------------------------------- * coefficients of the exact solution to the first pde * --------------------------------------------------------------------- */ ce[0][0] = 2.0; ce[1][0] = 0.0; ce[2][0] = 0.0; ce[3][0] = 4.0; ce[4][0] = 5.0; ce[5][0] = 3.0; ce[6][0] = 5.0e-01; ce[7][0] = 2.0e-02; ce[8][0] = 1.0e-02; ce[9][0] = 3.0e-02; ce[10][0] = 5.0e-01; ce[11][0] = 4.0e-01; ce[12][0] = 3.0e-01; /* * --------------------------------------------------------------------- * coefficients of the exact solution to the second pde * --------------------------------------------------------------------- */ ce[0][1] = 1.0; ce[1][1] = 0.0; ce[2][1] = 0.0; ce[3][1] = 0.0; ce[4][1] = 1.0; ce[5][1] = 2.0; ce[6][1] = 3.0; ce[7][1] = 1.0e-02; ce[8][1] = 3.0e-02; ce[9][1] = 2.0e-02; ce[10][1] = 4.0e-01; ce[11][1] = 3.0e-01; ce[12][1] = 5.0e-01; /* * --------------------------------------------------------------------- * coefficients of the exact solution to the third pde * --------------------------------------------------------------------- */ ce[0][2] = 2.0; ce[1][2] = 2.0; ce[2][2] = 0.0; ce[3][2] = 0.0; ce[4][2] = 0.0; ce[5][2] = 2.0; ce[6][2] = 3.0; ce[7][2] = 4.0e-02; ce[8][2] = 3.0e-02; ce[9][2] = 5.0e-02; ce[10][2] = 3.0e-01; ce[11][2] = 5.0e-01; ce[12][2] = 4.0e-01; /* * --------------------------------------------------------------------- * coefficients of the exact solution to the fourth pde * --------------------------------------------------------------------- */ ce[0][3] = 2.0; ce[1][3] = 2.0; ce[2][3] = 0.0; ce[3][3] = 0.0; ce[4][3] = 0.0; ce[5][3] = 2.0; ce[6][3] = 3.0; ce[7][3] = 3.0e-02; ce[8][3] = 5.0e-02; ce[9][3] = 4.0e-02; ce[10][3] = 2.0e-01; ce[11][3] = 1.0e-01; ce[12][3] = 3.0e-01; /* * --------------------------------------------------------------------- * coefficients of the exact solution to the fifth pde * --------------------------------------------------------------------- */ ce[0][4] = 5.0; ce[1][4] = 4.0; ce[2][4] = 3.0; ce[3][4] = 2.0; ce[4][4] = 1.0e-01; ce[5][4] = 4.0e-01; ce[6][4] = 3.0e-01; ce[7][4] = 5.0e-02; ce[8][4] = 4.0e-02; ce[9][4] = 3.0e-02; ce[10][4] = 1.0e-01; ce[11][4] = 3.0e-01; ce[12][4] = 2.0e-01; } /* * --------------------------------------------------------------------- * set the initial values of independent variables based on tri-linear * interpolation of boundary values in the computational space. * --------------------------------------------------------------------- */ void setiv() { /* * --------------------------------------------------------------------- * local variables * --------------------------------------------------------------------- */ int i, j, k, m; double xi, eta, zeta; double pxi, peta, pzeta; double ue_1jk[5], ue_nx0jk[5], ue_i1k[5]; double ue_iny0k[5], ue_ij1[5], ue_ijnz[5]; #pragma omp for for(k = 1; k < nz - 1; k++) { zeta = ((double) k) / (nz - 1); for(j = 1; j < ny - 1; j++) { eta = ((double) j) / (ny0 - 1); for(i = 1; i < nx - 1; i++) { xi = ((double) i) / (nx0 - 1); exact(0, j, k, ue_1jk); exact(nx0 - 1, j, k, ue_nx0jk); exact(i, 0, k, ue_i1k); exact(i, ny0 - 1, k, ue_iny0k); exact(i, j, 0, ue_ij1); exact(i, j, nz - 1, ue_ijnz); for(m = 0; m < 5; m++) { pxi = (1.0 - xi) * ue_1jk[m] + xi * ue_nx0jk[m]; peta = (1.0 - eta) * ue_i1k[m] + eta * ue_iny0k[m]; pzeta = (1.0 - zeta) * ue_ij1[m] + zeta * ue_ijnz[m]; u[k][j][i][m] = pxi + peta + pzeta - pxi * peta - peta * pzeta - pzeta * pxi + pxi * peta * pzeta; } } } } } /* * --------------------------------------------------------------------- * to perform pseudo-time stepping SSOR iterations * for five nonlinear pde's. * --------------------------------------------------------------------- */ void ssor(int niter) { /* * --------------------------------------------------------------------- * local variables * --------------------------------------------------------------------- */ int i, j, k, m, n; int istep; double tmp, tv[ISIZ2 * (ISIZ1 / 2 * 2 + 1) * 5]; double delunm[5]; /* * --------------------------------------------------------------------- * begin pseudo-time stepping iterations * --------------------------------------------------------------------- */ tmp = 1.0 / (omega * (2.0 - omega)); /* * --------------------------------------------------------------------- * initialize a,b,c,d to zero (guarantees that page tables have been * formed, if applicable on given architecture, before timestepping). * --------------------------------------------------------------------- */ #pragma omp parallel for private(i, j, n, m) for(j = 0; j < ISIZ2; j++) { for(i = 0; i < ISIZ1; i++) { for(n = 0; n < 5; n++) { for(m = 0; m < 5; m++) { a[j][i][n][m] = 0.0; b[j][i][n][m] = 0.0; c[j][i][n][m] = 0.0; d[j][i][n][m] = 0.0; } } } } for(i = 1; i <= T_LAST; i++) { timer_clear(i); } #pragma omp parallel { /* * --------------------------------------------------------------------- * compute the steady-state residuals * --------------------------------------------------------------------- */ rhs(); /* * --------------------------------------------------------------------- * compute the L2 norms of newton iteration residuals * --------------------------------------------------------------------- */ l2norm(nx0, ny0, nz0, ist, iend, jst, jend, rsd, rsdnm); } /* end parallel */ for(i = 1; i <= T_LAST; i++) { timer_clear(i); } timer_start(1); #pragma omp parallel private(istep, i, j, k, m) { /* * --------------------------------------------------------------------- * the timestep loop * --------------------------------------------------------------------- */ for(istep = 1; istep <= niter; istep++) { if((istep % 20) == 0 || istep == itmax || istep == 1) { #pragma omp master if(niter > 1) { printf(" Time step %4d\n", istep); } } /* * --------------------------------------------------------------------- * perform SSOR iteration * --------------------------------------------------------------------- */ if(timeron) { #pragma omp master timer_start(T_RHS); } #pragma omp for for(k = 1; k < nz - 1; k++) { for(j = jst; j < jend; j++) { for(i = ist; i < iend; i++) { for(m = 0; m < 5; m++) { rsd[k][j][i][m] = dt * rsd[k][j][i][m]; } } } } if(timeron) { #pragma omp master timer_stop(T_RHS); } for(k = 1; k < nz - 1; k++) { /* * --------------------------------------------------------------------- * form the lower triangular part of the jacobian matrix * --------------------------------------------------------------------- */ if(timeron) { #pragma omp master timer_start(T_JACLD); } jacld(k); if(timeron) { #pragma omp master timer_stop(T_JACLD); } /* * --------------------------------------------------------------------- * perform the lower triangular solution * --------------------------------------------------------------------- */ if(timeron) { #pragma omp master timer_start(T_BLTS); } blts(nx, ny, nz, k, omega, rsd, a, b, c, d, ist, iend, jst, jend, nx0, ny0); if(timeron) { #pragma omp master timer_stop(T_BLTS); } } #pragma omp barrier for(k = nz - 2; k > 0; k--) { /* * --------------------------------------------------------------------- * form the strictly upper triangular part of the jacobian matrix * --------------------------------------------------------------------- */ if(timeron) { #pragma omp master timer_start(T_JACU); } jacu(k); if(timeron) { #pragma omp master timer_stop(T_JACU); } /* * --------------------------------------------------------------------- * perform the upper triangular solution * --------------------------------------------------------------------- */ if(timeron) { #pragma omp master timer_start(T_BUTS); } buts(nx, ny, nz, k, omega, rsd, tv, d, a, b, c, ist, iend, jst, jend, nx0, ny0); if(timeron) { #pragma omp master timer_stop(T_BUTS); } } #pragma omp barrier /* * --------------------------------------------------------------------- * update the variables * --------------------------------------------------------------------- */ if(timeron) { #pragma omp master timer_start(T_ADD); } #pragma omp for for(k = 1; k < nz - 1; k++) { for(j = jst; j < jend; j++) { for(i = ist; i < iend; i++) { for(m = 0; m < 5; m++) { u[k][j][i][m] = u[k][j][i][m] + tmp * rsd[k][j][i][m]; } } } } if(timeron) { #pragma omp master timer_stop(T_ADD); } /* * --------------------------------------------------------------------- * compute the max-norms of newton iteration corrections * --------------------------------------------------------------------- */ if((istep % inorm) == 0) { if(timeron) { #pragma omp master timer_start(T_L2NORM); } l2norm(nx0, ny0, nz0, ist, iend, jst, jend, rsd, delunm); if(timeron) { #pragma omp master timer_stop(T_L2NORM); } } /* * --------------------------------------------------------------------- * compute the steady-state residuals * --------------------------------------------------------------------- */ rhs(); /* * --------------------------------------------------------------------- * compute the max-norms of newton iteration residuals * --------------------------------------------------------------------- */ if(((istep % inorm) == 0) || (istep == itmax)) { if(timeron) { #pragma omp master timer_start(T_L2NORM); } l2norm(nx0, ny0, nz0, ist, iend, jst, jend, rsd, rsdnm); if(timeron) { #pragma omp master timer_stop(T_L2NORM); } } /* * --------------------------------------------------------------------- * check the newton-iteration residuals against the tolerance levels * --------------------------------------------------------------------- */ if((rsdnm[0] < tolrsd[0]) && (rsdnm[1] < tolrsd[1]) && (rsdnm[2] < tolrsd[2]) && (rsdnm[3] < tolrsd[3]) && (rsdnm[4] < tolrsd[4])) { #pragma omp master printf(" \n convergence was achieved after %4d pseudo-time steps\n", istep); break; } } } /* end parallel */ timer_stop(1); maxtime = timer_read(1); } /* * --------------------------------------------------------------------- * verification routine * --------------------------------------------------------------------- */ void verify(double xcr[], double xce[], double xci, char* class_npb, boolean* verified) { double xcrref[5], xceref[5], xciref; double xcrdif[5], xcedif[5], xcidif; double epsilon, dtref = 0.0; int m; /* * --------------------------------------------------------------------- * tolerance level * --------------------------------------------------------------------- */ epsilon = 1.0e-08; *class_npb = 'U'; *verified = TRUE; for(m = 0; m < 5; m++) { xcrref[m] = 1.0; xceref[m] = 1.0; } xciref = 1.0; if((nx0 == 12) && (ny0 == 12) && (nz0 == 12) && (itmax == 50)) { *class_npb = 'S'; dtref = 5.0e-1; /* * --------------------------------------------------------------------- * reference values of RMS-norms of residual, for the (12X12X12) grid, * after 50 time steps, with DT = 5.0d-01 * --------------------------------------------------------------------- */ xcrref[0] = 1.6196343210976702e-02; xcrref[1] = 2.1976745164821318e-03; xcrref[2] = 1.5179927653399185e-03; xcrref[3] = 1.5029584435994323e-03; xcrref[4] = 3.4264073155896461e-02; /* * --------------------------------------------------------------------- * reference values of RMS-norms of solution error, for the (12X12X12) grid, * after 50 time steps, with DT = 5.0d-01 * --------------------------------------------------------------------- */ xceref[0] = 6.4223319957960924e-04; xceref[1] = 8.4144342047347926e-05; xceref[2] = 5.8588269616485186e-05; xceref[3] = 5.8474222595157350e-05; xceref[4] = 1.3103347914111294e-03; /* * --------------------------------------------------------------------- * reference value of surface integral, for the (12X12X12) grid, * after 50 time steps, with DT = 5.0d-01 * --------------------------------------------------------------------- */ xciref = 7.8418928865937083e+00; } else if((nx0 == 33) && (ny0 == 33) && (nz0 == 33) && (itmax == 300)) { *class_npb = 'W'; /* SPEC95fp size */ dtref = 1.5e-3; /* * --------------------------------------------------------------------- * reference values of RMS-norms of residual, for the (33x33x33) grid, * after 300 time steps, with DT = 1.5d-3 * --------------------------------------------------------------------- */ xcrref[0] = 0.1236511638192e+02; xcrref[1] = 0.1317228477799e+01; xcrref[2] = 0.2550120713095e+01; xcrref[3] = 0.2326187750252e+01; xcrref[4] = 0.2826799444189e+02; /* * --------------------------------------------------------------------- * reference values of RMS-norms of solution error, for the (33X33X33) grid, * --------------------------------------------------------------------- */ xceref[0] = 0.4867877144216e+00; xceref[1] = 0.5064652880982e-01; xceref[2] = 0.9281818101960e-01; xceref[3] = 0.8570126542733e-01; xceref[4] = 0.1084277417792e+01; /* * --------------------------------------------------------------------- * rReference value of surface integral, for the (33X33X33) grid, * after 300 time steps, with DT = 1.5d-3 * --------------------------------------------------------------------- */ xciref = 0.1161399311023e+02; } else if((nx0 == 64) && (ny0 == 64) && (nz0 == 64) && (itmax == 250)) { *class_npb = 'A'; dtref = 2.0e+0; /* * --------------------------------------------------------------------- * reference values of RMS-norms of residual, for the (64X64X64) grid, * after 250 time steps, with DT = 2.0d+00 * --------------------------------------------------------------------- */ xcrref[0] = 7.7902107606689367e+02; xcrref[1] = 6.3402765259692870e+01; xcrref[2] = 1.9499249727292479e+02; xcrref[3] = 1.7845301160418537e+02; xcrref[4] = 1.8384760349464247e+03; /* * --------------------------------------------------------------------- * reference values of RMS-norms of solution error, for the (64X64X64) grid, * after 250 time steps, with DT = 2.0d+00 * --------------------------------------------------------------------- */ xceref[0] = 2.9964085685471943e+01; xceref[1] = 2.8194576365003349e+00; xceref[2] = 7.3473412698774742e+00; xceref[3] = 6.7139225687777051e+00; xceref[4] = 7.0715315688392578e+01; /* * --------------------------------------------------------------------- * reference value of surface integral, for the (64X64X64) grid, * after 250 time steps, with DT = 2.0d+00 * --------------------------------------------------------------------- */ xciref = 2.6030925604886277e+01; } else if((nx0 == 102) && (ny0 == 102) && (nz0 == 102) && (itmax == 250)) { *class_npb = 'B'; dtref = 2.0e+0; /* * --------------------------------------------------------------------- * reference values of RMS-norms of residual, for the (102X102X102) grid, * after 250 time steps, with DT = 2.0d+00 * --------------------------------------------------------------------- */ xcrref[0] = 3.5532672969982736e+03; xcrref[1] = 2.6214750795310692e+02; xcrref[2] = 8.8333721850952190e+02; xcrref[3] = 7.7812774739425265e+02; xcrref[4] = 7.3087969592545314e+03; /* * --------------------------------------------------------------------- * reference values of RMS-norms of solution error, for the (102X102X102) * grid, after 250 time steps, with DT = 2.0d+00 * --------------------------------------------------------------------- */ xceref[0] = 1.1401176380212709e+02; xceref[1] = 8.1098963655421574e+00; xceref[2] = 2.8480597317698308e+01; xceref[3] = 2.5905394567832939e+01; xceref[4] = 2.6054907504857413e+02; /* c--------------------------------------------------------------------- * reference value of surface integral, for the (102X102X102) grid, * after 250 time steps, with DT = 2.0d+00 * --------------------------------------------------------------------- */ xciref = 4.7887162703308227e+01; } else if((nx0 == 162) && (ny0 == 162) && (nz0 == 162) && (itmax == 250)) { *class_npb = 'C'; dtref = 2.0e+0; /* * --------------------------------------------------------------------- * reference values of RMS-norms of residual, for the (162X162X162) grid, * after 250 time steps, with DT = 2.0d+00 * --------------------------------------------------------------------- */ xcrref[0] = 1.03766980323537846e+04; xcrref[1] = 8.92212458801008552e+02; xcrref[2] = 2.56238814582660871e+03; xcrref[3] = 2.19194343857831427e+03; xcrref[4] = 1.78078057261061185e+04; /* * --------------------------------------------------------------------- * reference values of RMS-norms of solution error, for the (162X162X162) * grid, after 250 time steps, with DT = 2.0d+00 * --------------------------------------------------------------------- */ xceref[0] = 2.15986399716949279e+02; xceref[1] = 1.55789559239863600e+01; xceref[2] = 5.41318863077207766e+01; xceref[3] = 4.82262643154045421e+01; xceref[4] = 4.55902910043250358e+02; /* * --------------------------------------------------------------------- * reference value of surface integral, for the (162X162X162) grid, * after 250 time steps, with DT = 2.0d+00 * --------------------------------------------------------------------- */ xciref = 6.66404553572181300e+01; /* * --------------------------------------------------------------------- * reference value of surface integral, for the (162X162X162) grid, * after 250 time steps, with DT = 2.0d+00 * --------------------------------------------------------------------- */ xciref = 6.66404553572181300e+01; } else if((nx0 == 408) && (ny0 == 408) && (nz0 == 408) && (itmax == 300)) { *class_npb = 'D'; dtref = 1.0e+0; /* * --------------------------------------------------------------------- * reference values of RMS-norms of residual, for the (408X408X408) grid, * after 300 time steps, with DT = 1.0d+00 * --------------------------------------------------------------------- */ xcrref[0] = 0.4868417937025e+05; xcrref[1] = 0.4696371050071e+04; xcrref[2] = 0.1218114549776e+05; xcrref[3] = 0.1033801493461e+05; xcrref[4] = 0.7142398413817e+05; /* * --------------------------------------------------------------------- * reference values of RMS-norms of solution error, for the (408X408X408) * grid, after 300 time steps, with DT = 1.0d+00 * --------------------------------------------------------------------- */ xceref[0] = 0.3752393004482e+03; xceref[1] = 0.3084128893659e+02; xceref[2] = 0.9434276905469e+02; xceref[3] = 0.8230686681928e+02; xceref[4] = 0.7002620636210e+03; /* * --------------------------------------------------------------------- * reference value of surface integral, for the (408X408X408) grid, * after 300 time steps, with DT = 1.0d+00 * --------------------------------------------------------------------- */ xciref = 0.8334101392503e+02; } else if((nx0 == 1020) && (ny0 == 1020) && (nz0 == 1020) && (itmax == 300)) { *class_npb = 'E'; dtref = 0.5e+0; /* * --------------------------------------------------------------------- * reference values of RMS-norms of residual, for the (1020X1020X1020) grid, * after 300 time steps, with DT = 0.5d+00 * --------------------------------------------------------------------- */ xcrref[0] = 0.2099641687874e+06; xcrref[1] = 0.2130403143165e+05; xcrref[2] = 0.5319228789371e+05; xcrref[3] = 0.4509761639833e+05; xcrref[4] = 0.2932360006590e+06; /* * --------------------------------------------------------------------- * reference values of RMS-norms of solution error, for the (1020X1020X1020) * grid, after 300 time steps, with DT = 0.5d+00 * --------------------------------------------------------------------- */ xceref[0] = 0.4800572578333e+03; xceref[1] = 0.4221993400184e+02; xceref[2] = 0.1210851906824e+03; xceref[3] = 0.1047888986770e+03; xceref[4] = 0.8363028257389e+03; /* * --------------------------------------------------------------------- * reference value of surface integral, for the (1020X1020X1020) grid, * after 300 time steps, with DT = 0.5d+00 * --------------------------------------------------------------------- */ xciref = 0.9512163272273e+02; } else { *verified = FALSE; } /* * --------------------------------------------------------------------- * verification test for residuals if gridsize is one of * the defined grid sizes above (class .ne. 'U') * --------------------------------------------------------------------- * compute the difference of solution values and the known reference values. * --------------------------------------------------------------------- */ for(m = 0; m < 5; m++) { xcrdif[m] = fabs((xcr[m] - xcrref[m]) / xcrref[m]); xcedif[m] = fabs((xce[m] - xceref[m]) / xceref[m]); } xcidif = fabs((xci - xciref) / xciref); /* * --------------------------------------------------------------------- * output the comparison of computed results to known cases. * --------------------------------------------------------------------- */ if(*class_npb != 'U') { printf("\n Verification being performed for class_npb %c\n", *class_npb); printf(" Accuracy setting for epsilon = %20.13E\n", epsilon); *verified = (fabs(dt - dtref) <= epsilon); if(!(*verified)) { *class_npb = 'U'; printf(" DT does not match the reference value of %15.8E\n", dtref); } } else { printf(" Unknown class_npb\n"); } if(*class_npb != 'U') { printf(" Comparison of RMS-norms of residual\n"); } else { printf(" RMS-norms of residual\n"); } for(m = 0; m < 5; m++) { if(*class_npb == 'U') { printf(" %2d %20.13E\n", m + 1, xcr[m]); } else if(xcrdif[m] <= epsilon) { printf(" %2d %20.13E%20.13E%20.13E\n", m + 1, xcr[m], xcrref[m], xcrdif[m]); } else { *verified = FALSE; printf(" FAILURE: %2d %20.13E%20.13E%20.13E\n", m + 1, xcr[m], xcrref[m], xcrdif[m]); } } if(*class_npb != 'U') { printf(" Comparison of RMS-norms of solution error\n"); } else { printf(" RMS-norms of solution error\n"); } for(m = 0; m < 5; m++) { if(*class_npb == 'U') { printf(" %2d %20.13E\n", m + 1, xce[m]); } else if(xcedif[m] <= epsilon) { printf(" %2d %20.13E%20.13E%20.13E\n", m + 1, xce[m], xceref[m], xcedif[m]); } else { *verified = FALSE; printf(" FAILURE: %2d %20.13E%20.13E%20.13E\n", m + 1, xce[m], xceref[m], xcedif[m]); } } if(*class_npb != 'U') { printf(" Comparison of surface integral\n"); } else { printf(" Surface integral\n"); } if(*class_npb == 'U') { printf(" %20.13E\n", xci); } else if(xcidif <= epsilon) { printf(" %20.13E%20.13E%20.13E\n", xci, xciref, xcidif); } else { *verified = FALSE; printf(" FAILURE: %20.13E%20.13E%20.13E\n", xci, xciref, xcidif); } if(*class_npb == 'U') { printf(" No reference values provided\n"); printf("No verification performed\n"); } else if(*verified) { printf(" Verification Successful\n"); } else { printf(" Verification failed\n"); } }