# # Ported to limbo by Nick Zhang (zhang@xeti.com), 11/13/98 # Ported from Linpack.java # # Modified 3/3/97 by David M. Doolin (dmd) doolin@cs.utk.edu # Fixed error in matgen() method. Added some comments. # # Modified 1/22/97 by Paul McMahan mcmahan@cs.utk.edu # Added more MacOS options to form. # # Optimized by Jonathan Hardwick (jch@cs.cmu.edu), 3/28/96 # Compare to Linkpack.java. # Optimizations performed: # - added "final" modifier to performance-critical methods. # - changed lines of the form "a[i] = a[i] + x" to "a[i] += x". # - minimized array references using common subexpression elimination. # - eliminated unused variables. # - undid an unrolled loop. # - added temporary 1D arrays to hold frequently-used columns of 2D arrays. # - wrote my own math->fabs() method # See http://www.cs.cmu.edu/~jch/java/linpack.html for more details. # # # Ported to Java by Reed Wade (wade@cs.utk.edu) 2/96 # built using JDK 1.0 on solaris # using "javac -O Linpack.java" # # # Translated to C by Bonnie Toy 5/88 # (modified on 2/25/94 to fix a problem with daxpy for # unequal increments or equal increments not equal to 1. # Jack Dongarra) # implement Linpack; include "sys.m"; include "draw.m"; include "math.m"; Linpack: module { init: fn(ctxt: ref Draw->Context, argv: list of string); run_benchmark: fn() ; }; sys: Sys; math: Math; timeOrig := -1; init(ctxt: ref Draw->Context, argv: list of string) { sys = load Sys Sys->PATH; math = load Math Math->PATH; run_benchmark(); } run_benchmark() { mflops_result := 0.0; residn_result := 0.0; time_result := 0.0; eps_result := 0.0; a := array[200] of {* => array[200] of {* => 0.0}}; b := array[200] of real; x := array[200] of real; total:real; norma:real; normx:real; resid:real; time : real; kf :real; ntimes:int; info:int; kflops: int; ipvt := array[200] of int; lda := 201; ldaa := 200; cray := .056; n := 100; ops := (2.0e0*(real (n*n*n)))/3.0 + 2.0*(real (n*n)); norma = matgen(a,lda,n,b); time = second(); info = dgefa(a,lda,n,ipvt); dgesl(a,lda,n,ipvt,b,0); total = second() - time; for (i := 0; i < n; i++) { x[i] = b[i]; } norma = matgen(a,lda,n,b); for (i = 0; i < n; i++) { b[i] = -b[i]; } dmxpy(n,b,n,lda,x,a); resid = 0.0; normx = 0.0; for (i = 0; i < n; i++) { if (resid <= math->fabs(b[i])) resid = math->fabs(b[i]); if (normx <= math->fabs(x[i])) normx = math->fabs(x[i]); } eps_result = epslon(1.0); residn_result = resid/((real n)*norma*normx*eps_result ); residn_result += 0.005; # for rounding residn_result = residn_result*( real 100); residn_result /= real 100; time_result = total; time_result += 0.005; # for rounding time_result = time_result*(real 100); time_result /= real 100; mflops_result = ops/(1.0e6*total); mflops_result += 0.0005; # for rounding mflops_result = mflops_result* real(1000); mflops_result /= real 1000; sys->print("Mflops/s: %g Time: %g secs Norm Res: %g Precision: %g\n", mflops_result , time_result, residn_result , eps_result); } matgen (a: array of array of real, lda:int, n:int, b: array of real): real { init := 1325; norma := 0.0; # Next two for() statements switched. Solver wants # matrix in column order. --dmd 3/3/97 for (i := 0; i < n; i++) { for (j := 0; j < n; j++) { init = 3125*init % 65536; a[j][i] = ( (real init) - 32768.0)/16384.0; if(a[j][i] > norma) norma = a[j][i]; } } for (i = 0; i < n; i++) { b[i] = 0.0; } for (j := 0; j < n; j++) { for (i = 0; i < n; i++) { b[i] += a[j][i]; } } return norma; } # # dgefa factors a double precision matrix by gaussian elimination. # # dgefa is usually called by dgeco, but it can be called # directly with a saving in time if rcond is not needed. # (time for dgeco) = (1 + 9/n)*(time for dgefa) . # # on entry # # a double precision[n][lda] # the matrix to be factored. # # lda integer # the leading dimension of the array a . # # n integer # the order of the matrix a . # # on return # # a an upper triangular matrix and the multipliers # which were used to obtain it. # the factorization can be written a = l*u where # l is a product of permutation and unit lower # triangular matrices and u is upper triangular. # # ipvt integer[n] # an integer vector of pivot indices. # # info integer # = 0 normal value. # = k if u[k][k] .eq. 0.0 . this is not an error # condition for this subroutine, but it does # indicate that dgesl or dgedi will divide by zero # if called. use rcond in dgeco for a reliable # indication of singularity. # # linpack. this version dated 08/14/78. # cleve moler, university of new mexico, argonne national lab. # # functions # # blas daxpy,dscal,idamax dgefa(a: array of array of real, lda: int, n: int, ipvt: array of int) : int { col_k: array of real; col_j: array of real; t:real; j: int; kp1: int; l: int; # gaussian elimination with partial pivoting info := 0; nm1 := n - 1; if (nm1 >= 0) { for (k := 0; k < nm1; k++) { col_k = a[k]; kp1 = k + 1; # find l = pivot index l = idamax(n-k,col_k,k,1) + k; ipvt[k] = l; # zero pivot implies this column already triangularized if (col_k[l] != 0.0) { # interchange if necessary if (l != k) { t = col_k[l]; col_k[l] = col_k[k]; col_k[k] = t; } # compute multipliers t = -1.0/col_k[k]; dscal(n-(kp1),t,col_k,kp1,1); # row elimination with column indexing for (j = kp1; j < n; j++) { col_j = a[j]; t = col_j[l]; if (l != k) { col_j[l] = col_j[k]; col_j[k] = t; } daxpy(n-(kp1),t,col_k,kp1,1, col_j,kp1,1); } } # if else { info = k; } } } ipvt[n-1] = n-1; if (a[(n-1)][(n-1)] == 0.0) info = n-1; return info; } # # dgesl solves the double precision system # a * x = b or trans(a) * x = b # using the factors computed by dgeco or dgefa. # # on entry # # a double precision[n][lda] # the output from dgeco or dgefa. # # lda integer # the leading dimension of the array a . # # n integer # the order of the matrix a . # # ipvt integer[n] # the pivot vector from dgeco or dgefa. # b double precision[n] # the right hand side vector. # # job integer # = 0 to solve a*x = b , # = nonzero to solve trans(a)*x = b where # trans(a) is the transpose. # # on return # # b the solution vector x . # # error condition # # a division by zero will occur if the input factor contains a # zero on the diagonal. technically this indicates singularity # but it is often caused by improper arguments or improper # setting of lda . it will not occur if the subroutines are # called correctly and if dgeco has set rcond .gt. 0.0 # or dgefa has set info .eq. 0 . # # to compute inverse(a) * c where c is a matrix # with p columns # dgeco(a,lda,n,ipvt,rcond,z) # if (!rcond is too small){ # for (j=0,j
= 1) { for (k = 0; k < nm1; k++) { l = ipvt[k]; t = b[l]; if (l != k){ b[l] = b[k]; b[k] = t; } kp1 = k + 1; daxpy(n-(kp1),t,a[k],kp1,1,b,kp1,1); } } # now solve u*x = y for (kb = 0; kb < n; kb++) { k = n - (kb + 1); b[k] /= a[k][k]; t = -b[k]; daxpy(k,t,a[k],0,1,b,0,1); } } else { # job = nonzero, solve trans(a) * x = b. first solve trans(u)*y = b for (k = 0; k < n; k++) { t = ddot(k,a[k],0,1,b,0,1); b[k] = (b[k] - t)/a[k][k]; } # now solve trans(l)*x = y if (nm1 >= 1) { for (kb = 1; kb < nm1; kb++) { k = n - (kb+1); kp1 = k + 1; b[k] += ddot(n-(kp1),a[k],kp1,1,b,kp1,1); l = ipvt[k]; if (l != k) { t = b[l]; b[l] = b[k]; b[k] = t; } } } } } # # constant times a vector plus a vector. # jack dongarra, linpack, 3/11/78. # daxpy( n:int, da:real, dx: array of real, dx_off:int, incx:int, dy: array of real, dy_off:int, incy:int) { i:int; ix:int; iy:int; if ((n > 0) && (da != 0.0)) { if (incx != 1 || incy != 1) { # code for unequal increments or equal increments not equal to 1 ix = 0; iy = 0; if (incx < 0) ix = (-n+1)*incx; if (incy < 0) iy = (-n+1)*incy; for (i = 0;i < n; i++) { dy[iy +dy_off] += da*dx[ix +dx_off]; ix += incx; iy += incy; } return; } else { # code for both increments equal to 1 for (i=0; i < n; i++) dy[i +dy_off] += da*dx[i +dx_off]; } } } # # forms the dot product of two vectors. # jack dongarra, linpack, 3/11/78. # ddot(n:int, dx: array of real, dx_off:int, incx:int, dy: array of real, dy_off:int, incy:int):real { i: int; ix: int; iy: int; dtemp := 0.0; if (n > 0) { if (incx != 1 || incy != 1) { # code for unequal increments or equal increments not equal to 1 ix = 0; iy = 0; if (incx < 0) ix = (-n+1)*incx; if (incy < 0) iy = (-n+1)*incy; for (i = 0;i < n; i++) { dtemp += dx[ix +dx_off]*dy[iy +dy_off]; ix += incx; iy += incy; } } else { # code for both increments equal to 1 for (i=0; i < n; i++) dtemp += dx[i +dx_off]*dy[i +dy_off]; } } return(dtemp); } # # scales a vector by a constant. # jack dongarra, linpack, 3/11/78. # dscal(n:int, da:real, dx:array of real, dx_off:int, incx:int) { i:int; nincx:int; if (n > 0) { if (incx != 1) { # code for increment not equal to 1 nincx = n*incx; for (i = 0; i < nincx; i += incx) dx[i +dx_off] *= da; } else { # code for increment equal to 1 for (i = 0; i < n; i++) dx[i +dx_off] *= da; } } } # # finds the index of element having max. math->fabsolute value. # jack dongarra, linpack, 3/11/78. # idamax(n:int, dx:array of real, dx_off:int, incx:int):int { dmax:real; dtemp:real; i:int; ix:int; itemp:=0; if (n < 1) { itemp = -1; } else if (n ==1) { itemp = 0; } else if (incx != 1) { # code for increment not equal to 1 dmax = math->fabs(dx[0 +dx_off]); ix = 1 + incx; for (i = 1; i < n; i++) { dtemp = math->fabs(dx[ix + dx_off]); if (dtemp > dmax) { itemp = i; dmax = dtemp; } ix += incx; } } else { # code for increment equal to 1 itemp = 0; dmax = math->fabs(dx[0 +dx_off]); for (i = 1; i < n; i++) { dtemp = math->fabs(dx[i + dx_off]); if (dtemp > dmax) { itemp = i; dmax = dtemp; } } } return (itemp); } # # estimate unit roundoff in quantities of size x. # # this program should function properly on all systems # satisfying the following two assumptions, # 1. the base used in representing dfloating point # numbers is not a power of three. # 2. the quantity a in statement 10 is represented to # the accuracy used in dfloating point variables # that are stored in memory. # the statement number 10 and the go to 10 are intended to # force optimizing compilers to generate code satisfying # assumption 2. # under these assumptions, it should be true that, # a is not exactly equal to four-thirds, # b has a zero for its last bit or digit, # c is not exactly equal to one, # eps measures the separation of 1.0 from # the next larger dfloating point number. # the developers of eispack would appreciate being informed # about any systems where these assumptions do not hold. # # ***************************************************************** # this routine is one of the auxiliary routines used by eispack iii # to avoid machine dependencies. # ***************************************************************** # # this version dated 4/6/83. # epslon (x:real):real { a := 4.0e0/3.0e0; eps := 0.0; while (eps == 0.0) { b := a - 1.0; c := b + b + b; eps = math->fabs(c-1.0); } return(eps*math->fabs(x)); } # # purpose: # multiply matrix m times vector x and add the result to vector y. # # parameters: # # n1 integer, number of elements in vector y, and number of rows in # matrix m # # y double [n1], vector of length n1 to which is added # the product m*x # # n2 integer, number of elements in vector x, and number of columns # in matrix m # # ldm integer, leading dimension of array m # # x double [n2], vector of length n2 # # m double [ldm][n2], matrix of n1 rows and n2 columns # dmxpy (n1:int, y:array of real, n2:int, ldm:int, x:array of real, m: array of array of real) { # cleanup odd vector for (j := 0; j < n2; j++) { for (i := 0; i < n1; i++) { y[i] += x[j]*m[j][i]; } } } second(): real { if (timeOrig==-1) { timeOrig = sys->millisec();} return real(sys->millisec() - timeOrig)/real(1000); }