program benchm c c-----This is a benchmark program for performance test for Fortran loops. c-----The program executes a number of Fortran DO-loops and lists c-----the execution times for different loop lengths, the Mflops-rates c-----and the performance parameters R-inf and n-half. Stripmining c-----effects can be taken into account by setting the proper value c-----of the parameter 'nregl' (see below). c-----NOTE: the tables in Chapter 4 of the book by Dongarra, Duff, c----- Sorenson and Van der Vorst have been obtained by using c----- this program. c c-----Version 1.2 c-----19 February 1990 c-----Program written by Henk A. Van der Vorst (TU Delft). c-----The present version of the program is in double precision and c-----measures the performance of CONVEX C-2 computers. It is easily c-----modified for use on other computers. For documentation see the c-----comment in the main program. c c-----subroutine timem has to be adjusted for each machine, it should c-----deliver the accumulated time expressed in seconds. c implicit double precision (a-h,o-z) common x1(1000,1000),x2(1000,1000),x3(1000,1000),x4(1000,1000) common/work/x5(1000,1000),x6(1000,1000) common/indi/in1(1000),in2(1000) common/par/amat(3,3),bvec(3),tvec(1000),nregl,inext common/step/islice,min(10),maxx(10),ist(10),kd,kf common/const/actu,bctu,cctu,dctu,ectu,t0,katu,nstat character com2*50,mach*15,comp*15,dat*15 c----------------------------------------------------------------- c c-----The following strings will appear in the output and should be c-----adjusted to denote the current compiler ('comp'), machine ('mach') c-----and date of testing. c comp=' f77 ' mach='c-210' dat=' 02/01/90 ' c c nregl is the vector register length of the machine, if the c machine has no vector registers, or if one wishes to ignore c the effect of stripmining, then set nregl=100000 c nregl=20000 c print 10 10 format(/,1x,50(1h-)) print 20, mach,comp,dat 20 format(1x,a,a,a) c c-----Definition of the different loop lengths for which the times c-----will be measured. c islice=1 min(1)=20 maxx(1)=200 ist(1)=20 min(2)=maxx(1)+100 maxx(2)=1000 ist(2)=100 nstat=1000 c c-----nstat gives the number of repetitions of a complete kernel in c-----order to obtain more accurate timings, the present version of c-----the program allows for nstat=1000 maximum. For larger values c-----the second dimension of the declared arrays should be adjusted. c call timem(t1) do 30 jj=1,nstat 30 call dummy(n,jj,x1,x2,x3,x4,s1,s2) call timem(t2) t0=t2-t1 print 40,nstat,t0 40 format(/,2x,' number of repetitions=',i5,/, 1 2x,' total time for dummy calls=',d12.5) c------------------------------------------------------------------ c vector updates call case1(x1,x2,x3,x4) c dot products call case2(x1,x2,x3,x4) c vector multiply and divide call case3(x1,x2,x3,x4) c elements wang alg. for lower bidiag system call case4(x1,x2,x3,x4) c gather scatter effects call case5(x1,x2,x3,x4) c max and store/load call case6(x1,x2,x3,x4) c loop unrolling effects call case8(x1,x2,x3,x4) c standard functions call case11(x1,x2,x3,x4) c------------------------------------------------------------------ stop end subroutine case1(x1,x2,x3,x4) implicit double precision (a-h,o-z) dimension x1(1000,1000),x2(1000,1000),x3(1000,1000),x4(1000,1000) external kern11,kern12,kern13,kern14,kern15,set11,set12, 1 set14,set15 character com2*50 ka=2 ikern=11 com2=' x=x+alfa*y ' call perf(kern11,ikern,ka,com2,set11,x1,x2,x3,x4) ka=4 ikern=12 com2=' x=x+alfa*y, v=v+beta*w ' call perf(kern12,ikern,ka,com2,set12,x1,x2,x3,x4) ka=3 ikern=13 com2=' x=x+alfa*y, z=beta*y ' call perf(kern13,ikern,ka,com2,set12,x1,x2,x3,x4) ka=2 ikern=14 com2=' x=x+alfa*y, stride=2 , divide perf by 2 ' call perf(kern14,ikern,ka,com2,set14,x1,x2,x3,x4) ka=2 ikern=15 com2=' x=x+alfa*y, stride=5 , divide perf by 5 ' call perf(kern15,ikern,ka,com2,set15,x1,x2,x3,x4) return end subroutine case2(x1,x2,x3,x4) implicit double precision (a-h,o-z) dimension x1(1000,1000),x2(1000,1000),x3(1000,1000),x4(1000,1000) external kern21,set11 character com2*50 ka=2 ikern=21 com2=' dot product, stride 1 ' call perf(kern21,ikern,ka,com2,set11,x1,x2,x3,x4) return end subroutine case3(x1,x2,x3,x4) implicit double precision (a-h,o-z) dimension x1(1000,1000),x2(1000,1000),x3(1000,1000),x4(1000,1000) external kern31,kern32,kern33,kern34,set12 character com2*50 ka=1 ikern=31 com2=' z=y*x ' call perf(kern31,ikern,ka,com2,set12,x1,x2,x3,x4) ka=1 ikern=32 com2=' z=alfa*x ' call perf(kern32,ikern,ka,com2,set12,x1,x2,x3,x4) ka=1 ikern=33 com2=' z=y/x ' call perf(kern33,ikern,ka,com2,set12,x1,x2,x3,x4) ka=1 ikern=34 com2=' z=x/alfa ' call perf(kern34,ikern,ka,com2,set12,x1,x2,x3,x4) return end subroutine case4(x1,x2,x3,x4) implicit double precision (a-h,o-z) dimension x1(1000,1000),x2(1000,1000),x3(1000,1000),x4(1000,1000) external kern41,kern42,set41 character com2*50 ka=3 ikern=41 com2=' x=x+a*y, a=a*b ' call perf(kern41,ikern,ka,com2,set41,x1,x2,x3,x4) ka=2 ikern=42 com2=' linear recursion x(i)=y(i)+a(i)*x(i-1) ' call perf(kern42,ikern,ka,com2,set41,x1,x2,x3,x4) return end subroutine case5(x1,x2,x3,x4) implicit double precision (a-h,o-z) dimension x1(1000,1000),x2(1000,1000),x3(1000,1000),x4(1000,1000) external kern51,kern52,kern53,kern54,kern55,set51 character com2*50 ka=1 ikern=5 com2=' gather: x1(i)=x2(ind(i)) ' call perf(kern51,ikern,ka,com2,set51,x1,x2,x3,x4) ka=1 ikern=52 com2=' scatter: x1(ind(i))=x2(i) ' call perf(kern52,ikern,ka,com2,set51,x1,x2,x3,x4) ka=2 ikern=53 com2=' gather saxpy x1=x1+s*x2(ind(i)) ' call perf(kern53,ikern,ka,com2,set51,x1,x2,x3,x4) ka=2 ikern=54 com2=' scatter saxpy x1(ind(i))=x1(ind(i))+s*x2 ' call perf(kern54,ikern,ka,com2,set51,x1,x2,x3,x4) ka=2 ikern=55 com2=' innerprod gather: sum(x1(ind(i))*x2(i)) ' call perf(kern55,ikern,ka,com2,set51,x1,x2,x3,x4) return end subroutine case6(x1,x2,x3,x4) implicit double precision (a-h,o-z) dimension x1(1000,1000),x2(1000,1000),x3(1000,1000),x4(1000,1000) external kern61,kern62,kern63,set61 character com2*50 ka=1 ikern=61 com2=' max of vector ' call perf(kern61,ikern,ka,com2,set61,x1,x2,x3,x4) ka=1 ikern=62 com2=' x(i)=alfa ' call perf(kern62,ikern,ka,com2,set61,x1,x2,x3,x4) ka=1 ikern=63 com2=' x(i)=y(i) ' call perf(kern63,ikern,ka,com2,set61,x1,x2,x3,x4) return end subroutine case8(x1,x2,x3,x4) implicit double precision (a-h,o-z) dimension x1(1000,1000),x2(1000,1000),x3(1000,1000),x4(1000,1000) external kern81,kern82,set81 character com2*50 ka=4 ikern=81 com2=' x=x+a*y+b*z ' call perf(kern81,ikern,ka,com2,set81,x1,x2,x3,x4) ka=8 ikern=82 com2=' x=x+a*x1+b*x2+c*x3+d*x4, loop unr. twice ' call perf(kern82,ikern,ka,com2,set81,x1,x2,x3,x4) return end subroutine case11(x1,x2,x3,x4) implicit double precision (a-h,o-z) dimension x1(1000,1000),x2(1000,1000),x3(1000,1000),x4(1000,1000) external ker111,ker112,ker113,ker114,set11 character com2*50 ka=1 ikern=111 com2=' x=exp(y) ' call perf(ker111,ikern,ka,com2,set11,x1,x2,x3,x4) ka=1 ikern=112 com2=' x=sqrt(y) ' call perf(ker112,ikern,ka,com2,set11,x1,x2,x3,x4) ka=1 ikern=113 com2=' x=sin(y) ' call perf(ker113,ikern,ka,com2,set11,x1,x2,x3,x4) ka=1 ikern=114 com2=' x=log(y) ' call perf(ker114,ikern,ka,com2,set11,x1,x2,x3,x4) return end subroutine perf(kernel,iker,ka,com2,setval,x1,x2,x3,x4) implicit double precision (a-h,o-z) common/step/islice,min(10),maxx(10),ist(10),kd,kf common/const/actu,bctu,cctu,dctu,ectu,t0,katu,nstat common/work/x5(1000,1000),x6(1000,1000) character com2*50 external kernel,setval dimension x1(1000,1000),x2(1000,1000),x3(1000,1000),x4(1000,1000) katu=ka c call ini(iker,com2) c jj=1 do 1121 m=1,islice do 1121 n=min(m),maxx(m),ist(m) call setval(n,jj,x1,x2,x3,x4,s1,s2) call timem(t1) do 121 k=1,nstat c--------------------------------------------------------- call kernel(n, k,x1,x2,x3,x4,s1,s2) c--------------------------------------------------------- 121 continue c call timem(t2) time=t2-t1-t0 call keep(time,n,iker) 1121 continue call eval return end subroutine set11(n,jj,x1,x2,x3,x4,s1,s2) implicit double precision (a-h,o-z) dimension x1(1000,1000),x2(1000,1000),x3(1000,1000),x4(1000,1000) s1=.333 do 101 j=1,1000 do 10 i=1,n x1(i,j)=.45678 10 x2(i,j)=.12345 101 continue return end subroutine set12(n,jj,x1,x2,x3,x4,s1,s2) implicit double precision (a-h,o-z) dimension x1(1000,1000),x2(1000,1000),x3(1000,1000),x4(1000,1000) s1=.333 s2=.555 do 101 j=1,1000 do 10 i=1,n x1(i,j)=.45678 x2(i,j)=.12345 x3(i,j)=.87654 10 x4(i,j)=.54321 101 continue return end subroutine set14(n,jj,x1,x2,x3,x4,s1,s2) implicit double precision (a-h,o-z) dimension x1(1000,1000),x2(1000,1000),x3(1000,1000),x4(1000,1000) s1=.333 do 101 j=1,1000 do 10 i=1,n,2 x1(i,j)=.45678 10 x2(i,j)=.12345 101 continue return end subroutine set15(n,jj,x1,x2,x3,x4,s1,s2) implicit double precision (a-h,o-z) dimension x1(1000,1000),x2(1000,1000),x3(1000,1000),x4(1000,1000) s1=.333 do 101 j=1,1000 do 10 i=1,n,5 x1(i,j)=.45678 10 x2(i,j)=.12345 101 continue return end subroutine kern11(n,j,x1,x2,x3,x4,s1,s2) implicit double precision (a-h,o-z) dimension x1(1000,1000),x2(1000,1000),x3(1000,1000),x4(1000,1000) c x=x+alfa*y, vector update do 10 i=1,n 10 x1(i,j)=x1(i,j)+s1*x2(i,j) return end subroutine kern12(n,j,x1,x2,x3,x4,s1,s2) implicit double precision (a-h,o-z) dimension x1(1000,1000),x2(1000,1000),x3(1000,1000),x4(1000,1000) c two vector updates do 10 i=1,n x1(i,j)=x1(i,j)+s1*x2(i,j) 10 x3(i,j)=x3(i,j)+s2*x4(i,j) return end subroutine kern13(n,j,x1,x2,x3,x4,s1,s2) implicit double precision (a-h,o-z) dimension x1(1000,1000),x2(1000,1000),x3(1000,1000),x4(1000,1000) c update + scalar vector multiply do 10 i=1,n x1(i,j)=x1(i,j)+s1*x2(i,j) 10 x3(i,j)=s2*x2(i,j) return end subroutine kern14(n,j,x1,x2,x3,x4,s1,s2) implicit double precision (a-h,o-z) dimension x1(1000,1000),x2(1000,1000),x3(1000,1000),x4(1000,1000) c x=x+alfa*y, vector update stride 2 do 10 i=1,n,2 10 x1(i,j)=x1(i,j)+s1*x2(i,j) return end subroutine kern15(n,j,x1,x2,x3,x4,s1,s2) implicit double precision (a-h,o-z) dimension x1(1000,1000),x2(1000,1000),x3(1000,1000),x4(1000,1000) c x=x+alfa*y, vector update stride 5 do 10 i=1,n,5 10 x1(i,j)=x1(i,j)+s1*x2(i,j) return end subroutine kern21(n,j,x1,x2,x3,x4,s1,s2) implicit double precision (a-h,o-z) dimension x1(1000,1000),x2(1000,1000),x3(1000,1000),x4(1000,1000) c dot product, stride 1 s1=0. do 10 i=1,n 10 s1=s1+x1(i,j)*x2(i,j) return end subroutine kern31(n,j,x1,x2,x3,x4,s1,s2) implicit double precision (a-h,o-z) dimension x1(1000,1000),x2(1000,1000),x3(1000,1000),x4(1000,1000) c vector multiply do 10 i=1,n 10 x3(i,j)=x1(i,j)*x2(i,j) return end subroutine kern32(n,j,x1,x2,x3,x4,s1,s2) implicit double precision (a-h,o-z) dimension x1(1000,1000),x2(1000,1000),x3(1000,1000),x4(1000,1000) c vector scalar multiply do 10 i=1,n 10 x3(i,j)=s1*x1(i,j) return end subroutine kern33(n,j,x1,x2,x3,x4,s1,s2) implicit double precision (a-h,o-z) dimension x1(1000,1000),x2(1000,1000),x3(1000,1000),x4(1000,1000) c vector vector divide do 10 i=1,n 10 x3(i,j)=x1(i,j)/x2(i,j) return end subroutine kern34(n,j,x1,x2,x3,x4,s1,s2) implicit double precision (a-h,o-z) dimension x1(1000,1000),x2(1000,1000),x3(1000,1000),x4(1000,1000) c vector scalar divide do 10 i=1,n 10 x3(i,j)=x1(i,j)/s1 return end subroutine set41(n,jj,x1,x2,x3,x4,s1,s2) implicit double precision (a-h,o-z) dimension x1(1000,1000),x2(1000,1000),x3(1000,1000),x4(1000,1000) c elements of wang algorithm do 101 j=1,1000 do 10 i=1,n x1(i,j)=.12345 x2(i,j)=.23456 x3(i,j)=.34567 10 x4(i,j)=.45678 101 continue return end subroutine kern41(n,j,x1,x2,x3,x4,s1,s2) implicit double precision (a-h,o-z) dimension x1(1000,1000),x2(1000,1000),x3(1000,1000),x4(1000,1000) c element of wang algorithm version b, step b1 do 10 i=1,n x1(i,j)=x1(i,j)+x2(i,j)*x3(i,j) 10 x2(i,j)=x2(i,j)*x4(i,j) return end subroutine kern42(n,j,x1,x2,x3,x4,s1,s2) implicit double precision (a-h,o-z) dimension x1(1000,1000),x2(1000,1000),x3(1000,1000),x4(1000,1000) c element of wang algorithm, step c2, bidiag system do 10 i=2,n 10 x1(i,j)=x2(i,j)+x3(i,j)*x1(i-1,j) return end subroutine set51(n,jj,x1,x2,x3,x4,s1,s2) implicit double precision (a-h,o-z) dimension x1(1000,1000),x2(1000,1000),x3(1000,1000),x4(1000,1000) common/work/x5(1000,1000),x6(1000,1000) common/indi/in1(1000),in2(1000) c initializing for indirect addressing do 10 i=1,250 in1(2*i-1)=251-i 10 in1(2*i)=1000 -i do 101 j=1,1000 do 20 i=1,1000 x1(i,j)=.12345 x2(i,j)=.23456 20 continue 101 continue s1=.34567 return end subroutine kern51(n,j,x1,x2,x3,x4,s1,s2) implicit double precision (a-h,o-z) dimension x1(1000,1000),x2(1000,1000),x3(1000,1000),x4(1000,1000) common/work/x5(1000,1000),x6(1000,1000) common/indi/in1(1000),in2(1000) c fortran gather do 10 i=1,n 10 x1(i,j)=x2(in1(i),j) return end subroutine kern52(n,j,x1,x2,x3,x4,s1,s2) implicit double precision (a-h,o-z) dimension x1(1000,1000),x2(1000,1000),x3(1000,1000),x4(1000,1000) common/work/x5(1000,1000),x6(1000,1000) common/indi/in1(1000),in2(1000) c fortran scatter do 10 i=1,n 10 x1(in1(i),j)=x2(i,j) return end subroutine kern53(n,j,x1,x2,x3,x4,s1,s2) implicit double precision (a-h,o-z) dimension x1(1000,1000),x2(1000,1000),x3(1000,1000),x4(1000,1000) common/work/x5(1000,1000),x6(1000,1000),xm(1000,1000,4) common/indi/in1(1000),in2(1000) c fortran gather saxpy do 10 i=1,n x1(i,j)=x1(i,j)+s1*x2(in1(i),j) 10 continue return end subroutine kern54(n,j,x1,x2,x3,x4,s1,s2) implicit double precision (a-h,o-z) dimension x1(1000,1000),x2(1000,1000),x3(1000,1000),x4(1000,1000) common/work/x5(1000,1000),x6(1000,1000) common/indi/in1(1000),in2(1000) c fortran scatter saxpy do 10 i=1,n 10 x1(in1(i),j)=x1(in1(i),j)+s1*x2(i,j) return end subroutine kern55(n,j,x1,x2,x3,x4,s1,s2) implicit double precision (a-h,o-z) dimension x1(1000,1000),x2(1000,1000),x3(1000,1000),x4(1000,1000) common/work/x5(1000,1000),x6(1000,1000) common/indi/in1(1000),in2(1000) c innerproduct with one gather s2=0. do 10 i=1,n 10 s2=s2+x1(i,j)*x2(in1(i),j) return end subroutine set61(n,j,x1,x2,x3,x4,s1,s2) implicit double precision (a-h,o-z) dimension x1(1000,1000),x2(1000,1000),x3(1000,1000),x4(1000,1000) s1=.333 s2=.555 do 101 j=1,1000 do 10 i=1,n x1(i,j)=sin(i/2.) 10 continue 101 continue return end subroutine kern61(n,j,x1,x2,x3,x4,s1,s2) implicit double precision (a-h,o-z) dimension x1(1000,1000),x2(1000,1000),x3(1000,1000),x4(1000,1000) c max of vector s2=-1.e29 do 10 i=1,n 10 s2=max(s2,x1(i,j)) return end subroutine kern62(n,j,x1,x2,x3,x4,s1,s2) implicit double precision (a-h,o-z) dimension x1(1000,1000),x2(1000,1000),x3(1000,1000),x4(1000,1000) c store a scalar do 10 i=1,n x4(i,j)=s1 10 continue return end subroutine kern63(n,j,x1,x2,x3,x4,s1,s2) implicit double precision (a-h,o-z) dimension x1(1000,1000),x2(1000,1000),x3(1000,1000),x4(1000,1000) c store a vector do 10 i=1,n x4(i,j)=x1(i,j) 10 continue return end subroutine set81(n,jj,x1,x2,x3,x4,s1,s2) implicit double precision (a-h,o-z) dimension x1(1000,1000),x2(1000,1000),x3(1000,1000),x4(1000,1000) common/work/x5(1000,1000),x6(1000,1000) c loop unrolling do 101 j=1,1000 do 10 i=1,n x1(i,j)=.12345 x2(i,j)=.23456 x3(i,j)=.34567 x4(i,j)=.45678 10 x5(i,j)=.56789 101 continue return end subroutine kern81(n,j,x1,x2,x3,x4,s1,s2) implicit double precision (a-h,o-z) dimension x1(1000,1000),x2(1000,1000),x3(1000,1000),x4(1000,1000) common/work/x5(1000,1000),x6(1000,1000) c loop unrolling once do 10 i=1,n 10 x1(i,j)=x1(i,j)+s1*x2(i,j)+s2*x3(i,j) return end subroutine kern82(n,j,x1,x2,x3,x4,s1,s2) implicit double precision (a-h,o-z) dimension x1(1000,1000),x2(1000,1000),x3(1000,1000),x4(1000,1000) common/work/x5(1000,1000),x6(1000,1000) c loop unrolling twice s3=.54321 s4=.65432 do 10 i=1,n x1(i,j)=x1(i,j)+s1*x2(i,j)+s2*x3(i,j)+s3*x4(i,j)+s4*x5(i,j) 10 continue return end subroutine ker111(n,j,x1,x2,x3,x4,s1,s2) implicit double precision (a-h,o-z) dimension x1(1000,1000),x2(1000,1000),x3(1000,1000),x4(1000,1000) c vector exp do 10 i=1,n 10 x1(i,j)=exp(x2(i,j)) return end subroutine ker112(n,j,x1,x2,x3,x4,s1,s2) implicit double precision (a-h,o-z) dimension x1(1000,1000),x2(1000,1000),x3(1000,1000),x4(1000,1000) c vector sqrt do 10 i=1,n 10 x1(i,j)=sqrt(x2(i,j)) return end subroutine ker113(n,j,x1,x2,x3,x4,s1,s2) implicit double precision (a-h,o-z) dimension x1(1000,1000),x2(1000,1000),x3(1000,1000),x4(1000,1000) c vector sin do 10 i=1,n 10 x1(i,j)=sin(x2(i,j)) return end subroutine ker114(n,j,x1,x2,x3,x4,s1,s2) implicit double precision (a-h,o-z) dimension x1(1000,1000),x2(1000,1000),x3(1000,1000),x4(1000,1000) c vector log do 10 i=1,n 10 x1(i,j)=log(x2(i,j)) return end subroutine dummy(n,j,x1,x2,x3,x4,s1,s2) implicit double precision (a-h,o-z) dimension x1(1000,1000),x2(1000,1000),x3(1000,1000),x4(1000,1000) return end subroutine timem(t) implicit double precision (a-h,o-z) real *4 tijd c---CPU time on fujitsu vp-series c call clock(g,2,2) c t=g*1.e-6 c---CPU time on convex c-1 t=cputime(0.) c---CPU time on CRAY, CYBER 205 c t=SECOND() c---CPU time on HP1100 c call asktim(itime) c t=itime/100.d0 c---CPU time om ibm 3090 c call qlock(t) return end subroutine ini(iker,com2) implicit double precision (a-h,o-z) common/const/actu,bctu,cctu,dctu,ectu,t0,katu,nstat common/par/amat(3,3),bvec(3),tvec(1000),nregl,inext character com2*50 inext=0 do 1 j=1,3 bvec(j)=0. do 1 i=1,3 amat(i,j)=0. 1 continue actu=0. bctu=0. cctu=0. dctu=0. ectu=0. print 10 10 format(/,1x,50(1h-)) print 20, iker,com2 20 format(1x,' kernel ',i3,1x,a50) print 30 30 format(/,11h n ,15h time ,10h mflops ) return end subroutine keep(time,n,iker) implicit double precision (a-h,o-z) common/const/actu,bctu,cctu,dctu,ectu,t0,katu,nstat common/par/amat(3,3),bvec(3),tvec(1000),nregl,inext if (time.le.0.) time=0.0005 flops=katu*nstat*1.d-6*n/time print 10, n,time,flops 10 format(2x,i6,2x,e12.5,2x,f9.3) time=time/nstat inext=inext+1 tvec(inext)=time amat(1,1)=amat(1,1)+1. amat(1,2)=amat(1,2)+n amat(1,3)=amat(1,3)+((n-1)/nregl) amat(2,2)=amat(2,2)+n*n amat(2,3)=amat(2,3)+n*((n-1)/nregl) amat(3,3)=amat(3,3)+((n-1)/nregl)**2 bvec(1)=bvec(1)+time bvec(2)=bvec(2)+n*time bvec(3)=bvec(3)+((n-1)/nregl)*time time=time*1.e6 actu=actu+(n/time)**2 bctu=bctu+n/(time**2) cctu=cctu+(1./time)**2 dctu=dctu+n/time ectu=ectu+1./time return end subroutine eval implicit double precision (a-h,o-z) common/const/actu,bctu,cctu,dctu,ectu,t0,katu,nstat common/par/amat(3,3),bvec(3),tvec(1000),nregl,inext beta=(ectu-bctu*dctu/actu)/(cctu-bctu**2/actu) alfa=(dctu-beta*bctu)/actu rtu=katu/alfa enhalf=beta/alfa if (enhalf .le. 1.) enhalf = 1. print 1,rtu,enhalf 1 format(/,' asymp. perf= ',f9.3,' nhalf=',f9.1) c parameters including the effect of stripmining amat(2,2)=amat(2,2)-amat(1,2)**2/amat(1,1) amat(2,3)=amat(2,3)-amat(1,2)*amat(1,3)/amat(1,1) amat(3,3)=amat(3,3)-amat(1,3)**2/amat(1,1) bvec(2)=bvec(2)-amat(1,2)*bvec(1)/amat(1,1) bvec(3)=bvec(3)-amat(1,3)*bvec(1)/amat(1,1) amat(3,3)=amat(3,3)-amat(2,3)**2/amat(2,2) bvec(3)=bvec(3)-amat(2,3)*bvec(2)/amat(2,2) bvec(3)=0. if (amat(3,3).ne.0.) 1 bvec(3)=bvec(3)/amat(3,3) bvec(2)=(bvec(2)-amat(2,3)*bvec(3))/amat(2,2) bvec(1)=(bvec(1)-amat(1,2)*bvec(2)-amat(1,3)*bvec(3))/amat(1,1) rtu=katu*1.e-6/(bvec(2)+bvec(3)/nregl) enhalf=bvec(1)/(bvec(2)+2.*bvec(3)/nregl) nhalf=enhalf if(nhalf.ge.nregl) 1 enhalf=(bvec(1)+bvec(3)*(( nhalf-1)/nregl))/(bvec(2)+2.* 2 bvec(3)/nregl) if(enhalf.lt.1.) enhalf =1. print 2,rtu,enhalf 2 format(/' parameters with stripmining effects included:'/, 1 ' asymp. perf=',f9.3,' mflops, enhalf=',f9.1) return end