program benchm
c
c-----This is a benchmark program for performance test for Fortran loops.
c-----The program executes a number of Fortran DO-loops and lists
c-----the execution times for different loop lengths, the Mflops-rates
c-----and the performance parameters R-inf and n-half. Stripmining
c-----effects can be taken into account by setting the proper value
c-----of the parameter 'nregl' (see below).
c-----NOTE: the tables in Chapter 4 of the book by Dongarra, Duff,
c-----      Sorenson and Van der Vorst have been obtained by using
c-----      this program.
c
c-----Version 1.2
c-----19 February 1990
c-----Program written by Henk A. Van der Vorst (TU Delft).
c-----The present version of the program is in double precision and
c-----measures the performance of CONVEX C-2 computers. It is easily
c-----modified for use on other computers. For documentation see the
c-----comment in the main program.
c
c-----subroutine timem has to be adjusted for each machine, it should
c-----deliver the accumulated time expressed in seconds.
c
      implicit double precision (a-h,o-z)
      common x1(1000,1000),x2(1000,1000),x3(1000,1000),x4(1000,1000)
      common/work/x5(1000,1000),x6(1000,1000)
      common/indi/in1(1000),in2(1000)
      common/par/amat(3,3),bvec(3),tvec(1000),nregl,inext
      common/step/islice,min(10),maxx(10),ist(10),kd,kf
      common/const/actu,bctu,cctu,dctu,ectu,t0,katu,nstat
      character com2*50,mach*15,comp*15,dat*15
c-----------------------------------------------------------------
c
c-----The following strings will appear in the output and should be
c-----adjusted to denote the current compiler ('comp'), machine ('mach')
c-----and date of testing.
c
      comp=' f77    '
      mach='c-210'
      dat=' 02/01/90 '

c
c  nregl is the vector register length of the machine, if the
c  machine has no vector registers, or if one wishes to ignore
c  the effect of stripmining, then set nregl=100000
c
      nregl=20000
c
      print 10
 10   format(/,1x,50(1h-))
      print 20, mach,comp,dat
 20   format(1x,a,a,a)
c
c-----Definition of the different loop lengths for which the times
c-----will be measured.
c
      islice=1
      min(1)=20
      maxx(1)=200
      ist(1)=20
      min(2)=maxx(1)+100
      maxx(2)=1000
      ist(2)=100
      nstat=1000
c
c-----nstat gives the number of repetitions of a complete kernel in
c-----order to obtain more accurate timings, the present version of
c-----the program allows for nstat=1000 maximum. For larger values
c-----the second dimension of the declared arrays should be adjusted.
c
      call timem(t1)
      do   30 jj=1,nstat
 30   call dummy(n,jj,x1,x2,x3,x4,s1,s2)
      call timem(t2)
      t0=t2-t1
      print  40,nstat,t0
 40   format(/,2x,' number of repetitions=',i5,/,
     1         2x,' total time for dummy calls=',d12.5)
c------------------------------------------------------------------
c  vector updates
      call case1(x1,x2,x3,x4)
c  dot products
      call case2(x1,x2,x3,x4)
c  vector multiply and divide
      call case3(x1,x2,x3,x4)
c  elements wang alg. for lower bidiag system
      call case4(x1,x2,x3,x4)
c  gather scatter effects
      call case5(x1,x2,x3,x4)
c  max and store/load
      call case6(x1,x2,x3,x4)
c  loop unrolling effects
      call case8(x1,x2,x3,x4)
c  standard functions
      call case11(x1,x2,x3,x4)
c------------------------------------------------------------------
      stop
      end
      subroutine case1(x1,x2,x3,x4)
      implicit double precision (a-h,o-z)
      dimension x1(1000,1000),x2(1000,1000),x3(1000,1000),x4(1000,1000)
      external kern11,kern12,kern13,kern14,kern15,set11,set12,
     1         set14,set15
      character com2*50
         ka=2
         ikern=11
         com2=' x=x+alfa*y  '
         call perf(kern11,ikern,ka,com2,set11,x1,x2,x3,x4)
         ka=4
         ikern=12
         com2=' x=x+alfa*y, v=v+beta*w '
         call perf(kern12,ikern,ka,com2,set12,x1,x2,x3,x4)
         ka=3
         ikern=13
         com2=' x=x+alfa*y, z=beta*y '
         call perf(kern13,ikern,ka,com2,set12,x1,x2,x3,x4)
         ka=2
         ikern=14
         com2=' x=x+alfa*y, stride=2 , divide perf by 2 '
         call perf(kern14,ikern,ka,com2,set14,x1,x2,x3,x4)
         ka=2
         ikern=15
         com2=' x=x+alfa*y, stride=5 , divide perf by 5 '
         call perf(kern15,ikern,ka,com2,set15,x1,x2,x3,x4)
      return
      end
      subroutine case2(x1,x2,x3,x4)
      implicit double precision (a-h,o-z)
      dimension x1(1000,1000),x2(1000,1000),x3(1000,1000),x4(1000,1000)
      external kern21,set11
      character com2*50
         ka=2
         ikern=21
         com2=' dot product, stride 1 '
         call perf(kern21,ikern,ka,com2,set11,x1,x2,x3,x4)
      return
      end
      subroutine case3(x1,x2,x3,x4)
      implicit double precision (a-h,o-z)
      dimension x1(1000,1000),x2(1000,1000),x3(1000,1000),x4(1000,1000)
      external kern31,kern32,kern33,kern34,set12
      character com2*50
         ka=1
         ikern=31
         com2=' z=y*x       '
         call perf(kern31,ikern,ka,com2,set12,x1,x2,x3,x4)
         ka=1
         ikern=32
         com2=' z=alfa*x               '
         call perf(kern32,ikern,ka,com2,set12,x1,x2,x3,x4)
         ka=1
         ikern=33
         com2=' z=y/x                '
         call perf(kern33,ikern,ka,com2,set12,x1,x2,x3,x4)
         ka=1
         ikern=34
         com2=' z=x/alfa             '
         call perf(kern34,ikern,ka,com2,set12,x1,x2,x3,x4)
      return
      end
      subroutine case4(x1,x2,x3,x4)
      implicit double precision (a-h,o-z)
      dimension x1(1000,1000),x2(1000,1000),x3(1000,1000),x4(1000,1000)
      external kern41,kern42,set41
      character com2*50
         ka=3
         ikern=41
         com2=' x=x+a*y,  a=a*b  '
         call perf(kern41,ikern,ka,com2,set41,x1,x2,x3,x4)
         ka=2
         ikern=42
         com2=' linear recursion x(i)=y(i)+a(i)*x(i-1) '
         call perf(kern42,ikern,ka,com2,set41,x1,x2,x3,x4)
      return
      end
      subroutine case5(x1,x2,x3,x4)
      implicit double precision (a-h,o-z)
      dimension x1(1000,1000),x2(1000,1000),x3(1000,1000),x4(1000,1000)
      external kern51,kern52,kern53,kern54,kern55,set51
      character com2*50
         ka=1
         ikern=5
         com2=' gather: x1(i)=x2(ind(i))  '
         call perf(kern51,ikern,ka,com2,set51,x1,x2,x3,x4)
         ka=1
         ikern=52
         com2=' scatter: x1(ind(i))=x2(i)  '
         call perf(kern52,ikern,ka,com2,set51,x1,x2,x3,x4)
         ka=2
         ikern=53
         com2=' gather saxpy x1=x1+s*x2(ind(i))  '
         call perf(kern53,ikern,ka,com2,set51,x1,x2,x3,x4)
         ka=2
         ikern=54
         com2=' scatter saxpy x1(ind(i))=x1(ind(i))+s*x2  '
         call perf(kern54,ikern,ka,com2,set51,x1,x2,x3,x4)
         ka=2
         ikern=55
         com2=' innerprod gather: sum(x1(ind(i))*x2(i))  '
         call perf(kern55,ikern,ka,com2,set51,x1,x2,x3,x4)
      return
      end
      subroutine case6(x1,x2,x3,x4)
      implicit double precision (a-h,o-z)
      dimension x1(1000,1000),x2(1000,1000),x3(1000,1000),x4(1000,1000)
      external kern61,kern62,kern63,set61
      character com2*50
         ka=1
         ikern=61
         com2=' max of vector '
         call perf(kern61,ikern,ka,com2,set61,x1,x2,x3,x4)
         ka=1
         ikern=62
         com2=' x(i)=alfa  '
         call perf(kern62,ikern,ka,com2,set61,x1,x2,x3,x4)
         ka=1
         ikern=63
         com2=' x(i)=y(i)    '
         call perf(kern63,ikern,ka,com2,set61,x1,x2,x3,x4)
      return
      end
      subroutine case8(x1,x2,x3,x4)
      implicit double precision (a-h,o-z)
      dimension x1(1000,1000),x2(1000,1000),x3(1000,1000),x4(1000,1000)
      external kern81,kern82,set81
      character com2*50
         ka=4
         ikern=81
         com2=' x=x+a*y+b*z   '
         call perf(kern81,ikern,ka,com2,set81,x1,x2,x3,x4)
         ka=8
         ikern=82
         com2=' x=x+a*x1+b*x2+c*x3+d*x4, loop unr. twice '
         call perf(kern82,ikern,ka,com2,set81,x1,x2,x3,x4)
      return
      end
      subroutine case11(x1,x2,x3,x4)
      implicit double precision (a-h,o-z)
      dimension x1(1000,1000),x2(1000,1000),x3(1000,1000),x4(1000,1000)
      external ker111,ker112,ker113,ker114,set11
      character com2*50
         ka=1
         ikern=111
         com2=' x=exp(y)    '
         call perf(ker111,ikern,ka,com2,set11,x1,x2,x3,x4)
         ka=1
         ikern=112
         com2=' x=sqrt(y)   '
         call perf(ker112,ikern,ka,com2,set11,x1,x2,x3,x4)
         ka=1
         ikern=113
         com2=' x=sin(y)   '
         call perf(ker113,ikern,ka,com2,set11,x1,x2,x3,x4)
         ka=1
         ikern=114
         com2=' x=log(y)   '
         call perf(ker114,ikern,ka,com2,set11,x1,x2,x3,x4)
      return
      end
      subroutine perf(kernel,iker,ka,com2,setval,x1,x2,x3,x4)
      implicit double precision (a-h,o-z)
      common/step/islice,min(10),maxx(10),ist(10),kd,kf
      common/const/actu,bctu,cctu,dctu,ectu,t0,katu,nstat
      common/work/x5(1000,1000),x6(1000,1000)
      character com2*50
      external kernel,setval
      dimension x1(1000,1000),x2(1000,1000),x3(1000,1000),x4(1000,1000)
      katu=ka
c
      call ini(iker,com2)
c
      jj=1
      do 1121 m=1,islice
      do 1121 n=min(m),maxx(m),ist(m)
         call setval(n,jj,x1,x2,x3,x4,s1,s2)
         call timem(t1)
         do 121 k=1,nstat
c---------------------------------------------------------
         call kernel(n, k,x1,x2,x3,x4,s1,s2)
c---------------------------------------------------------
  121    continue
c
         call timem(t2)
         time=t2-t1-t0
         call keep(time,n,iker)
 1121    continue
      call eval
      return
      end
      subroutine set11(n,jj,x1,x2,x3,x4,s1,s2)
      implicit double precision (a-h,o-z)
      dimension x1(1000,1000),x2(1000,1000),x3(1000,1000),x4(1000,1000)
      s1=.333
      do 101 j=1,1000
      do 10 i=1,n
         x1(i,j)=.45678
  10     x2(i,j)=.12345
 101  continue
      return
      end
      subroutine set12(n,jj,x1,x2,x3,x4,s1,s2)
      implicit double precision (a-h,o-z)
      dimension x1(1000,1000),x2(1000,1000),x3(1000,1000),x4(1000,1000)
      s1=.333
      s2=.555
      do 101 j=1,1000
      do 10 i=1,n
         x1(i,j)=.45678
         x2(i,j)=.12345
         x3(i,j)=.87654
  10     x4(i,j)=.54321
 101  continue
      return
      end
      subroutine set14(n,jj,x1,x2,x3,x4,s1,s2)
      implicit double precision (a-h,o-z)
      dimension x1(1000,1000),x2(1000,1000),x3(1000,1000),x4(1000,1000)
      s1=.333
      do 101 j=1,1000
      do 10 i=1,n,2
         x1(i,j)=.45678
  10     x2(i,j)=.12345
 101  continue
      return
      end
      subroutine set15(n,jj,x1,x2,x3,x4,s1,s2)
      implicit double precision (a-h,o-z)
      dimension x1(1000,1000),x2(1000,1000),x3(1000,1000),x4(1000,1000)
      s1=.333
      do 101 j=1,1000
      do 10 i=1,n,5
         x1(i,j)=.45678
  10     x2(i,j)=.12345
 101  continue
      return
      end
      subroutine kern11(n,j,x1,x2,x3,x4,s1,s2)
      implicit double precision (a-h,o-z)
      dimension x1(1000,1000),x2(1000,1000),x3(1000,1000),x4(1000,1000)
c x=x+alfa*y,  vector update
      do 10 i=1,n
  10     x1(i,j)=x1(i,j)+s1*x2(i,j)
      return
      end
      subroutine kern12(n,j,x1,x2,x3,x4,s1,s2)
      implicit double precision (a-h,o-z)
      dimension x1(1000,1000),x2(1000,1000),x3(1000,1000),x4(1000,1000)
c two vector updates
      do 10 i=1,n
         x1(i,j)=x1(i,j)+s1*x2(i,j)
  10     x3(i,j)=x3(i,j)+s2*x4(i,j)
      return
      end
      subroutine kern13(n,j,x1,x2,x3,x4,s1,s2)
      implicit double precision (a-h,o-z)
      dimension x1(1000,1000),x2(1000,1000),x3(1000,1000),x4(1000,1000)
c update + scalar vector multiply
      do 10 i=1,n
         x1(i,j)=x1(i,j)+s1*x2(i,j)
  10     x3(i,j)=s2*x2(i,j)
      return
      end
      subroutine kern14(n,j,x1,x2,x3,x4,s1,s2)
      implicit double precision (a-h,o-z)
      dimension x1(1000,1000),x2(1000,1000),x3(1000,1000),x4(1000,1000)
c x=x+alfa*y,  vector update stride 2
      do 10 i=1,n,2
  10     x1(i,j)=x1(i,j)+s1*x2(i,j)
      return
      end
      subroutine kern15(n,j,x1,x2,x3,x4,s1,s2)
      implicit double precision (a-h,o-z)
      dimension x1(1000,1000),x2(1000,1000),x3(1000,1000),x4(1000,1000)
c x=x+alfa*y,  vector update stride 5
      do 10 i=1,n,5
  10     x1(i,j)=x1(i,j)+s1*x2(i,j)
      return
      end
      subroutine kern21(n,j,x1,x2,x3,x4,s1,s2)
      implicit double precision (a-h,o-z)
      dimension x1(1000,1000),x2(1000,1000),x3(1000,1000),x4(1000,1000)
c dot product, stride 1
      s1=0.
      do 10 i=1,n
  10     s1=s1+x1(i,j)*x2(i,j)
      return
      end
      subroutine kern31(n,j,x1,x2,x3,x4,s1,s2)
      implicit double precision (a-h,o-z)
      dimension x1(1000,1000),x2(1000,1000),x3(1000,1000),x4(1000,1000)
c vector multiply
      do 10 i=1,n
  10     x3(i,j)=x1(i,j)*x2(i,j)
      return
      end
      subroutine kern32(n,j,x1,x2,x3,x4,s1,s2)
      implicit double precision (a-h,o-z)
      dimension x1(1000,1000),x2(1000,1000),x3(1000,1000),x4(1000,1000)
c vector scalar multiply
      do 10 i=1,n
  10     x3(i,j)=s1*x1(i,j)
      return
      end
      subroutine kern33(n,j,x1,x2,x3,x4,s1,s2)
      implicit double precision (a-h,o-z)
      dimension x1(1000,1000),x2(1000,1000),x3(1000,1000),x4(1000,1000)
c vector vector divide
      do 10 i=1,n
  10     x3(i,j)=x1(i,j)/x2(i,j)
      return
      end
      subroutine kern34(n,j,x1,x2,x3,x4,s1,s2)
      implicit double precision (a-h,o-z)
      dimension x1(1000,1000),x2(1000,1000),x3(1000,1000),x4(1000,1000)
c vector scalar divide
      do 10 i=1,n
  10     x3(i,j)=x1(i,j)/s1
      return
      end
      subroutine set41(n,jj,x1,x2,x3,x4,s1,s2)
      implicit double precision (a-h,o-z)
      dimension x1(1000,1000),x2(1000,1000),x3(1000,1000),x4(1000,1000)
c  elements of wang algorithm
      do 101 j=1,1000
      do 10 i=1,n
         x1(i,j)=.12345
         x2(i,j)=.23456
         x3(i,j)=.34567
 10      x4(i,j)=.45678
 101  continue
      return
      end
      subroutine kern41(n,j,x1,x2,x3,x4,s1,s2)
      implicit double precision (a-h,o-z)
      dimension x1(1000,1000),x2(1000,1000),x3(1000,1000),x4(1000,1000)
c  element of wang algorithm version b, step b1
      do 10 i=1,n
         x1(i,j)=x1(i,j)+x2(i,j)*x3(i,j)
  10     x2(i,j)=x2(i,j)*x4(i,j)
      return
      end
      subroutine kern42(n,j,x1,x2,x3,x4,s1,s2)
      implicit double precision (a-h,o-z)
      dimension x1(1000,1000),x2(1000,1000),x3(1000,1000),x4(1000,1000)
c  element of wang algorithm, step c2, bidiag system
      do 10 i=2,n
  10     x1(i,j)=x2(i,j)+x3(i,j)*x1(i-1,j)
      return
      end
      subroutine set51(n,jj,x1,x2,x3,x4,s1,s2)
      implicit double precision (a-h,o-z)
      dimension x1(1000,1000),x2(1000,1000),x3(1000,1000),x4(1000,1000)
      common/work/x5(1000,1000),x6(1000,1000)
      common/indi/in1(1000),in2(1000)
c initializing for indirect addressing
      do 10 i=1,250
         in1(2*i-1)=251-i
 10      in1(2*i)=1000 -i
      do 101 j=1,1000
      do 20 i=1,1000
         x1(i,j)=.12345
         x2(i,j)=.23456
 20   continue
 101  continue
      s1=.34567
      return
      end
      subroutine kern51(n,j,x1,x2,x3,x4,s1,s2)
      implicit double precision (a-h,o-z)
      dimension x1(1000,1000),x2(1000,1000),x3(1000,1000),x4(1000,1000)
      common/work/x5(1000,1000),x6(1000,1000)
      common/indi/in1(1000),in2(1000)
c fortran gather
      do 10 i=1,n
  10     x1(i,j)=x2(in1(i),j)
      return
      end
      subroutine kern52(n,j,x1,x2,x3,x4,s1,s2)
      implicit double precision (a-h,o-z)
      dimension x1(1000,1000),x2(1000,1000),x3(1000,1000),x4(1000,1000)
      common/work/x5(1000,1000),x6(1000,1000)
      common/indi/in1(1000),in2(1000)
c fortran scatter
      do 10 i=1,n
  10     x1(in1(i),j)=x2(i,j)
      return
      end
      subroutine kern53(n,j,x1,x2,x3,x4,s1,s2)
      implicit double precision (a-h,o-z)
      dimension x1(1000,1000),x2(1000,1000),x3(1000,1000),x4(1000,1000)
      common/work/x5(1000,1000),x6(1000,1000),xm(1000,1000,4)
      common/indi/in1(1000),in2(1000)
c  fortran gather saxpy
      do 10 i=1,n
         x1(i,j)=x1(i,j)+s1*x2(in1(i),j)
  10  continue
      return
      end
      subroutine kern54(n,j,x1,x2,x3,x4,s1,s2)
      implicit double precision (a-h,o-z)
      dimension x1(1000,1000),x2(1000,1000),x3(1000,1000),x4(1000,1000)
      common/work/x5(1000,1000),x6(1000,1000)
      common/indi/in1(1000),in2(1000)
c  fortran scatter saxpy
      do 10 i=1,n
  10     x1(in1(i),j)=x1(in1(i),j)+s1*x2(i,j)
      return
      end
      subroutine kern55(n,j,x1,x2,x3,x4,s1,s2)
      implicit double precision (a-h,o-z)
      dimension x1(1000,1000),x2(1000,1000),x3(1000,1000),x4(1000,1000)
      common/work/x5(1000,1000),x6(1000,1000)
      common/indi/in1(1000),in2(1000)
c  innerproduct with one gather
      s2=0.
      do 10 i=1,n
  10     s2=s2+x1(i,j)*x2(in1(i),j)
      return
      end
      subroutine set61(n,j,x1,x2,x3,x4,s1,s2)
      implicit double precision (a-h,o-z)
      dimension x1(1000,1000),x2(1000,1000),x3(1000,1000),x4(1000,1000)
      s1=.333
      s2=.555
      do 101 j=1,1000
      do 10 i=1,n
         x1(i,j)=sin(i/2.)
  10  continue
 101  continue
      return
      end
      subroutine kern61(n,j,x1,x2,x3,x4,s1,s2)
      implicit double precision (a-h,o-z)
      dimension x1(1000,1000),x2(1000,1000),x3(1000,1000),x4(1000,1000)
c  max of vector
      s2=-1.e29
      do 10 i=1,n
  10     s2=max(s2,x1(i,j))
      return
      end
      subroutine kern62(n,j,x1,x2,x3,x4,s1,s2)
      implicit double precision (a-h,o-z)
      dimension x1(1000,1000),x2(1000,1000),x3(1000,1000),x4(1000,1000)
c  store a scalar
      do 10 i=1,n
         x4(i,j)=s1
  10  continue
      return
      end
      subroutine kern63(n,j,x1,x2,x3,x4,s1,s2)
      implicit double precision (a-h,o-z)
      dimension x1(1000,1000),x2(1000,1000),x3(1000,1000),x4(1000,1000)
c  store a vector
      do 10 i=1,n
         x4(i,j)=x1(i,j)
  10  continue
      return
      end
      subroutine set81(n,jj,x1,x2,x3,x4,s1,s2)
      implicit double precision (a-h,o-z)
      dimension x1(1000,1000),x2(1000,1000),x3(1000,1000),x4(1000,1000)
      common/work/x5(1000,1000),x6(1000,1000)
c  loop unrolling
      do 101 j=1,1000
      do 10 i=1,n
         x1(i,j)=.12345
         x2(i,j)=.23456
         x3(i,j)=.34567
         x4(i,j)=.45678
  10     x5(i,j)=.56789
 101  continue
      return
      end
      subroutine kern81(n,j,x1,x2,x3,x4,s1,s2)
      implicit double precision (a-h,o-z)
      dimension x1(1000,1000),x2(1000,1000),x3(1000,1000),x4(1000,1000)
      common/work/x5(1000,1000),x6(1000,1000)
c  loop unrolling once
      do 10 i=1,n
  10     x1(i,j)=x1(i,j)+s1*x2(i,j)+s2*x3(i,j)
      return
      end
      subroutine kern82(n,j,x1,x2,x3,x4,s1,s2)
      implicit double precision (a-h,o-z)
      dimension x1(1000,1000),x2(1000,1000),x3(1000,1000),x4(1000,1000)
      common/work/x5(1000,1000),x6(1000,1000)
c  loop unrolling twice
      s3=.54321
      s4=.65432
      do 10 i=1,n
         x1(i,j)=x1(i,j)+s1*x2(i,j)+s2*x3(i,j)+s3*x4(i,j)+s4*x5(i,j)
  10  continue
      return
      end
      subroutine ker111(n,j,x1,x2,x3,x4,s1,s2)
      implicit double precision (a-h,o-z)
      dimension x1(1000,1000),x2(1000,1000),x3(1000,1000),x4(1000,1000)
c vector exp
      do 10 i=1,n
  10     x1(i,j)=exp(x2(i,j))
      return
      end
      subroutine ker112(n,j,x1,x2,x3,x4,s1,s2)
      implicit double precision (a-h,o-z)
      dimension x1(1000,1000),x2(1000,1000),x3(1000,1000),x4(1000,1000)
c vector sqrt
      do 10 i=1,n
  10     x1(i,j)=sqrt(x2(i,j))
      return
      end
      subroutine ker113(n,j,x1,x2,x3,x4,s1,s2)
      implicit double precision (a-h,o-z)
      dimension x1(1000,1000),x2(1000,1000),x3(1000,1000),x4(1000,1000)
c vector sin
      do 10 i=1,n
  10     x1(i,j)=sin(x2(i,j))
      return
      end
      subroutine ker114(n,j,x1,x2,x3,x4,s1,s2)
      implicit double precision (a-h,o-z)
      dimension x1(1000,1000),x2(1000,1000),x3(1000,1000),x4(1000,1000)
c vector log
      do 10 i=1,n
  10     x1(i,j)=log(x2(i,j))
      return
      end
      subroutine dummy(n,j,x1,x2,x3,x4,s1,s2)
      implicit double precision (a-h,o-z)
      dimension x1(1000,1000),x2(1000,1000),x3(1000,1000),x4(1000,1000)
      return
      end
      subroutine timem(t)
      implicit double precision (a-h,o-z)
      real *4 tijd
c---CPU time on fujitsu vp-series
c      call clock(g,2,2)
c      t=g*1.e-6
c---CPU time on convex c-1
      t=cputime(0.)
c---CPU time on CRAY, CYBER 205
c      t=SECOND()
c---CPU time on HP1100
c     call asktim(itime)
c     t=itime/100.d0
c---CPU time om ibm 3090
c     call qlock(t)
      return
      end
      subroutine ini(iker,com2)
      implicit double precision (a-h,o-z)
      common/const/actu,bctu,cctu,dctu,ectu,t0,katu,nstat
      common/par/amat(3,3),bvec(3),tvec(1000),nregl,inext
      character com2*50
      inext=0
      do 1 j=1,3
         bvec(j)=0.
         do 1 i=1,3
            amat(i,j)=0.
 1    continue
      actu=0.
      bctu=0.
      cctu=0.
      dctu=0.
      ectu=0.
      print 10
 10   format(/,1x,50(1h-))
      print    20, iker,com2
 20   format(1x,' kernel ',i3,1x,a50)
      print   30
 30   format(/,11h       n   ,15h     time      ,10h  mflops  )
      return
      end
      subroutine keep(time,n,iker)
      implicit double precision (a-h,o-z)
      common/const/actu,bctu,cctu,dctu,ectu,t0,katu,nstat
      common/par/amat(3,3),bvec(3),tvec(1000),nregl,inext
      if (time.le.0.) time=0.0005
      flops=katu*nstat*1.d-6*n/time
      print   10, n,time,flops
  10  format(2x,i6,2x,e12.5,2x,f9.3)
      time=time/nstat
      inext=inext+1
      tvec(inext)=time
      amat(1,1)=amat(1,1)+1.
      amat(1,2)=amat(1,2)+n
      amat(1,3)=amat(1,3)+((n-1)/nregl)
      amat(2,2)=amat(2,2)+n*n
      amat(2,3)=amat(2,3)+n*((n-1)/nregl)
      amat(3,3)=amat(3,3)+((n-1)/nregl)**2
      bvec(1)=bvec(1)+time
      bvec(2)=bvec(2)+n*time
      bvec(3)=bvec(3)+((n-1)/nregl)*time
      time=time*1.e6
      actu=actu+(n/time)**2
      bctu=bctu+n/(time**2)
      cctu=cctu+(1./time)**2
      dctu=dctu+n/time
      ectu=ectu+1./time
      return
      end
      subroutine eval
      implicit double precision (a-h,o-z)
      common/const/actu,bctu,cctu,dctu,ectu,t0,katu,nstat
      common/par/amat(3,3),bvec(3),tvec(1000),nregl,inext
      beta=(ectu-bctu*dctu/actu)/(cctu-bctu**2/actu)
      alfa=(dctu-beta*bctu)/actu
      rtu=katu/alfa
      enhalf=beta/alfa
      if (enhalf .le. 1.) enhalf = 1.
      print   1,rtu,enhalf
  1   format(/,'    asymp. perf= ',f9.3,'  nhalf=',f9.1)
c  parameters including the effect of stripmining
      amat(2,2)=amat(2,2)-amat(1,2)**2/amat(1,1)
      amat(2,3)=amat(2,3)-amat(1,2)*amat(1,3)/amat(1,1)
      amat(3,3)=amat(3,3)-amat(1,3)**2/amat(1,1)
      bvec(2)=bvec(2)-amat(1,2)*bvec(1)/amat(1,1)
      bvec(3)=bvec(3)-amat(1,3)*bvec(1)/amat(1,1)
      amat(3,3)=amat(3,3)-amat(2,3)**2/amat(2,2)
      bvec(3)=bvec(3)-amat(2,3)*bvec(2)/amat(2,2)
      bvec(3)=0.
      if (amat(3,3).ne.0.)
     1    bvec(3)=bvec(3)/amat(3,3)
      bvec(2)=(bvec(2)-amat(2,3)*bvec(3))/amat(2,2)
      bvec(1)=(bvec(1)-amat(1,2)*bvec(2)-amat(1,3)*bvec(3))/amat(1,1)
      rtu=katu*1.e-6/(bvec(2)+bvec(3)/nregl)
      enhalf=bvec(1)/(bvec(2)+2.*bvec(3)/nregl)
      nhalf=enhalf
      if(nhalf.ge.nregl)
     1   enhalf=(bvec(1)+bvec(3)*(( nhalf-1)/nregl))/(bvec(2)+2.*
     2   bvec(3)/nregl)
      if(enhalf.lt.1.) enhalf =1.
      print 2,rtu,enhalf
 2    format(/'   parameters with stripmining effects included:'/,
     1         '  asymp. perf=',f9.3,' mflops,    enhalf=',f9.1)
      return
      end