(*************************************************************************) (* program GTREE version 1.0, copyright 1996 by James E. Corter *) (* new "generalized triples" algorithm for fitting additive trees *) (* *) (* GTREE is a program in the PASCAL language for fitting the additive *) (* tree model to proximity data, using the "generalized triples" *) (* algorithm of Corter (1992,1996). The data may be either similarities *) (* or distance data. A listing of tree parameters and their estimates, *) (* drawing of the tree, and summary statistics are reported. Various *) (* other output options are available. A help file accompanies this *) (* source file. See it for more information on using the program. *) (* *) (* VERSION 1.0: combine all pairs that are mutual NN in T matrix, *) (* as long as their T score is not beaten by any dominated score. *) (* Use efficient way of computing prox. matrix indices ij,ik,jk *) (* *) (* This program was written by James E. Corter. Questions or comments *) (* regarding the program may be addressed to him at: *) (* Box 41, Teachers College *) (* Columbia University, New York, NY 10027 *) (* tel. (212) 678-3843 INTERNET: jec34@columbia.edu *) (* *) (* GTREE copyright 1996 by James E. Corter *) (* *) (* last changes 12/30/96 by jec *) (*************************************************************************) program gtree(input,output,infile,outfile,sfile); Uses Dos; (* change these constants for bigger datasets, or for different compilers *) (* for IBM-PC Turbo PASCAL 6.0 version, set to 80,160,3160,(80) *) const maxobject = 80; twiceobject = 160; maxsize = 3160; (* must be at least 1/2*maxobject*(maxobject-1) *) linelength = 80; (* linelength of output device (printer, etc.) *) type grouptype = set of 1..maxobject; matrixarray = array[1..maxsize] of real; nodearray = array[1..twiceobject] of real; rootingtype = (asis,minvar,specified); var prox: matrixarray; (* input proximities *) d: matrixarray; (* transformed data distances *) numobject: integer; (* number of objects *) numprox: integer; (* number of inter-object distances *) n: integer; (* number of nodes at current iteration *) size: integer; (* number of distances at this iteration *) similarities,lowerhalf: boolean; (* similarity or distance data; lowerhalf or full matrix *) proxcode: real; (* 1.0 if distances, -1.0 if sim. *) field,decplaces: integer; (* output format control parms *) addconstant, (* additive constant for positivity and triangle inequality *) diff,maxdis,sum,sum1,sum2,sum3: real; x,y,hold,int,ij,whichgroup: integer; group: array[1..maxobject] of grouptype; (* pairs of objects to be joined at current iteration *) numgroup,nreduce: integer; (* number of such pairs *) numnode,rootnode: integer; (* number of nodes in tree; number of root node *) parent: array[1..twiceobject] of integer; (* "parent" of a node *) child: array[1..twiceobject,1..2] of integer; (* "children" of node *) length: nodearray; (* length of arc between a node and its parent *) height: nodearray; (* height (average length) of node in tree *) leafcount: array[1..twiceobject] of integer; (* number of objects in the branch determined by node *) branchset: array[1..twiceobject] of grouptype; (* keeps track of objects in each branch *) index: array[1..maxobject] of integer; (* keeps track of true node number as d matrix collapses *) use: array[1..maxobject] of boolean; (* keeps track of pairs being combined on this step *) rootarclength,rootd: real; (* length of arc root is on *) leafss: nodearray; (* holds sums-of-squares due to leaf-length for a node *) name: array[1..maxobject,1..20] of char; (* object names *) namelength: integer; (* length of longest name *) subtractconstant: boolean; (* if triang. ineq. already satisfied, subtract a constant so that it is exactly satisfied? *) rooting: rootingtype; (* three options are available for selection of root: asis=just put root at last two nodes minvar=find root with min. variance of root-to-leaf distances specified=root at specified nodes *) rootchild: array[1..2] of integer; (* for specified rooting *) printdata,printtransformed,printnumbers,specifyorder, printtree,printmodeldis,printresiduals,printheight: boolean; (* output options requests *) linesize: integer; (* output linelength for crt, etc. *) feed: integer; (* signals when output is about to run off line *) nodeposition: nodearray; (* hold calculated y-position of tree nodes*) infile,outfile: text; infilnam,outfilnam: string[12]; (* end main program variable declarations section *) (* general purpose subroutines: ijfunction, writematrix *) (************************************************************************) (* ijfunction: converts the row and column of a lowerhalf matrix *) (* to the ordinal position of that entry in a vector (matrixarray) *) (************************************************************************) function ijfunction(i:integer; j:integer): integer; begin if (i>j) then ijfunction:=(((i-1)*(i-2)) div 2) + j else if (j>i) then ijfunction:=(((j-1)*(j-2)) div 2) + i else writeln('error: i=j in ijfunction'); end; (* ijfunction *) (******************************************************************) (* writematrix: write a matrixarray as lowerhalf matrix; *) (* n = size of the matrix, *) (* field = number of digits for each entry, and *) (* decplaces = number of digits after the decimal place *) (******************************************************************) procedure writematrix(mtx: matrixarray; n,field,decplaces: integer); var row,col: integer; begin ij:=0; for row:=2 to n do begin for col:=1 to (row-1) do begin ij:=ij+1; write(outfile,mtx[ij]:field:decplaces); if (col=feed) then writeln(outfile); end; writeln(outfile); end; writeln(outfile); writeln(outfile); end; (* writematrix *) (************************************************************************) (* readin: read program control information and data *) (************************************************************************) procedure readin; var i,j,k,ij,count: integer; dumreal: real; ch: char; word: array[1..3] of char; procedure readword; (* read a word of input control keywords *) begin ch:=' '; while ((ch=' ') or (ch=',')) do if not(eoln(infile)) then read(infile,ch); i:=0; while not((ch=' ') or (ch=',') or (eoln(infile))) do begin i:=i+1; if not(i=1) then read(infile,ch); j:=ord(ch); if ((j>=65) and (j<=90)) then j:=j+32; ch:=chr(j); if not(i>3) then word[i]:=ch; end; end; (* readword *) begin (* readin *) (* initialize & set defaults *) printdata:=false; printtransformed:=false; printheight:=false; printmodeldis:=false; printresiduals:=false; printtree:=true; printnumbers:=true; specifyorder:=false; linesize:=linelength; rooting:=asis; subtractconstant:=true; (* read analysis & output option line *) while not (eoln(infile)) do begin readword; if ((word[1]='d')and(word[2]='a')and(word[3]='t')) then printdata:=true; if ((word[1]='t')and(word[2]='r')and(word[3]='a')) then printtransformed:=true; if ((word[1]='m')and(word[2]='o')and(word[3]='d')) then printmodeldis:=true; if ((word[1]='r')and(word[2]='e')and(word[3]='s')) then printresiduals:=true; if ((word[1]='n')and(word[2]='o')and(word[3]='t')) then printtree:=false; if ((word[1]='c')and(word[2]='r')and(word[3]='t')) then linesize:=80; if ((word[1]='l')and(word[2]='i')and(word[3]='n')) then read(infile,linesize); if ((word[1]='n')and(word[2]='o')and(word[3]='n')) then printnumbers:=false; if ((word[1]='s')and(word[2]='p')and(word[3]='e')) then specifyorder:=true; if ((word[1]='n')and(word[2]='o')and(word[3]='s')) then subtractconstant:=false; if ((word[1]='h')and(word[2]='e')and(word[3]='i')) then printheight:=true; if ((word[1]='m')and(word[2]='i')and(word[3]='n')) then rooting:=minvar; if ((word[1]='d')and(word[2]='e')and(word[3]='e')) then writeln(outfile,'deepest rooting no longer supported: option ignored'); if ((word[1]='r')and(word[2]='o')and(word[3]='o')) then begin rooting:=specified; read(infile,rootchild[1],rootchild[2]); end; end; readln(infile); (* if order of leaves is to be specified, then read that (optional) line *) count:=0; if (specifyorder) then begin while not(eoln(infile)) do begin count:=count+1; read(infile,i); nodeposition[i]:=count end; readln(infile); end; writeln(outfile); writeln(outfile,'additive tree analysis (GTREE version 1.0):'); (* read & write comment line *) for i:=1 to linelength do if not(eoln(infile)) then begin read(infile,ch); write(outfile,ch) end; readln(infile); writeln(outfile); writeln(outfile); writeln(outfile); (* begin reading data description line *) read(infile,numobject); if (numobject>maxobject) then writeln(outfile,'error: maximum number of objects allowed is ',maxobject:4); if ((specifyorder) and not(count=numobject)) then writeln(outfile,'error: optional line specifying order of leaves ', 'has wrong number of entries'); readword; if ((word[1]='s')and(word[2]='i')and(word[3]='m')) then similarities:=true else if ((word[1]='d')and(word[2]='i')and(word[3]='s')) then similarities:=false else writeln(outfile,' can''t tell if similarities or distances'); readword; if ((word[1]='l')and(word[2]='o')and(word[3]='w')) then lowerhalf:=true else if ((word[1]='f')and(word[2]='u')and(word[3]='l')) then lowerhalf:=false else writeln(outfile,' can''t tell if full or lowerhalf matrix'); if not(eoln(infile)) then read(infile,field) else writeln(outfile,' can''t find field-width specification'); if not(eoln(infile)) then read(infile,decplaces) else writeln(outfile,' can''t find digits-after-decimal-point specification'); readln(infile); numprox:=(numobject*(numobject-1)) div 2; feed:= linesize div field; (* begin reading actual data *) (* read full matrix, treating as asymmetric; print data if requested *) if (lowerhalf=false) then begin if (printdata=true) then writeln(outfile,'data matrix:'); for ij:=1 to maxsize do prox[ij]:=0.0; for i:=1 to numobject do begin for j:=1 to numobject do begin if (eoln(infile)) then readln(infile); read(infile,dumreal); if (printdata) then write(outfile,dumreal: field: decplaces); if ((printdata) and (j=feed)) then writeln(outfile); if not(i=j) then begin ij:=ijfunction(i,j); prox[ij]:=prox[ij]+(dumreal/2) end; end; readln(infile); if (printdata) then writeln(outfile); end; writeln(outfile); writeln(outfile,'full matrix symmetrized'); writeln(outfile); writeln(outfile); end; (* read lowerhalf matrix *) if (lowerhalf) then begin ij:=0; for i:=2 to numobject do begin if (eoln(infile)) then readln(infile); for j:=1 to (i-1) do begin ij:=ij+1; if (eoln(infile)) then readln(infile); read(infile,prox[ij]); end; end; readln(infile); end; (* read in names if they are given *) j:=1; namelength:=0; while not((eof(infile)) or (j>numobject)) do begin i:=1; while not((eoln(infile)) or (i>20)) do begin read(infile,name[j,i]); i:=i+1; end; k:=i-1; if not(k=0) then while (name[j,k]=' ') do k:=k-1; if (namelengthmaxdis) then maxdis:=prox[ij]; end; if (min<0.0) then addconstant:=-min; writeln(outfile); writeln(outfile); writeln(outfile,'(',addconstant:field:decplaces, ' needed for positivity of distances )'); (* check every triple of objects for triangle inequality *) min:=maxdis; xxij:=1; for i:=1 to (numobject-2) do begin xxij:=xxij+i-1; ij:=xxij; xxjk:=xxij; for j:=(i+1) to (numobject-1) do begin ij:=ij+(j-2); ik:=ij; xxjk:=xxjk+j-1; jk:=xxjk; for k:=(j+1) to (numobject) do begin ik:=ik+(k-2); jk:=jk+(k-2); if ((prox[jk]>=prox[ij]) and (prox[jk]>=prox[ik])) then diff:=(prox[ij]+prox[ik])-prox[jk] else if ((prox[ik]>=prox[ij]) and (prox[ik]>=prox[jk])) then diff:=(prox[ij]+prox[jk])-prox[ik] else diff:=(prox[ik]+prox[jk])-prox[ij]; if (diffloserscore) then begin if (tscore[ij]>rowsum[i]) then begin holdi:=rowsum[i]; rowsum[i]:=tscore[ij]; nn[i]:=j end else loserscore:=tscore[ij]; if (tscore[ij]>rowsum[j]) then begin holdj:=rowsum[j]; rowsum[j]:=tscore[ij]; nn[j]:=i end else loserscore:=tscore[ij]; if (loserscoreloserscore)) then begin numgroup:=numgroup+1; group[numgroup]:=[i]+[j]; end; if (numgroup=0) then writeln('error: no MNN pairs in T matrix'); end; (* findgroups *) (*************************************************************************) (* makenode: using list of groups from "findgroups", make new nodes from *) (* groups. Compute length of arcs between new node and its *) (* children, and define distances from new node as weighted *) (* average of distances from children. *) (*************************************************************************) procedure makenode; var i,j,k,ij,node,holdcount,sumcount: integer; nodegroup: array[1..2] of integer; (* holds pair being made into node *) procedure estimate; (* compute arc lengths *) var sum,avedis,diff: array[1..2] of real; dis,avelength: real; i,j,holdcount: integer; begin ij:=ijfunction(nodegroup[1],nodegroup[2]); dis:=d[ij]; for i:=1 to 2 do begin holdcount:=0; sum[i]:=0.0; for j:=1 to n do if (use[j]) then begin ij:=ijfunction(nodegroup[i],j); sum[i]:=sum[i]+leafcount[index[j]]*d[ij]; holdcount:=holdcount+leafcount[index[j]]; end; avedis[i]:=sum[i]/holdcount; end; avelength:=(avedis[1]+avedis[2])/2; for i:=1 to 2 do begin diff[i]:=avedis[i]-avelength; length[index[nodegroup[i]]]:=dis/2+diff[i]-height[index[nodegroup[i]]]; if (length[index[nodegroup[i]]]<0.0) then length[index[nodegroup[i]]]:=0.0; end; end; (* estimate *) begin (* makenode *) numnode:=numnode+1; node:=numnode; k:=0; for i:=1 to n do if (i in group[whichgroup]) then begin k:=k+1; use[i]:=false; parent[index[i]]:=node; nodegroup[k]:=i end; if not(k=2) then writeln(outfile,'error: wrong number of children: node',node:4); for i:=1 to k do begin leafcount[node]:=leafcount[node]+leafcount[index[nodegroup[i]]]; branchset[node]:=branchset[node]+branchset[index[nodegroup[i]]]; end; (* order children according to specified or input order *) if (nodeposition[index[nodegroup[2]]]rootarclength) then diff:=rootarclength; if (diff<-rootarclength) then diff:=-rootarclength; length[child[rootnode,1]]:=(rootarclength-diff)/2.0; length[child[rootnode,2]]:=(rootarclength+diff)/2.0; sum1:=height[child[rootnode,1]]+length[child[rootnode,1]]; sum2:=height[child[rootnode,2]]+length[child[rootnode,2]]; height[rootnode]:=((sum1*leafcount[child[rootnode,1]]) +(sum2*leafcount[child[rootnode,2]])) / leafcount[rootnode]; leafss[rootnode]:=leafss[child[rootnode,1]]+leafss[child[rootnode,2]] +leafcount[child[rootnode,1]]*sqr(height[rootnode]-sum1) +leafcount[child[rootnode,2]]*sqr(height[rootnode]-sum2); if (rooting=minvar) then begin writeln(outfile); writeln(outfile,'var. of dis. from root to leaves =',leafss[rootnode]:9:4); end; end; (* hangroot *) (************************************************************************) (* findbestroot: find optimal rooting of the tree *) (* (for rooting=minvar or rooting=specified) *) (************************************************************************) procedure findbestroot; var parentlength,parentheight,parentss,minscore,psum,bsum,othersum,fooheight, newparentheight,newparentss,foo1,foo2: real; score: nodearray; parentcount,newparentcount,goodnode,badnode,goodchild,badchild, ch1,ch2: integer; foundroot: boolean; procedure checkchild(gp,bp,nde,othernde: integer); begin if ((nde>0) and (othernde>0)) then begin if (rooting=specified) then if ((branchset[rootchild[1]]*branchset[nde]<>[]) and (branchset[rootchild[2]]*branchset[nde]<>[])) then score[nde]:=-1.0 else score[nde]:=0.0; if (rooting=minvar) then begin psum:=parentheight+parentlength; othersum:=height[othernde]+length[othernde]; newparentcount:=parentcount+leafcount[othernde]; newparentheight:=((psum*parentcount) + (othersum*leafcount[othernde])) / newparentcount; newparentss:=parentss+leafss[othernde] +(parentcount*sqr(psum-newparentheight)) +(leafcount[othernde]*sqr(othersum-newparentheight)); diff:=height[nde]-newparentheight; if (diff>length[nde]) then diff:=length[nde]; if (diff<-length[nde]) then diff:=-length[nde]; foo1:=(length[nde]-diff)/2.0; foo2:=(length[nde]+diff)/2.0; sum1:=height[nde]+foo1; sum2:=newparentheight+foo2; fooheight:=((sum1*leafcount[nde])+(sum2*newparentcount))/numobject; score[nde]:=leafss[nde]+newparentss +leafcount[nde]*sqr(fooheight-sum1) +newparentcount*sqr(fooheight-sum2); writeln(outfile,'node',nde:3,' score=',score[nde]:8:3); end; (* minvar section *) if (score[nde]',nde:3,')'); parent[nde]:=rootnode; parent[bp]:=gp; length[bp]:=rootarclength; branchset[gp]:=branchset[bp]+branchset[othernde]; leafcount[gp]:=leafcount[bp]+leafcount[othernde]; height[gp]:=((leafcount[bp]*(height[bp]+length[bp])) +(leafcount[othernde]*(height[othernde]+length[othernde]))) / leafcount[gp]; othersum:=height[bp]+rootarclength; bsum:=height[othernde]+length[othernde]; leafss[gp]:=leafss[bp]+leafss[othernde] +(leafcount[bp]*sqr(othersum-height[gp])) +(leafcount[othernde]*sqr(bsum-height[gp])); nodeposition[gp]:=((leafcount[bp]*nodeposition[bp]) +(leafcount[othernde]*nodeposition[othernde])) / leafcount[gp]; if (nodeposition[gp]=score[rootnode]) then foundroot:=true else changeroot(goodnode,badnode,goodchild,badchild); until (foundroot); end; (* findbestroot *) (************************************************************************) (* report: write tree structure and arc-length estimates; draw tree *) (************************************************************************) procedure report; var i,j: integer; procedure drawtree; const vertchar = '|'; (* change if desired *) horizchar = '-'; var i,j,x,y,start,stop,maxstop,node: integer; maxheight, stretch: real; line: array[1..linelength] of char; yposition: array[1..twiceobject] of integer; topkid,bottomkid: array[1..twiceobject] of integer; numleaf: integer; (* descend calculates: vertical position of a branch ("yposition") horizontal position of branch ("height") endpoints of vertical connecting lines ("topkid","bottomkid") *) procedure descend(nde: integer); var i: integer; begin if not(nde=rootnode) then height[nde]:=height[parent[nde]]+length[nde]; if (nde<=numobject) (* if nde is a leaf *) then begin numleaf:=numleaf+1; yposition[nde]:=(numleaf*2)-1 end else begin (* if nde is an interior node *) for i:=1 to 2 do begin descend(child[nde,i]); yposition[nde]:=yposition[nde]+ (yposition[child[nde,i]]*leafcount[child[nde,i]]); end; yposition[nde]:=yposition[nde] div leafcount[nde]; topkid[nde]:=yposition[child[nde,1]]; bottomkid[nde]:=yposition[child[nde,2]] end; end; (* descend *) begin (* drawtree *) height[rootnode]:=0.0; numleaf:=0; for i:=(numobject+1) to numnode do yposition[i]:=0; descend(rootnode); if not(numobject=numleaf) then writeln(outfile,'error in descend, numleaf=',numleaf); maxheight:=0.0; for i:=1 to numobject do if (height[i]>maxheight) then maxheight:=height[i]; stretch:=(linesize-namelength-8)/maxheight; (* write tree arcs (horizontal lines) *) for y:=1 to (2*numobject) do begin maxstop:=0; for i:=1 to linesize do line[i]:=' '; for node:=1 to numnode do begin if (yposition[node]=y) then begin if (parent[node]<>0) then start:=trunc(stretch*height[parent[node]])+1 else start:=1; stop:=trunc(stretch*height[node])+1; if (stop>maxstop) then maxstop:=stop; if (stop>linesize) then stop:=linesize; for x:=start to stop do line[x]:=horizchar end; (* place vertical conecting lines *) if (node>numobject) then if ((y>topkid[node]) and (ymaxstop) then maxstop:=stop; line[stop]:=vertchar; end; end; (* loop on node *) for i:=1 to maxstop do write(outfile,line[i]); for i:=1 to numobject do if (yposition[i]=y) then begin if (printnumbers) then write(outfile,i:4,' ') else write(outfile,' '); for j:=1 to namelength do write(outfile,name[i,j]); end; writeln(outfile); end; end; (* drawtree *) begin (* report *) writeln(outfile); writeln(outfile); writeln(outfile); writeln(outfile); writeln(outfile,'node length children'); for i:=1 to numnode do begin write(outfile,i:4,' '); write(outfile,length[i]:field:decplaces,' '); if (i<=numobject) then begin write(outfile,' '); for j:=1 to 20 do write(outfile,name[i,j]) end else for j:=1 to 2 do write(outfile,child[i,j]:3); writeln(outfile); end; writeln(outfile); writeln(outfile); writeln(outfile); writeln(outfile); if (printtree) then drawtree; writeln(outfile); writeln(outfile); writeln(outfile); writeln(outfile); if (printheight) then begin writeln(outfile,'distance of each node from the root:'); writeln(outfile,'node height'); for i:=1 to numnode do writeln(outfile,i:3,' ',height[i]:field:decplaces); writeln(outfile); writeln(outfile); writeln(outfile); writeln(outfile); end; end; (* report *) (************************************************************************) (* stats: calculate stress(1), stress(2), squared monotonic and linear *) (* correlations *) (************************************************************************) procedure stats; var modeldis: matrixarray; c: array [1..maxsize] of integer; procedure derivedistances; var i,j,k,ij: integer; begin ij:=0; for i:=2 to numobject do begin for j:=1 to (i-1) do begin ij:=ij+1; modeldis[ij]:=0.0; for k:=1 to numnode do if (((i in branchset[k]) and not(j in branchset[k])) or ((j in branchset[k]) and not(i in branchset[k]))) then modeldis[ij]:=modeldis[ij]+length[k]; end; end; end; (* derivedistances *) procedure quicksort(lowerbound,upperbound: integer); begin if (upperbound>lowerbound+1) then begin x:=lowerbound+1; y:=upperbound; while (x<=y) do if (prox[c[x]]<=prox[c[lowerbound]]) then x:=x+1 else begin while (prox[c[y]]>prox[c[lowerbound]]) do y:=y-1; if (xprox[c[upperbound]]) then begin hold:=c[lowerbound]; c[lowerbound]:=c[upperbound]; c[upperbound]:=hold; end; end; (* quicksort *) procedure lsmonotransform(var modeldis,d: matrixarray); var i,j,place,ordered: integer; bin: real; endcondition: integer; begin repeat ordered:=1; i:=1; repeat if (d[c[i+1]](bin/(place-i+1))) then endcondition:=1; until (endcondition=1); for j:=i to place do d[c[j]]:=(bin/(place-i+1)); i:=place end; i:=i+1 until (i>=numprox); until (ordered=1); end; (* lsmonotransform *) procedure stresscalc(modeldis,d: matrixarray); var i: integer; stress,stress1,stress2,denom, varn,varob,nmean,obmean,propor,rmonsq: real; begin stress:=0.0; denom:=0.0; varn:=0.0; varob:=0.0; nmean:=0.0; obmean:=0.0; propor:=0.0; for i:=1 to numprox do begin obmean:=obmean+prox[i]; nmean:=nmean+modeldis[i] end; nmean:=nmean/numprox; obmean:=obmean/numprox; for i:=1 to numprox do begin stress:=stress+sqr(d[i]-modeldis[i]); propor:=propor+(prox[i]*modeldis[i]); denom:=denom+sqr(modeldis[i]); varn:=varn+sqr(modeldis[i]-nmean); varob:=varob+sqr(prox[i]-obmean); end; stress1:=sqrt(stress/denom); stress2:=sqrt(stress/varn); rmonsq:=1.0-(stress/varn); varn:=varn/numprox; varob:=varob/numprox; propor:=(sqr(propor/numprox - nmean*obmean))/(varn*varob); writeln(outfile,'stress formula 1 =',stress1:7:4); writeln(outfile,'stress formula 2 =',stress2:7:4); writeln(outfile,'r(monotonic) squared=',rmonsq:6:4); writeln(outfile,'r-squared (p.v.a.f.)=',propor:6:4); writeln(outfile); writeln(outfile); writeln(outfile); writeln(outfile); end; (* stresscalc *) begin (* stats *) derivedistances; (* re-use arrays c & d for lsmt transform of model distances *) for ij:=1 to numprox do begin d[ij]:=modeldis[ij]; c[ij]:=ij end; quicksort(1,numprox); lsmonotransform(modeldis,d); stresscalc(modeldis,d); if (printmodeldis) then begin writeln(outfile,'model distances:'); writematrix(modeldis,numobject,field,decplaces); end; if (printresiduals) then begin for int:=1 to numprox do modeldis[int]:=prox[int]-modeldis[int]; writeln(outfile,'residual distances:'); writematrix(modeldis,numobject,field,decplaces); end; end; (* stats *) (*********************** start of main program *****************************) begin (* main *) writeln; writeln('GTREE release version 1.0'); writeln; (* IBM-PC Turbo PASCAL open statements *) writeln('enter input file name:'); readln(infilnam); writeln('enter output file name:'); readln(outfilnam); assign(infile,infilnam); assign(outfile,outfilnam); (* IBM-PC Turbo PASCAL compiler directive to increase stack size *) {$M 65520,0,655360} reset(infile); rewrite(outfile); writeln; writeln('reading data'); readin; if (lowerhalf and printdata) then begin writeln(outfile,'data matrix:'); writematrix(prox,numobject,field,decplaces); end; chkmetric; if (printtransformed) then begin writeln(outfile,'transformed data distances:'); writematrix(prox,numobject,field,decplaces); end; initialize; write('fitting tree..'); (* main clustering loop **********************************) while (n>=4) do begin numgroup:=0; nreduce:=0; findgroups; for int:=1 to n do use[int]:=true; for whichgroup:=1 to numgroup do makenode; cleanup end; (* end main clustering loop ******************************) (* choose a root for the tree (now there are n=3 or n=2 nodes left) *) if (n=3) then begin numgroup:=1; whichgroup:=1; if (height[index[1]]>height[index[2]]) and (height[index[1]]>height[index[3]]) then group[1]:=[2]+[3] else if (height[index[2]]>height[index[1]]) and (height[index[2]]>height[index[3]]) then group[1]:=[1]+[3] else group[1]:=[1]+[2]; for int:=1 to 3 do use[int]:=true; makenode; cleanup; end; (* now n=2 *) makeroot; hangroot; if not(rooting=asis) then begin findbestroot; (* specified rooting or minvarroot *) hangroot end; (* end root-choosing section *) writeln; report; writeln('calculating fit statistics'); writeln; stats; close(infile); close(outfile); writeln('end gtree run'); end. (* main *)