(******************** ADDTREE/P version 1.5 *******************************) (* *) (* ADDTREE/P copyright 1981-1995 by James E. Corter *) (* All rights reserved. *) (* *) (* ADDTREE/P is a program in the PASCAL language for fitting additive *) (* trees to proximity data, using a variant (Corter, 1982) of the *) (* algorithm described by Sattath and Tversky (1977). Differences *) (* between the original Sattath & Tversky algorithm and the variant used *) (* here are described more fully in Corter (1992). *) (* The input data to ADDTREE/P may be either similarity or distance data. *) (* A listing of parameters and their estimates, drawing of the tree graph,*) (* and summary statistics are reported. Various other output options are *) (* available. A help file accompanies this source file. See it for more *) (* information on using the program. *) (* ADDTREE/P was written by James E. Corter. Questions or comments *) (* regarding the program may be addressed to him at: *) (* INTERNET: jec34@columbia.edu *) (* Box 41, Teachers College, Columbia University, New York, NY 10027 *) (* *) (* NOTICE REGARDING RETRIEVAL AND USE OF ADDTREE/P AND ALL ASSOCIATED *) (* MATERIAL: *) (* By retrieving or using any of the files in this directory *) (* pertaining to ADDTREE/P, you are agreeing to the following *) (* terms and conditions: *) (* *) (* 1. All the material you retrieved is and will remain the property *) (* of the author, James E. Corter. It is provided only for not-for- *) (* profit research and teaching purposes. The material to be supplied *) (* is not to be published or redistributed, with or without modifi- *) (* cation, or resold, or used for any business or commercial purpose *) (* without the express written permission of the author. *) (* *) (* 2. THIS MATERIAL IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR *) (* IMPLIED WARRANTY. IN PARTICULAR, BUT WITHOUT LIMITATION, NEITHER THE*) (* AUTHOR NOR AT&T MAKE ANY REPRESENTATION OR WARRANTY OF ANY KIND *) (* CONCERNING THE MERCHANTABILITY OF THIS MATERIAL, ITS FITNESS FOR ANY *) (* PARTICULAR PURPOSE, ITS FREEDOM FROM INFRINGEMENT OF ANY PATENT OR *) (* COPYRIGHT, ITS ACCURACY, OR PROVISION OF TECHNICAL SUPPORT. *) (* *) (* ADDTREE/P version 1.5: *) (* changes for version 1.2: -quicksort used instead of bubble sort *) (* -options for choosing of root: default is *) (* to leave as is. Optionally: find root that *) (* minimizes variance of distances from *) (* root to leaves, root betw. specified nodes *) (* -make specif. of leaf order more robust *) (* changes for version 1.3: -warns if data set is too big *) (* -superfluous error-warning stmnt. removed *) (* (stalemate warning in "findgroups") *) (* changes for version 1.4: -function ijfunction used only when necess. *) (* -replace rooting proc. => proc. "hangroot" *) (* (don't tripleroot, omit "deepestroot" option) *) (* -subtract constant to exactly satisfy t.i. *) (* -"findbestroot" doesn't search exhaustively *) (* changes for version 1.5: -IO stuff, efficiency improvements, info *) (* messages on execution progress added *) (* *) (**************************************************************************) program addtreep(input,output,infile,outfile); (* change these constants for bigger datasets, or for different compilers *) (* for IBM-PC Turbo PASCAL 6.0 version, set to 80,160,3160,(80) *) const maxobject = 80; twiceobject = 160; maxsize = 3160; (* must be at least 1/2*maxobject*(maxobject-1) *) linelength = 80; (* linelength of output device (printer, etc.) *) type grouptype = set of 1..maxobject; matrixarray = array[1..maxsize] of real; nodearray = array[1..twiceobject] of real; rootingtype = (asis,minvar,specified); var prox: matrixarray; (* input proximities *) d: matrixarray; (* transformed data distances *) c: array[1..maxsize] of integer; (* neighbor-count matrix *) numobject: integer; (* number of objects *) numprox: integer; (* number of inter-object distances *) n: integer; (* number of nodes at current iteration *) size: integer; (* number of distances at this iteration *) similarities,lowerhalf: boolean; (* similarity or distance data; lowerhalf or full matrix *) proxcode: real; (* 1.0 if distances, -1.0 if sim. *) field,decplaces: integer; (* output format control parms *) addconstant, (* additive constant for positivity and triangle inequality *) diff,maxdis,sum,sum1,sum2,sum3: real; x,y,hold,int,ij,whichgroup: integer; group: array[1..maxobject] of grouptype; (* pairs of objects to be joined at current iteration *) numgroup,nreduce: integer; (* number of such pairs *) numnode,rootnode: integer; (* number of nodes in tree; number of root node *) parent: array[1..twiceobject] of integer; (* "parent" of a node *) child: array[1..twiceobject,1..2] of integer; (* "children" of node *) length: nodearray; (* length of arc between a node and its parent *) height: nodearray; (* height (average length) of node in tree *) leafcount: array[1..twiceobject] of integer; (* number of objects in the branch determined by node *) branchset: array[1..twiceobject] of grouptype; (* keeps track of objects in each branch *) index: array[1..maxobject] of integer; (* keeps track of true node number as d matrix collapses *) use: array[1..maxobject] of boolean; (* keeps track of pairs being combined on this step *) rootarclength,rootd: real; (* length of arc root is on *) leafss: nodearray; (* holds sums-of-squares due to leaf-length for a node *) name: array[1..maxobject,1..20] of char; (* object names *) namelength: integer; (* length of longest name *) subtractconstant: boolean; (* if triang. ineq. already satisfied, subtract a constant so that it is exactly satisfied? *) rooting: rootingtype; (* three options are available for selection of root: asis=just put root at last two nodes minvar=find root with min. variance of root-to-leaf distances specified=root at specified nodes *) rootchild: array[1..2] of integer; (* for specified rooting *) printdata,printtransformed,printnumbers,specifyorder, printtree,printmodeldis,printresiduals,printheight: boolean; (* output options requests *) linesize: integer; (* output linelength for crt, etc. *) feed: integer; (* signals when output is about to run off line *) nodeposition: nodearray; (* hold calculated y-position of tree nodes*) infile,outfile: text; infilnam,outfilnam: string[24]; (* general purpose subroutines: ijfunction, writematrix *) (************************************************************************) (* ijfunction: converts the row and column of a lowerhalf matrix *) (* to the ordinal position of that entry in a vector (matrixarray) *) (************************************************************************) function ijfunction(i:integer; j:integer): integer; begin if (i>j) then ijfunction:=(((i-1)*(i-2)) div 2) + j else if (j>i) then ijfunction:=(((j-1)*(j-2)) div 2) + i else writeln('error: i=j in ijfunction'); end; (* ijfunction *) (******************************************************************) (* writematrix: write a matrixarray as lowerhalf matrix; *) (* n = size of the matrix, *) (* field = number of digits for each entry, and *) (* decplaces = number of digits after the decimal place *) (******************************************************************) procedure writematrix(mtx: matrixarray; n,field,decplaces: integer); var row,col: integer; begin ij:=0; for row:=2 to n do begin for col:=1 to (row-1) do begin ij:=ij+1; write(outfile,mtx[ij]:field:decplaces); if (col=feed) then writeln(outfile); end; writeln(outfile); end; writeln(outfile); writeln(outfile); end; (* writematrix *) (************************************************************************) (* readin: read program control information and data *) (************************************************************************) procedure readin; var i,j,k,ij,count: integer; dumreal: real; ch: char; word: array[1..3] of char; procedure readword; (* read a word of input control keywords *) begin ch:=' '; while ((ch=' ') or (ch=',')) do if not(eoln(infile)) then read(infile,ch); i:=0; while not((ch=' ') or (ch=',') or (eoln(infile))) do begin i:=i+1; if not(i=1) then read(infile,ch); j:=ord(ch); if ((j>=65) and (j<=90)) then j:=j+32; ch:=chr(j); if not(i>3) then word[i]:=ch; end; end; (* readword *) begin (* readin *) (* initialize & set defaults *) printdata:=false; printtransformed:=false; printheight:=false; printmodeldis:=false; printresiduals:=false; printtree:=true; printnumbers:=true; specifyorder:=false; linesize:=linelength; rooting:=asis; subtractconstant:=true; (* read analysis & output option line *) while not (eoln(infile)) do begin readword; if ((word[1]='d')and(word[2]='a')and(word[3]='t')) then printdata:=true; if ((word[1]='t')and(word[2]='r')and(word[3]='a')) then printtransformed:=true; if ((word[1]='m')and(word[2]='o')and(word[3]='d')) then printmodeldis:=true; if ((word[1]='r')and(word[2]='e')and(word[3]='s')) then printresiduals:=true; if ((word[1]='n')and(word[2]='o')and(word[3]='t')) then printtree:=false; if ((word[1]='c')and(word[2]='r')and(word[3]='t')) then linesize:=80; if ((word[1]='n')and(word[2]='o')and(word[3]='n')) then printnumbers:=false; if ((word[1]='s')and(word[2]='p')and(word[3]='e')) then specifyorder:=true; if ((word[1]='n')and(word[2]='o')and(word[3]='s')) then subtractconstant:=false; if ((word[1]='h')and(word[2]='e')and(word[3]='i')) then printheight:=true; if ((word[1]='m')and(word[2]='i')and(word[3]='n')) then rooting:=minvar; if ((word[1]='d')and(word[2]='e')and(word[3]='e')) then writeln(outfile,'deepest rooting no longer supported: option ignored'); if ((word[1]='r')and(word[2]='o')and(word[3]='o')) then begin rooting:=specified; read(infile,rootchild[1],rootchild[2]); end; end; readln(infile); (* if order of leaves is to be specified, then read that (optional) line *) count:=0; if (specifyorder) then begin while not(eoln(infile)) do begin count:=count+1; read(infile,i); nodeposition[i]:=count end; readln(infile); end; writeln(outfile); writeln(outfile,'addtree analysis (ADDTREE/P version 1.5):'); (* read & write comment line *) for i:=1 to linelength do if not(eoln(infile)) then begin read(infile,ch); write(outfile,ch) end; readln(infile); writeln(outfile); writeln(outfile); writeln(outfile); (* begin reading data description line *) read(infile,numobject); if (numobject>maxobject) then writeln(outfile,'error: maximum number of objects allowed is ',maxobject:4); if ((specifyorder) and not(count=numobject)) then writeln(outfile,'error: optional line specifying order of leaves ', 'has wrong number of entries'); readword; if ((word[1]='s')and(word[2]='i')and(word[3]='m')) then similarities:=true else if ((word[1]='d')and(word[2]='i')and(word[3]='s')) then similarities:=false else writeln(outfile,' can''t tell if similarities or distances'); readword; if ((word[1]='l')and(word[2]='o')and(word[3]='w')) then lowerhalf:=true else if ((word[1]='f')and(word[2]='u')and(word[3]='l')) then lowerhalf:=false else writeln(outfile,' can''t tell if full or lowerhalf matrix'); if not(eoln(infile)) then read(infile,field) else writeln(outfile,' can''t find field-width specification'); if not(eoln(infile)) then read(infile,decplaces) else writeln(outfile,' can''t find digits-after-decimal-point specification'); readln(infile); numprox:=(numobject*(numobject-1)) div 2; feed:= linesize div field; (* begin reading actual data *) (* read full matrix, treating as asymmetric; print data if requested *) if (lowerhalf=false) then begin if (printdata=true) then writeln(outfile,'data matrix:'); for ij:=1 to maxsize do prox[ij]:=0.0; for i:=1 to numobject do begin for j:=1 to numobject do begin if (eoln(infile)) then readln(infile); read(infile,dumreal); if (printdata) then write(outfile,dumreal: field: decplaces); if ((printdata) and (j=feed)) then writeln(outfile); if not(i=j) then begin ij:=ijfunction(i,j); prox[ij]:=prox[ij]+(dumreal/2) end; end; readln(infile); if (printdata) then writeln(outfile); end; writeln(outfile); writeln(outfile,'full matrix symmetrized'); writeln(outfile); writeln(outfile); end; (* read lowerhalf matrix *) if (lowerhalf) then begin ij:=0; for i:=2 to numobject do begin if (eoln(infile)) then readln(infile); for j:=1 to (i-1) do begin ij:=ij+1; if (eoln(infile)) then readln(infile); read(infile,prox[ij]); end; end; readln(infile); end; (* read in names if they are given *) j:=1; namelength:=0; while not((eof(infile)) or (j>numobject)) do begin i:=1; while not((eoln(infile)) or (i>20)) do begin read(infile,name[j,i]); i:=i+1; end; k:=i-1; if not(k=0) then while (name[j,k]=' ') do k:=k-1; if (namelengthmaxdis) then maxdis:=prox[ij]; end; if (min<0.0) then addconstant:=-min; writeln(outfile); writeln(outfile); writeln(outfile,'(',addconstant:field:decplaces, ' needed for positivity of distances )'); (* check every triple of objects for triangle inequality *) min:=maxdis; xxij:=1; for i:=1 to (numobject-2) do begin xxij:=xxij+i-1; ij:=xxij; xxjk:=xxij; for j:=(i+1) to (numobject-1) do begin ij:=ij+(j-2); ik:=ij; xxjk:=xxjk+j-1; jk:=xxjk; for k:=(j+1) to (numobject) do begin ik:=ik+(k-2); jk:=jk+(k-2); if ((prox[jk]>=prox[ij]) and (prox[jk]>=prox[ik])) then diff:=(prox[ij]+prox[ik])-prox[jk] else if ((prox[ik]>=prox[ij]) and (prox[ik]>=prox[jk])) then diff:=(prox[ij]+prox[jk])-prox[ik] else diff:=(prox[ik]+prox[jk])-prox[ij]; if (diffmaxloser) then maxloser:=losercount; end; (* findcloserneighbor *) begin (* findgroups *) (* find nearest-neighbors of each object & calc. rowsumdis. for tiebreaking *) holdcount:=0; for i:=1 to n do begin rowmaxcount:=-1; rowsumdistance[i]:=0.0; for j:=1 to n do if not(i=j) then begin ij:=ijfunction(i,j); rowsumdistance[i]:=rowsumdistance[i]+d[ij]; if (c[ij]>rowmaxcount) then begin rowmaxcount:=c[ij]; numnn:=1; neighbors[1]:=j; end else if (c[ij]=rowmaxcount) then begin numnn:=numnn+1; neighbors[numnn]:=j; end; end; (* loop on j *) (* check if any pair made from a new neighbor of i is already on list *) for new:=1 to numnn do begin goodone[new]:=true; for k:=1 to holdcount do if (([i]+[neighbors[new]])=([pair[k,1]]+[pair[k,2]])) then goodone[new]:=false; end; numnew:=0; for new:=1 to numnn do begin if (goodone[new]) then begin numnew:=numnew+1; pair[holdcount+numnew,1]:=i; pair[holdcount+numnew,2]:=neighbors[new]; end; end; holdcount:=holdcount+numnew; end; (* loop on each object, i *) for i:=1 to holdcount do goodone[i]:=true; maxloser:=0; (* compare every pair of pairs with a common member to see which is better *) for j:=2 to holdcount do for i:=1 to (j-1) do begin pairi:=[pair[i,1]]+[pair[i,2]]; pairj:=[pair[j,1]]+[pair[j,2]]; common:= pairi*pairj; if (common<>[]) (* if pairs overlap, then resolve by l.s. test *) then begin otheri:=pairi-common; otherj:=pairj-common; for int:=1 to n do begin if (int in common) then com:=int; if (int in otheri) then oth1:=int; if (int in otherj) then oth2:=int; end; findcloserneighbor(com,oth1,oth2); if (neighbor=oth1) then goodone[j]:=false else goodone[i]:=false; end; end; (* loop on pairs j,i of object pairs *) (* now each object i should have only a single nearest neighbor *) numgroup:=0; maxgood:=0; for i:=1 to holdcount do begin ij:=ijfunction(pair[i,1],pair[i,2]); if (goodone[i] and (c[ij]>maxgood)) then maxgood:=c[ij]; if ((goodone[i]) and (c[ij]>=maxloser)) then begin numgroup:=numgroup+1; group[numgroup]:=[pair[i,1]]+[pair[i,2]]; end; end; if (numgroup=0) (* if no good pair was > maxloser, take max good one(s) *) then for i:=1 to holdcount do if goodone[i] then begin ij:=ijfunction(pair[i,1],pair[i,2]); if (c[ij]=maxgood) then begin numgroup:=numgroup+1; group[numgroup]:=[pair[i,1]]+[pair[i,2]] end; end; end; (* findgroups *) (*************************************************************************) (* makenode: using list of groups from "findgroups", make new nodes from *) (* groups. Compute length of arcs between new node and its *) (* children, and define distances from new node as weighted *) (* average of distances from children. *) (*************************************************************************) procedure makenode; var i,j,k,ij,node,holdcount,sumcount: integer; nodegroup: array[1..2] of integer; (* holds pair being made into node *) procedure estimate; (* compute arc lengths *) var sum,avedis,diff: array[1..2] of real; dis,avelength: real; i,j,holdcount: integer; begin ij:=ijfunction(nodegroup[1],nodegroup[2]); dis:=d[ij]; for i:=1 to 2 do begin holdcount:=0; sum[i]:=0.0; for j:=1 to n do if (use[j]) then begin ij:=ijfunction(nodegroup[i],j); sum[i]:=sum[i]+leafcount[index[j]]*d[ij]; holdcount:=holdcount+leafcount[index[j]]; end; avedis[i]:=sum[i]/holdcount; end; avelength:=(avedis[1]+avedis[2])/2; for i:=1 to 2 do begin diff[i]:=avedis[i]-avelength; length[index[nodegroup[i]]]:=dis/2+diff[i]-height[index[nodegroup[i]]]; if (length[index[nodegroup[i]]]<0.0) then length[index[nodegroup[i]]]:=0.0; end; end; (* estimate *) begin (* makenode *) numnode:=numnode+1; node:=numnode; k:=0; for i:=1 to n do if (i in group[whichgroup]) then begin k:=k+1; use[i]:=false; parent[index[i]]:=node; nodegroup[k]:=i end; if not(k=2) then writeln(outfile,'error: wrong number of children: node',node:4); for i:=1 to k do begin leafcount[node]:=leafcount[node]+leafcount[index[nodegroup[i]]]; branchset[node]:=branchset[node]+branchset[index[nodegroup[i]]]; end; (* order children according to specified or input order *) if (nodeposition[index[nodegroup[2]]]rootarclength) then diff:=rootarclength; if (diff<-rootarclength) then diff:=-rootarclength; length[child[rootnode,1]]:=(rootarclength-diff)/2.0; length[child[rootnode,2]]:=(rootarclength+diff)/2.0; sum1:=height[child[rootnode,1]]+length[child[rootnode,1]]; sum2:=height[child[rootnode,2]]+length[child[rootnode,2]]; height[rootnode]:=((sum1*leafcount[child[rootnode,1]]) +(sum2*leafcount[child[rootnode,2]])) / leafcount[rootnode]; leafss[rootnode]:=leafss[child[rootnode,1]]+leafss[child[rootnode,2]] +leafcount[child[rootnode,1]]*sqr(height[rootnode]-sum1) +leafcount[child[rootnode,2]]*sqr(height[rootnode]-sum2); if (rooting=minvar) then begin writeln(outfile); writeln(outfile,'var. of dis. from root to leaves =',leafss[rootnode]:9:4); end; end; (* hangroot *) (************************************************************************) (* findbestroot: find optimal rooting of the tree *) (* (for rooting=minvar or rooting=specified) *) (************************************************************************) procedure findbestroot; var parentlength,parentheight,parentss,minscore,psum,bsum,othersum,fooheight, newparentheight,newparentss,foo1,foo2: real; score: nodearray; parentcount,newparentcount,goodnode,badnode,goodchild,badchild, ch1,ch2: integer; foundroot: boolean; procedure checkchild(gp,bp,nde,othernde: integer); begin if ((nde>0) and (othernde>0)) then begin if (rooting=specified) then if ((branchset[rootchild[1]]*branchset[nde]<>[]) and (branchset[rootchild[2]]*branchset[nde]<>[])) then score[nde]:=-1.0 else score[nde]:=0.0; if (rooting=minvar) then begin psum:=parentheight+parentlength; othersum:=height[othernde]+length[othernde]; newparentcount:=parentcount+leafcount[othernde]; newparentheight:=((psum*parentcount) + (othersum*leafcount[othernde])) / newparentcount; newparentss:=parentss+leafss[othernde] +(parentcount*sqr(psum-newparentheight)) +(leafcount[othernde]*sqr(othersum-newparentheight)); diff:=height[nde]-newparentheight; if (diff>length[nde]) then diff:=length[nde]; if (diff<-length[nde]) then diff:=-length[nde]; foo1:=(length[nde]-diff)/2.0; foo2:=(length[nde]+diff)/2.0; sum1:=height[nde]+foo1; sum2:=newparentheight+foo2; fooheight:=((sum1*leafcount[nde])+(sum2*newparentcount))/numobject; score[nde]:=leafss[nde]+newparentss +leafcount[nde]*sqr(fooheight-sum1) +newparentcount*sqr(fooheight-sum2); writeln(outfile,'node',nde:3,' score=',score[nde]:8:3); end; (* minvar section *) if (score[nde]',nde:3,')'); parent[nde]:=rootnode; parent[bp]:=gp; length[bp]:=rootarclength; branchset[gp]:=branchset[bp]+branchset[othernde]; leafcount[gp]:=leafcount[bp]+leafcount[othernde]; height[gp]:=((leafcount[bp]*(height[bp]+length[bp])) +(leafcount[othernde]*(height[othernde]+length[othernde]))) / leafcount[gp]; othersum:=height[bp]+rootarclength; bsum:=height[othernde]+length[othernde]; leafss[gp]:=leafss[bp]+leafss[othernde] +(leafcount[bp]*sqr(othersum-height[gp])) +(leafcount[othernde]*sqr(bsum-height[gp])); nodeposition[gp]:=((leafcount[bp]*nodeposition[bp]) +(leafcount[othernde]*nodeposition[othernde])) / leafcount[gp]; if (nodeposition[gp]=score[rootnode]) then foundroot:=true else changeroot(goodnode,badnode,goodchild,badchild); until (foundroot); end; (* findbestroot *) (************************************************************************) (* report: write tree structure and arc-length estimates; draw tree *) (************************************************************************) procedure report; var i,j: integer; procedure drawtree; const vertchar = '|'; (* change if desired *) horizchar = '-'; var i,j,x,y,start,stop,maxstop,node: integer; maxheight, stretch: real; line: array[1..linelength] of char; yposition: array[1..twiceobject] of integer; topkid,bottomkid: array[1..twiceobject] of integer; numleaf: integer; (* descend calculates: vertical position of a branch ("yposition") horizontal position of branch ("height") endpoints of vertical connecting lines ("topkid","bottomkid") *) procedure descend(nde: integer); var i: integer; begin if not(nde=rootnode) then height[nde]:=height[parent[nde]]+length[nde]; if (nde<=numobject) (* if nde is a leaf *) then begin numleaf:=numleaf+1; yposition[nde]:=(numleaf*2)-1 end else begin (* if nde is an interior node *) for i:=1 to 2 do begin descend(child[nde,i]); yposition[nde]:=yposition[nde]+ (yposition[child[nde,i]]*leafcount[child[nde,i]]); end; yposition[nde]:=yposition[nde] div leafcount[nde]; topkid[nde]:=yposition[child[nde,1]]; bottomkid[nde]:=yposition[child[nde,2]] end; end; (* descend *) begin (* drawtree *) height[rootnode]:=0.0; numleaf:=0; for i:=(numobject+1) to numnode do yposition[i]:=0; descend(rootnode); if not(numobject=numleaf) then writeln(outfile,'error in descend, numleaf=',numleaf); maxheight:=0.0; for i:=1 to numobject do if (height[i]>maxheight) then maxheight:=height[i]; stretch:=(linesize-namelength-8)/maxheight; (* write tree arcs (horizontal lines) *) for y:=1 to (2*numobject) do begin maxstop:=0; for i:=1 to linesize do line[i]:=' '; for node:=1 to numnode do begin if (yposition[node]=y) then begin if (parent[node]<>0) then start:=trunc(stretch*height[parent[node]])+1 else start:=1; stop:=trunc(stretch*height[node])+1; if (stop>maxstop) then maxstop:=stop; if (stop>linesize) then stop:=linesize; for x:=start to stop do line[x]:=horizchar end; (* place vertical conecting lines *) if (node>numobject) then if ((y>topkid[node]) and (ymaxstop) then maxstop:=stop; line[stop]:=vertchar; end; end; (* loop on node *) for i:=1 to maxstop do write(outfile,line[i]); for i:=1 to numobject do if (yposition[i]=y) then begin if (printnumbers) then write(outfile,i:4,' ') else write(outfile,' '); for j:=1 to namelength do write(outfile,name[i,j]); end; writeln(outfile); end; end; (* drawtree *) begin (* report *) writeln(outfile); writeln(outfile); writeln(outfile); writeln(outfile); writeln(outfile,'node length children'); for i:=1 to numnode do begin write(outfile,i:4,' '); write(outfile,length[i]:field:decplaces,' '); if (i<=numobject) then begin write(outfile,' '); for j:=1 to 20 do write(outfile,name[i,j]) end else for j:=1 to 2 do write(outfile,child[i,j]:3); writeln(outfile); end; writeln(outfile); writeln(outfile); writeln(outfile); writeln(outfile); if (printtree) then drawtree; writeln(outfile); writeln(outfile); writeln(outfile); writeln(outfile); if (printheight) then begin writeln(outfile,'distance of each node from the root:'); writeln(outfile,'node height'); for i:=1 to numnode do writeln(outfile,i:3,' ',height[i]:field:decplaces); writeln(outfile); writeln(outfile); writeln(outfile); writeln(outfile); end; end; (* report *) (************************************************************************) (* stats: calculate stress(1), stress(2), squared monotonic and linear *) (* correlations *) (************************************************************************) procedure stats; var modeldis: matrixarray; procedure derivedistances; var i,j,k,ij: integer; begin ij:=0; for i:=2 to numobject do begin for j:=1 to (i-1) do begin ij:=ij+1; modeldis[ij]:=0.0; for k:=1 to numnode do if (((i in branchset[k]) and not(j in branchset[k])) or ((j in branchset[k]) and not(i in branchset[k]))) then modeldis[ij]:=modeldis[ij]+length[k]; end; end; end; (* derivedistances *) procedure quicksort(lowerbound,upperbound: integer); begin if (upperbound>lowerbound+1) then begin x:=lowerbound+1; y:=upperbound; while (x<=y) do if (prox[c[x]]<=prox[c[lowerbound]]) then x:=x+1 else begin while (prox[c[y]]>prox[c[lowerbound]]) do y:=y-1; if (xprox[c[upperbound]]) then begin hold:=c[lowerbound]; c[lowerbound]:=c[upperbound]; c[upperbound]:=hold; end; end; (* quicksort *) procedure lsmonotransform(var modeldis,d: matrixarray); var i,j,place,ordered: integer; bin: real; endcondition: integer; begin repeat ordered:=1; i:=1; repeat if (d[c[i+1]](bin/(place-i+1))) then endcondition:=1; until (endcondition=1); for j:=i to place do d[c[j]]:=(bin/(place-i+1)); i:=place end; i:=i+1 until (i>=numprox); until (ordered=1); end; (* lsmonotransform *) procedure stresscalc(modeldis,d: matrixarray); var i: integer; stress,stress1,stress2,denom, varn,varob,nmean,obmean,propor,rmonsq: real; begin stress:=0.0; denom:=0.0; varn:=0.0; varob:=0.0; nmean:=0.0; obmean:=0.0; propor:=0.0; for i:=1 to numprox do begin obmean:=obmean+prox[i]; nmean:=nmean+modeldis[i] end; nmean:=nmean/numprox; obmean:=obmean/numprox; for i:=1 to numprox do begin stress:=stress+sqr(d[i]-modeldis[i]); propor:=propor+(prox[i]*modeldis[i]); denom:=denom+sqr(modeldis[i]); varn:=varn+sqr(modeldis[i]-nmean); varob:=varob+sqr(prox[i]-obmean); end; stress1:=sqrt(stress/denom); stress2:=sqrt(stress/varn); rmonsq:=1.0-(stress/varn); varn:=varn/numprox; varob:=varob/numprox; propor:=(sqr(propor/numprox - nmean*obmean))/(varn*varob); writeln(outfile,'stress formula 1 =',stress1:7:4); writeln(outfile,'stress formula 2 =',stress2:7:4); writeln(outfile,'r(monotonic) squared=',rmonsq:6:4); writeln(outfile,'r-squared (p.v.a.f.)=',propor:6:4); writeln(outfile); writeln(outfile); writeln(outfile); writeln(outfile); end; (* stresscalc *) begin (* stats *) derivedistances; (* re-use arrays c & d for lsmt transform of model distances *) for ij:=1 to numprox do begin d[ij]:=modeldis[ij]; c[ij]:=ij end; quicksort(1,numprox); lsmonotransform(modeldis,d); stresscalc(modeldis,d); if (printmodeldis) then begin writeln(outfile,'model distances:'); writematrix(modeldis,numobject,field,decplaces); end; if (printresiduals) then begin for int:=1 to numprox do modeldis[int]:=prox[int]-modeldis[int]; writeln(outfile,'residual distances:'); writematrix(modeldis,numobject,field,decplaces); end; end; (* stats *) (*********************** start of main program *****************************) begin (* main *) writeln; writeln('addtree/p version 1.5'); writeln; (* VAX VMS PASCAL file definition statements: *) (* open(infile,'addtree.raw',old); *) (* open(outfile,'addtree.out',new); *) (* IBM-PC Turbo PASCAL open statements *) writeln('enter input file name:'); readln(infilnam); writeln('enter output file name:'); readln(outfilnam); assign(infile,infilnam); assign(outfile,outfilnam); (* IBM-PC Turbo PASCAL compiler directive to increase stack size *) {$M 65520,0,655360} reset(infile); rewrite(outfile); writeln; writeln('reading data'); readin; if (lowerhalf and printdata) then begin writeln(outfile,'data matrix:'); writematrix(prox,numobject,field,decplaces); end; chkmetric; if (printtransformed) then begin writeln(outfile,'transformed data distances:'); writematrix(prox,numobject,field,decplaces); end; initialize; write('fitting tree..'); (* main clustering loop **********************************) while (n>=4) do begin quadruples; numgroup:=0; nreduce:=0; findgroups; for int:=1 to n do use[int]:=true; for whichgroup:=1 to numgroup do makenode; cleanup end; (* end main clustering loop ******************************) (* choose a root for the tree (now there are n=3 or n=2 nodes left) *) if (n=3) then begin numgroup:=1; whichgroup:=1; if (height[index[1]]>height[index[2]]) and (height[index[1]]>height[index[3]]) then group[1]:=[2]+[3] else if (height[index[2]]>height[index[1]]) and (height[index[2]]>height[index[3]]) then group[1]:=[1]+[3] else group[1]:=[1]+[2]; for int:=1 to 3 do use[int]:=true; makenode; cleanup; end; (* now n=2 *) makeroot; hangroot; if not(rooting=asis) then begin findbestroot; (* specified rooting or minvarroot *) hangroot end; (* end root-choosing section *) writeln; report; writeln('calculating fit statistics'); writeln; stats; close(infile); close(outfile); writeln('end addtree/p run'); end. (* main *)