
(********************* ADDTREE/P  version 1.5 ****************************)
(*                     September 1991                                    *)
(*                                                                       *)
(* ADDTREE/P is a program in the PASCAL language for fitting the addi-   *)
(* tive tree model to proximity data. The data may be either similarity  *)
(* or distance data. A listing of tree parameters and their estimates,   *)
(* drawing of the tree, and summary statistics are reported. Various     *)
(* other output options are available. A help file accompanies           *)
(* this source file. See it for more information on using the program.   *)
(*                                                                       *)
(*     ADDTREE/P uses the algorithm originally introduced by S. Sattath  *)
(* and Amos Tversky (1977), with modifications introduced by James       *)
(* Corter (1983).  This program was written by James E. Corter.          *)
(* Questions or comments regarding the program should be addressed him   *)
(* at the address given below.                                           *)
(*                                                                       *)
(*   Please note that the ADDTREE/P and EXTREE programs are the property *)
(* of James Corter and Amos Tversky. The programs are freely available to*)
(* be used for research or academic purposes, and may be re-distributed  *)
(* for these uses.  However, the programs should not be modified and     *)
(* re-distributed, or resold, or used for any business or commercial     *)
(* purpose without the express written consent of the author.  Persons   *)
(* desiring to use the programs for commercial purposes should contact   *)
(* the author, James Corter, for information on obtaining a version of   *)
(* the programs licensed for such uses, at this address:  James Corter,  *)
(* Box 41, Teachers College, New York NY 10027.  tel.(212) 678-3843.     *)
(*                                                                       *)
(* ADDTREE/P copyright <c> 1981, 1991 by James E. Corter                 *)
(*                                                                       *)
(* changes for version 1.2:  -quicksort used instead of bubble sort      *)
(*                           -options for choosing of root: default is   *)
(*                            to leave as is. Optionally: find root that *)
(*                            minimizes variance of distances from       *)
(*                            root to leaves, root betw. specified nodes *)
(*                           -make specif. of leaf order more robust     *)
(* changes for version 1.3:  -warns if data set is too big               *)  
(*                           -superfluous error-warning stmnt. removed   *)
(*                                 (stalemate warning in "findgroups")   *)
(* changes for version 1.4:  -function ijfunction used only when necess. *)
(*                           -replace rooting proc. => proc. "hangroot"  *)
(*                         (don't tripleroot, omit "deepestroot" option) *)
(*                           -subtract constant to exactly satisfy t.i.  *)
(*                           -"findbestroot" doesn't search exhaustively *)
(* changes for version 1.5:  -IO stuff, efficiency improvements, info    *)
(*                            messages on execution progress added       *)
(*************************************************************************)


program addtreep(input,output,infile,outfile);

(* change these constants for bigger datasets, or for different compilers *)
(* for IBM-PC Turbo PASCAL 6.0 version, set to 80,160,3160,(80) *)
const maxobject = 80;
      twiceobject = 160;
      maxsize = 3160;    (* must be at least 1/2*maxobject*(maxobject-1) *)
      linelength = 80;  (* linelength of output device (printer, etc.)  *)

type grouptype = set of 1..maxobject;
     matrixarray = array[1..maxsize] of real;
     nodearray = array[1..twiceobject] of real;
     rootingtype = (asis,minvar,specified);

var   prox: matrixarray;     (* input proximities *)
      d: matrixarray;        (* transformed data distances *)
      c: array[1..maxsize] of integer; (* neighbor-count matrix *)
      numobject: integer;    (* number of objects *)
      numprox: integer;      (* number of inter-object distances *)
      n: integer;            (* number of nodes at current iteration *)
      size: integer;         (* number of distances at this iteration *)
      similarities,lowerhalf: boolean;
            (* similarity or distance data; lowerhalf or full matrix *)
      proxcode: real;        (* 1.0 if distances, -1.0 if sim. *)
      field,decplaces: integer;  (* output format control parms *)
      addconstant,            
            (* additive constant for positivity and triangle inequality *)
       diff,maxdis,sum,sum1,sum2,sum3:  real;
      x,y,hold,int,ij,whichgroup: integer;
      group: array[1..maxobject] of grouptype;  
            (* pairs of objects to be joined at current iteration *)
      numgroup,nreduce: integer;  (* number of such pairs *)
      numnode,rootnode: integer;  
            (* number of nodes in tree; number of root node *)
      parent: array[1..twiceobject] of integer;  (* "parent" of a node *)
      child: array[1..twiceobject,1..2] of integer; (* "children" of node *)
      length: nodearray;  (* length of arc between a node and its parent *)
      height: nodearray;  (* height (average length) of node in tree *)
      leafcount: array[1..twiceobject] of integer;  
            (* number of objects in the branch determined by node *)
      branchset: array[1..twiceobject] of grouptype;  
            (* keeps track of objects in each branch *)
      index: array[1..maxobject] of integer;  
            (* keeps track of true node number as d matrix collapses *)
      use: array[1..maxobject] of boolean;
            (* keeps track of pairs being combined on this step *)
      rootarclength,rootd: real;  (* length of arc root is on *)
      leafss: nodearray;
            (* holds sums-of-squares due to leaf-length for a node *)
      name: array[1..maxobject,1..20] of char;  (* object names *)
      namelength: integer;  (* length of longest name *)
      subtractconstant: boolean;
            (* if triang. ineq. already satisfied, subtract a constant
               so that it is exactly satisfied? *)
      rooting: rootingtype; (* three options are available for selection of
                               root:  asis=just put root at last two nodes
                                      minvar=find root with min. variance of
                                             root-to-leaf distances
                                      specified=root at specified nodes *)
      rootchild: array[1..2] of integer; (* for specified rooting *)
      printdata,printtransformed,printnumbers,specifyorder,
      printtree,printmodeldis,printresiduals,printheight: boolean;
                  (* output options requests *)
      linesize: integer;       (* output linelength for crt, etc. *)
      feed: integer;   (* signals when output is about to run off line *)
      nodeposition: nodearray;  (* hold calculated y-position of tree nodes*)
      infile,outfile: text;
      infilnam,outfilnam: string[24];


(* general purpose subroutines: ijfunction, writematrix  *)

(************************************************************************)
(* ijfunction: converts the row and column of a lowerhalf matrix        *)
(* to the ordinal position of that entry in a vector (matrixarray)      *)
(************************************************************************)
function ijfunction(i:integer; j:integer): integer;
begin
   if (i>j) then ijfunction:=(((i-1)*(i-2)) div 2) + j
   else if (j>i) then ijfunction:=(((j-1)*(j-2)) div 2) + i
   else writeln('error: i=j in ijfunction');
end; (* ijfunction *)


(******************************************************************)
(* writematrix: write a matrixarray as lowerhalf matrix;          *)
(*   n = size of the matrix,                                      *)
(*   field = number of digits for each entry, and                 *)
(*   decplaces = number of digits after the decimal place         *)
(******************************************************************)
procedure writematrix(mtx: matrixarray; n,field,decplaces: integer);
   var row,col: integer;
begin
      ij:=0;
   for row:=2 to n do
   begin
      for col:=1 to (row-1) do
      begin
        ij:=ij+1;
        write(outfile,mtx[ij]:field:decplaces);
        if (col=feed) then writeln(outfile);
      end;
      writeln(outfile);
   end;
writeln(outfile); writeln(outfile);
end; (* writematrix *)



(************************************************************************)
(* readin:      read program control information and data               *)
(************************************************************************)
procedure readin;   
var   i,j,k,ij,count: integer;
      dumreal: real;
      ch: char;
      word: array[1..3] of char;


procedure readword;  (* read a word of input control keywords *)
begin
   ch:=' ';
   while ((ch=' ') or (ch=',')) do if not(eoln(infile)) then read(infile,ch);
   i:=0;
   while not((ch=' ') or (ch=',') or (eoln(infile))) do
   begin
      i:=i+1;
      if not(i=1) then read(infile,ch);
      j:=ord(ch);
      if ((j>=65) and (j<=90)) then j:=j+32;
      ch:=chr(j);
      if not(i>3) then word[i]:=ch;
   end;
end; (* readword *)


begin (* readin *)

(* initialize & set defaults *)
printdata:=false; printtransformed:=false; 
printheight:=false; printmodeldis:=false; printresiduals:=false;
printtree:=true; printnumbers:=true; 
specifyorder:=false; 
linesize:=linelength; 
rooting:=asis; subtractconstant:=true;


(* read analysis & output option line *)
while not (eoln(infile)) do
begin
  readword;
if ((word[1]='d')and(word[2]='a')and(word[3]='t')) then printdata:=true;
if ((word[1]='t')and(word[2]='r')and(word[3]='a')) 
      then printtransformed:=true;
if ((word[1]='m')and(word[2]='o')and(word[3]='d')) then printmodeldis:=true;
if ((word[1]='r')and(word[2]='e')and(word[3]='s')) then printresiduals:=true;
if ((word[1]='n')and(word[2]='o')and(word[3]='t')) then printtree:=false;
if ((word[1]='c')and(word[2]='r')and(word[3]='t')) then linesize:=80;
if ((word[1]='n')and(word[2]='o')and(word[3]='n')) then printnumbers:=false;
if ((word[1]='s')and(word[2]='p')and(word[3]='e')) then specifyorder:=true;
if ((word[1]='n')and(word[2]='o')and(word[3]='s')) 
      then subtractconstant:=false;
if ((word[1]='h')and(word[2]='e')and(word[3]='i')) then printheight:=true;
if ((word[1]='m')and(word[2]='i')and(word[3]='n')) then rooting:=minvar;
if ((word[1]='d')and(word[2]='e')and(word[3]='e'))
 then writeln(outfile,'deepest rooting no longer supported: option ignored');
if ((word[1]='r')and(word[2]='o')and(word[3]='o'))
 then begin
      rooting:=specified;
      read(infile,rootchild[1],rootchild[2]);
      end;
end;
readln(infile);


(* if order of leaves is to be specified, then read that (optional) line *)
      count:=0;
if (specifyorder)
then begin
      while not(eoln(infile)) do
      begin
        count:=count+1;
        read(infile,i);
        nodeposition[i]:=count
      end;
      readln(infile);
     end;


writeln(outfile);
writeln(outfile,'addtree analysis (ADDTREE/P version 1.5):');

(* read & write comment line *)
for i:=1 to linelength do if not(eoln(infile))   
                    then begin 
                        read(infile,ch); write(outfile,ch) 
                         end; 
readln(infile); writeln(outfile); writeln(outfile); writeln(outfile);

(* begin reading data description line *)
read(infile,numobject); 
  if (numobject>maxobject) then
   writeln(outfile,'error: maximum number of objects allowed is ',maxobject:4);
  if ((specifyorder) and not(count=numobject))
  then writeln(outfile,'error: optional line specifying order of leaves ',
                  'has wrong number of entries');
readword;
   if ((word[1]='s')and(word[2]='i')and(word[3]='m')) 
   then similarities:=true
   else if ((word[1]='d')and(word[2]='i')and(word[3]='s')) 
      then similarities:=false
   else writeln(outfile,'  can''t tell if similarities or distances');
readword;
   if ((word[1]='l')and(word[2]='o')and(word[3]='w')) 
      then lowerhalf:=true
   else if ((word[1]='f')and(word[2]='u')and(word[3]='l')) 
      then lowerhalf:=false
   else writeln(outfile,'  can''t tell if full or lowerhalf matrix');
if not(eoln(infile)) then read(infile,field) 
           else writeln(outfile,'  can''t find field-width specification');
if not(eoln(infile)) 
 then read(infile,decplaces)
 else writeln(outfile,' can''t find digits-after-decimal-point specification');
readln(infile);
numprox:=(numobject*(numobject-1)) div 2;
feed:= linesize div field;


(* begin reading actual data *)
(* read full matrix, treating as asymmetric; print data if requested *)
if (lowerhalf=false) then
begin
   if (printdata=true) then writeln(outfile,'data matrix:');
   for ij:=1 to maxsize do prox[ij]:=0.0;
   for i:=1 to numobject do
   begin
      for j:=1 to numobject do
      begin
      if (eoln(infile)) then readln(infile);
      read(infile,dumreal); 
      if (printdata) then write(outfile,dumreal: field: decplaces);
      if ((printdata) and (j=feed)) then writeln(outfile);
      if not(i=j) 
      then begin
         ij:=ijfunction(i,j);
         prox[ij]:=prox[ij]+(dumreal/2)
      end;
      end;
      readln(infile); if (printdata) then writeln(outfile);
   end;
writeln(outfile); writeln(outfile,'full matrix symmetrized');
writeln(outfile); writeln(outfile);
end;

(* read lowerhalf matrix *)
if (lowerhalf) then
begin
      ij:=0;
   for i:=2 to numobject do
   begin
      if (eoln(infile)) then readln(infile);
      for j:=1 to (i-1) do
      begin
        ij:=ij+1;
        if (eoln(infile)) then readln(infile);
        read(infile,prox[ij]);
      end;
   end;
   readln(infile);

end;

(* read in names if they are given *)
j:=1; namelength:=0;
while not((eof(infile)) or (j>numobject)) do
begin
    i:=1;
   while not((eoln(infile)) or (i>20)) do
     begin read(infile,name[j,i]); i:=i+1; end;
   k:=i-1;
   if not(k=0) then while (name[j,k]=' ') do k:=k-1;
   if (namelength<k) then namelength:=k;
   if (k<20) then for i:=(k+1) to 20 do name[j,i]:=' ';
   j:=j+1;
   if not(eof(infile)) then readln(infile);
end;

end; (* readin *)



(*************************************************************************)
(* chkmetric: check if proximities satisfy metric axioms, fix if not     *)
(*************************************************************************)
procedure chkmetric;
var i,j,k,ij,ik,jk,xxij,xxjk: integer;
    diff,min: real;
begin

   addconstant:=0.0; min:=0.0; maxdis:=0.0;

(* convert to dissimilarities if necessary *)
if (similarities) 
then begin  
      proxcode:=-1.0; 
      writeln(outfile,'similarities linearly transformed into distances')  
     end
else proxcode:=1.0;

(* check for positivity of distances *)
for ij:=1 to numprox do
begin
   prox[ij]:=proxcode*prox[ij];
   if (prox[ij]<min) then min:=prox[ij];
   if (prox[ij]>maxdis) then maxdis:=prox[ij];
end;
if (min<0.0) then addconstant:=-min;
writeln(outfile); writeln(outfile);
writeln(outfile,'(',addconstant:field:decplaces,
      '  needed for positivity of distances )');

(* check every triple of objects for triangle inequality *)
  min:=maxdis;
      xxij:=1;
   for i:=1 to (numobject-2) do
   begin
      xxij:=xxij+i-1;
      ij:=xxij;
      xxjk:=xxij;
   for j:=(i+1) to (numobject-1) do
   begin
      ij:=ij+(j-2);
      ik:=ij;
      xxjk:=xxjk+j-1;
      jk:=xxjk;
   for k:=(j+1) to (numobject) do
   begin
      ik:=ik+(k-2);
      jk:=jk+(k-2);
      if ((prox[jk]>=prox[ij]) and (prox[jk]>=prox[ik]))
      then diff:=(prox[ij]+prox[ik])-prox[jk]
      else if ((prox[ik]>=prox[ij]) and (prox[ik]>=prox[jk]))
           then diff:=(prox[ij]+prox[jk])-prox[ik]
      else diff:=(prox[ik]+prox[jk])-prox[ij];
      if (diff<min) then min:=diff
      end;
   end;
end;
addconstant:=-min;
if ((subtractconstant=false) and (addconstant<=0)) 
 then writeln(outfile,'triangle inequality already satisfied')
 else begin
      writeln(outfile,' ',addconstant:field:decplaces,
      '  added to exactly satisfy triangle inequality');
      for ij:=1 to numprox do prox[ij]:=prox[ij]+addconstant
      end;

writeln(outfile); writeln(outfile);
end; (* procedure chkmetric *)





(************************************************************************)
(* initialize: initialize variables                                     *)
(************************************************************************)
procedure initialize;
  var i,ij: integer;
begin
n:=numobject;
size:=numprox;
numnode:=numobject;
for i:=1 to numobject do
begin
   use[i]:=true;
   index[i]:=i;
   height[i]:=0.0;
   branchset[i]:=[i];
   child[i,1]:=0; child[i,2]:=0;
   leafcount[i]:=1;
   leafss[i]:=0.0;
   if not(specifyorder) then nodeposition[i]:=i
end;
for i:=(numobject+1) to twiceobject do 
begin
   height[i]:=0.0;
   branchset[i]:=[];
   leafcount[i]:=0;
   leafss[i]:=0.0;
end;
for ij:=1 to numprox do d[ij]:=prox[ij];
end; (* initialize *)




(************************************************************************)
(* quadruples: check every quadruple of objects, finding the pairs that *)
(*             are closest; increment their "neighbor score", c         *)
(************************************************************************)
procedure quadruples;
   var i,j,k,l,ij,ik,jk,il,jl,kl: integer;
       xxij,xxjk,xxkl: integer;
begin

   for ij:=1 to size do c[ij]:=0;

(* check every quadruple of objects *)
      xxij:=1;
   for i:=1 to (n-3) do
   begin
      xxij:=xxij+i-1;
      ij:=xxij;
      xxjk:=xxij;

   for j:=(i+1) to (n-2) do
   begin
      ij:=ij+(j-2);
      ik:=ij;
      xxjk:=xxjk+j-1;
      jk:=xxjk;
      xxkl:=xxjk;

   for k:=(j+1) to (n-1) do
   begin
      ik:=ik+(k-2);
      jk:=jk+(k-2);
      il:=ik;
      jl:=jk;
      xxkl:=xxkl+k-1;
      kl:=xxkl;

   for l:=(k+1) to n do
   begin
      il:=il+(l-2);
      jl:=jl+(l-2);
      kl:=kl+(l-2);

      sum1:=d[ij]+d[kl];
      sum2:=d[ik]+d[jl];
      sum3:=d[il]+d[jk];

(* object pairs in smallest of these sums receive a nearest-neighbor 
score of 2,those in the next smallest 1, those in largest 0  *)

      if (sum1<sum2) then begin c[ij]:=c[ij]+1; c[kl]:=c[kl]+1 end
       else if (sum2<sum1) then begin c[ik]:=c[ik]+1; c[jl]:=c[jl]+1 end;
      if (sum1<sum3) then begin c[ij]:=c[ij]+1; c[kl]:=c[kl]+1 end
       else if (sum3<sum1) then begin c[il]:=c[il]+1; c[jk]:=c[jk]+1 end;
      if (sum2<sum3) then begin c[ik]:=c[ik]+1; c[jl]:=c[jl]+1 end
       else if (sum3<sum2) then begin c[il]:=c[il]+1; c[jk]:=c[jk]+1 end;

   end;
   end;
   end;
   end;

end; (* quadruples *)





(************************************************************************)
(* findgroups: using the matrix of neighbor scores, c, find those pairs *)
(*             of objects that are mutual nearest-neighbors.  Resolve   *)
(*             ties and non-reciprocity by least-squares test in        *)
(*             "findcloserneighbor"                                     *)
(************************************************************************)
procedure findgroups;
var  i,j,k,ij:  integer;
     rowsumdistance: array[1..maxobject] of real;
     neighbors: array[1..maxobject] of integer;
     pair: array[1..maxsize,1..2] of integer;
     goodone: array[1..maxsize] of boolean;
     holdcount,neighbor,com,oth1,oth2,new,numnn,numnew,rowmaxcount,
       maxloser,maxgood: integer;
     pairi,pairj,common,otheri,otherj: grouptype;


(* chooses one neighbor as closer; returns neighbor & neighbordistance *)
procedure findcloserneighbor(obj,n1,n2:integer);
   var pair1,pair2,pair12: integer;
       n1distance,n2distance: real;
       losercount: integer;
begin
   pair1:=ijfunction(obj,n1);
   pair2:=ijfunction(obj,n2);
   pair12:=ijfunction(n1,n2);
   n1distance:=d[pair1]+((rowsumdistance[n2]-d[pair2]-d[pair12])/(n-3));
   n2distance:=d[pair2]+((rowsumdistance[n1]-d[pair1]-d[pair12])/(n-3));
   if (n1distance<n2distance)
    then begin 
         neighbor:=n1; 
         losercount:=c[pair2];
       end
    else begin 
         neighbor:=n2;
         losercount:=c[pair1];
       end;
if (losercount>maxloser) then maxloser:=losercount;
end;  (* findcloserneighbor *)


begin (* findgroups *)

(* find nearest-neighbors of each object & calc. rowsumdis. for tiebreaking *)
     holdcount:=0;
for i:=1 to n do
begin
   rowmaxcount:=-1;
   rowsumdistance[i]:=0.0;
   for j:=1 to n do
   if not(i=j)
   then begin
          ij:=ijfunction(i,j);
          rowsumdistance[i]:=rowsumdistance[i]+d[ij];
          if (c[ij]>rowmaxcount)
          then begin rowmaxcount:=c[ij];
                  numnn:=1;
                  neighbors[1]:=j;
            end
          else if (c[ij]=rowmaxcount)
          then begin
                  numnn:=numnn+1;
                  neighbors[numnn]:=j;
               end;
        end; (* loop on j *)

   (* check if any pair made from a new neighbor of i is already on list *)
   for new:=1 to numnn do
   begin
        goodone[new]:=true;
      for k:=1 to holdcount do
       if (([i]+[neighbors[new]])=([pair[k,1]]+[pair[k,2]]))
       then goodone[new]:=false;
   end;

        numnew:=0;
   for new:=1 to numnn do
   begin
      if (goodone[new])
      then begin
              numnew:=numnew+1;
              pair[holdcount+numnew,1]:=i;
              pair[holdcount+numnew,2]:=neighbors[new];
           end;
   end;

   holdcount:=holdcount+numnew;
end; (* loop on each object, i *)

   for i:=1 to holdcount do goodone[i]:=true;
   maxloser:=0;

(* compare every pair of pairs with a common member to see which is better *)
for j:=2 to holdcount do
for i:=1 to (j-1) do
begin
   pairi:=[pair[i,1]]+[pair[i,2]];
   pairj:=[pair[j,1]]+[pair[j,2]];
   common:= pairi*pairj;

   if (common<>[])   (* if pairs overlap, then resolve by l.s. test *)
   then begin

         otheri:=pairi-common;
         otherj:=pairj-common;
         for int:=1 to n do
         begin
            if (int in common) then com:=int;
            if (int in otheri) then oth1:=int;
            if (int in otherj) then oth2:=int;
         end;

         findcloserneighbor(com,oth1,oth2);

         if (neighbor=oth1)
          then goodone[j]:=false
          else goodone[i]:=false;

       end;
end; (* loop on pairs j,i of object pairs *)
(* now each object i should have only a single nearest neighbor *)

   numgroup:=0; maxgood:=0;
for i:=1 to holdcount do
begin
    ij:=ijfunction(pair[i,1],pair[i,2]);
  if (goodone[i] and (c[ij]>maxgood)) then maxgood:=c[ij];
  if ((goodone[i]) and (c[ij]>=maxloser))
  then begin
         numgroup:=numgroup+1;
         group[numgroup]:=[pair[i,1]]+[pair[i,2]];
       end;
end;
if (numgroup=0) (* if no good pair was > maxloser, take max good one(s) *)
then for i:=1 to holdcount do
if goodone[i]
then begin
       ij:=ijfunction(pair[i,1],pair[i,2]);
       if (c[ij]=maxgood)
       then begin
              numgroup:=numgroup+1;
              group[numgroup]:=[pair[i,1]]+[pair[i,2]]
            end;
     end;

end; (* findgroups *)





(*************************************************************************)
(* makenode: using list of groups from "findgroups", make new nodes from *)
(*           groups.  Compute length of arcs between new node and its    *)
(*           children, and define distances from new node as weighted    *)
(*           average of distances from children.                         *)
(*************************************************************************)
procedure makenode;
var  i,j,k,ij,node,holdcount,sumcount: integer;
     nodegroup: array[1..2] of integer;  (* holds pair being made into node *)



procedure estimate;   (* compute arc lengths *)
var      sum,avedis,diff: array[1..2] of real;
      dis,avelength: real;
      i,j,holdcount: integer;
begin
ij:=ijfunction(nodegroup[1],nodegroup[2]);
dis:=d[ij];
for i:=1 to 2 do
begin
   holdcount:=0; sum[i]:=0.0;
   for j:=1 to n do
   if (use[j]) then
   begin
      ij:=ijfunction(nodegroup[i],j);
      sum[i]:=sum[i]+leafcount[index[j]]*d[ij];
      holdcount:=holdcount+leafcount[index[j]];
   end;
   avedis[i]:=sum[i]/holdcount;
end;

avelength:=(avedis[1]+avedis[2])/2;
for i:=1 to 2 do
begin
   diff[i]:=avedis[i]-avelength;
   length[index[nodegroup[i]]]:=dis/2+diff[i]-height[index[nodegroup[i]]];
   if (length[index[nodegroup[i]]]<0.0) then length[index[nodegroup[i]]]:=0.0;
end;

end; (* estimate *)



begin   (* makenode *)
   numnode:=numnode+1;
   node:=numnode;
   k:=0;
   for i:=1 to n do
      if (i in group[whichgroup]) 
      then begin k:=k+1;
            use[i]:=false;
            parent[index[i]]:=node;
            nodegroup[k]:=i
         end;
if not(k=2)
 then writeln(outfile,'error: wrong number of children: node',node:4);

for i:=1 to k do 
begin
  leafcount[node]:=leafcount[node]+leafcount[index[nodegroup[i]]];
  branchset[node]:=branchset[node]+branchset[index[nodegroup[i]]];
end;
   

(* order children according to specified or input order *)
if (nodeposition[index[nodegroup[2]]]<nodeposition[index[nodegroup[1]]])
then begin
      child[node,1]:=index[nodegroup[2]]; 
      child[node,2]:=index[nodegroup[1]]
     end
else begin 
      child[node,1]:=index[nodegroup[1]]; 
      child[node,2]:=index[nodegroup[2]]
     end;

   
estimate;


(* calculate mean length of leaves under node ("height") *)
 sum1:=height[child[node,1]]+length[child[node,1]];
 sum2:=height[child[node,2]]+length[child[node,2]];
 height[node]:=((sum1*leafcount[child[node,1]])
              +(sum2*leafcount[child[node,2]])) / leafcount[node];

(* calculate sum of squares due to leaf-length for later choosing of root *)
leafss[node]:=leafss[child[node,1]]+leafss[child[node,2]]
           +leafcount[child[node,1]]*sqr(height[node]-sum1)
           +leafcount[child[node,2]]*sqr(height[node]-sum2);

(* calculate average y-position of node for printing *)
nodeposition[node]:=(nodeposition[child[node,1]]*leafcount[child[node,1]]
                  +nodeposition[child[node,2]]*leafcount[child[node,2]])
                / leafcount[node];

(* recompute distances to other nodes *)
   for i:=1 to n do
   if (use[i]) then 
   begin
      sumcount:=0; sum:=0.0;
      for j:=1 to 2 do
      begin
      ij:=ijfunction(i,nodegroup[j]);
      holdcount:=leafcount[index[nodegroup[j]]]*leafcount[index[i]];
      sum:=sum+holdcount*d[ij];
      sumcount:=sumcount+holdcount;
      end;
      ij:=ijfunction(i,nodegroup[1]);
      d[ij]:=sum/sumcount;
   end;


   use[nodegroup[1]]:=true;
   index[nodegroup[1]]:=node;
   nreduce:=nreduce+1;

   write('.');
end; (* makenode *)






(************************************************************************)
(* cleanup: collapse d matrix & prepare for next iteration            *)
(************************************************************************)
procedure cleanup;
   var  i,j,k,ij,nij: integer;
begin

  (* repack d matrix *)
    ij:=0; nij:=0;
  for j:=2 to n do
     for i:=1 to (j-1) do
     begin
        ij:=ij+1;
        if((use[i]) and (use[j]))
        then begin
          nij:=nij+1;
          d[nij]:=d[ij]
        end;
     end;
  
  for i:= (n-1) downto 1 do
        if (use[i]=false) then for k:=i to (n-1) do index[k]:=index[k+1];

   n:=n-nreduce;
   size:=(n*(n-1)) div 2;

end; (* cleanup *)




(************************************************************************)
(* makeroot: make a fake node, rootnode, with the last two nodes as its *)
(*           children.  This node is of degree two.                  *)
(************************************************************************)
procedure makeroot;     (* when n=2 *)
var i: integer;
begin
   rootd:=d[1];
   numnode:=numnode+1;
   rootnode:=numnode;
   parent[rootnode]:=0;
   length[rootnode]:=0.0;
   leafcount[rootnode]:=numobject;
      branchset[rootnode]:=[];
   for i:=1 to 2 do
   begin
      parent[index[i]]:=rootnode;
      branchset[rootnode]:=branchset[rootnode]+branchset[index[i]];
   end;
if (nodeposition[index[2]]<nodeposition[index[1]])
 then begin child[rootnode,2]:=index[1]; child[rootnode,1]:=index[2] end
 else begin child[rootnode,1]:=index[1]; child[rootnode,2]:=index[2] end;
rootarclength:=rootd-height[child[rootnode,1]]-height[child[rootnode,2]];
if (rootarclength<0.0) then rootarclength:=0.0;
end; (* makeroot *)


procedure hangroot; (* hang root at correct point on root arc *)
begin
   diff:=height[child[rootnode,1]]-height[child[rootnode,2]];
   if (diff>rootarclength) then diff:=rootarclength;
   if (diff<-rootarclength) then diff:=-rootarclength;
   length[child[rootnode,1]]:=(rootarclength-diff)/2.0;
   length[child[rootnode,2]]:=(rootarclength+diff)/2.0;
   sum1:=height[child[rootnode,1]]+length[child[rootnode,1]];
   sum2:=height[child[rootnode,2]]+length[child[rootnode,2]];
   height[rootnode]:=((sum1*leafcount[child[rootnode,1]])
              +(sum2*leafcount[child[rootnode,2]])) / leafcount[rootnode];
   leafss[rootnode]:=leafss[child[rootnode,1]]+leafss[child[rootnode,2]]
           +leafcount[child[rootnode,1]]*sqr(height[rootnode]-sum1)
           +leafcount[child[rootnode,2]]*sqr(height[rootnode]-sum2);
if (rooting=minvar)
then begin
writeln(outfile);
writeln(outfile,'var. of dis. from root to leaves =',leafss[rootnode]:9:4);
     end;
end; (* hangroot *)




(************************************************************************)
(* findbestroot: find optimal rooting of the tree                       *)
(*               (for rooting=minvar or rooting=specified)              *)
(************************************************************************)
procedure findbestroot;
var parentlength,parentheight,parentss,minscore,psum,bsum,othersum,fooheight,
     newparentheight,newparentss,foo1,foo2: real;
    score: nodearray;
    parentcount,newparentcount,goodnode,badnode,goodchild,badchild,
     ch1,ch2: integer;
    foundroot: boolean;



procedure checkchild(gp,bp,nde,othernde: integer);
begin
if ((nde>0) and (othernde>0))
then begin

if (rooting=specified)
then if ((branchset[rootchild[1]]*branchset[nde]<>[])
         and (branchset[rootchild[2]]*branchset[nde]<>[]))
     then score[nde]:=-1.0 else score[nde]:=0.0;

if (rooting=minvar)
then begin
  psum:=parentheight+parentlength;
  othersum:=height[othernde]+length[othernde];
  newparentcount:=parentcount+leafcount[othernde];
  newparentheight:=((psum*parentcount) + (othersum*leafcount[othernde]))
                    / newparentcount;
  newparentss:=parentss+leafss[othernde]
            +(parentcount*sqr(psum-newparentheight))
            +(leafcount[othernde]*sqr(othersum-newparentheight));

  diff:=height[nde]-newparentheight;
  if (diff>length[nde]) then diff:=length[nde];
  if (diff<-length[nde]) then diff:=-length[nde];
  foo1:=(length[nde]-diff)/2.0;
  foo2:=(length[nde]+diff)/2.0;
  sum1:=height[nde]+foo1;
  sum2:=newparentheight+foo2;
  fooheight:=((sum1*leafcount[nde])+(sum2*newparentcount))/numobject;
  score[nde]:=leafss[nde]+newparentss
          +leafcount[nde]*sqr(fooheight-sum1)
          +newparentcount*sqr(fooheight-sum2);
  writeln(outfile,'node',nde:3,'  score=',score[nde]:8:3);
end; (* minvar section *)

if (score[nde]<minscore)
then begin
        minscore:=score[nde];
        goodnode:=gp; badnode:=bp; goodchild:=nde; badchild:=othernde
     end;
end;
end; (* checkchild *)


procedure changeroot(gp,bp,nde,othernde: integer);
begin
writeln(outfile,'changeroot(',gp:3,' =>',nde:3,')');
parent[nde]:=rootnode;
parent[bp]:=gp;
length[bp]:=rootarclength;
branchset[gp]:=branchset[bp]+branchset[othernde];
leafcount[gp]:=leafcount[bp]+leafcount[othernde];
height[gp]:=((leafcount[bp]*(height[bp]+length[bp]))
             +(leafcount[othernde]*(height[othernde]+length[othernde])))
             / leafcount[gp];
othersum:=height[bp]+rootarclength;
bsum:=height[othernde]+length[othernde];
leafss[gp]:=leafss[bp]+leafss[othernde]
             +(leafcount[bp]*sqr(othersum-height[gp]))
             +(leafcount[othernde]*sqr(bsum-height[gp]));
nodeposition[gp]:=((leafcount[bp]*nodeposition[bp])
                +(leafcount[othernde]*nodeposition[othernde])) / leafcount[gp];
if (nodeposition[gp]<nodeposition[nde])
 then begin child[rootnode,1]:=gp; child[rootnode,2]:=nde end
 else begin child[rootnode,1]:=nde; child[rootnode,2]:=gp end;
if (nodeposition[bp]<nodeposition[othernde])
 then begin child[gp,1]:=bp; child[gp,2]:=othernde end
 else begin child[gp,1]:=othernde; child[gp,2]:=bp end;
if (rooting=minvar)
 then begin
        score[gp]:=score[rootnode];
        score[rootnode]:=score[nde];
      end
 else begin  (* if (rooting=specified) *)
        score[gp]:=0.0;
        score[rootnode]:=0.0;
      end;
rootarclength:=length[nde];
end; (* changeroot *)



begin (* findbestroot *)

  foundroot:=false;
  if (rooting=specified) then score[rootnode]:=0.0
    else if (rooting=minvar) then score[rootnode]:=leafss[rootnode];
repeat
  minscore:=score[rootnode];
  parentlength:=rootarclength;
  ch1:=child[rootnode,1];
  ch2:=child[rootnode,2];

  parentcount:=leafcount[ch2];
  parentheight:=height[ch2];
  parentss:=leafss[ch2];

if not(ch1<=numobject)
then begin
        checkchild(ch1,ch2,child[ch1,1],child[ch1,2]);
        checkchild(ch1,ch2,child[ch1,2],child[ch1,1]);
     end;

  parentcount:=leafcount[ch1];
  parentheight:=height[ch1];
  parentss:=leafss[ch1];

if not(ch2<=numobject)
then begin
        checkchild(ch2,ch1,child[ch2,1],child[ch2,2]);
        checkchild(ch2,ch1,child[ch2,2],child[ch2,1]);
     end;

if (minscore>=score[rootnode])
 then foundroot:=true
 else changeroot(goodnode,badnode,goodchild,badchild);

until (foundroot);

end; (* findbestroot *)





(************************************************************************)
(* report: write tree structure and arc-length estimates; draw tree     *)
(************************************************************************)
procedure report;
   var i,j: integer;


procedure drawtree;

   const vertchar = '|';      (* change if desired *)
       horizchar = '-';

   var      i,j,x,y,start,stop,maxstop,node: integer;
      maxheight,      stretch:  real;
      line: array[1..linelength] of char;
      yposition:  array[1..twiceobject] of integer;
      topkid,bottomkid: array[1..twiceobject] of integer;
      numleaf: integer;


(* descend calculates:       
      vertical position of a branch ("yposition")
      horizontal position of branch ("height") 
      endpoints of vertical connecting lines ("topkid","bottomkid")  *)
procedure descend(nde: integer);
var i: integer;
begin

   if not(nde=rootnode)
   then height[nde]:=height[parent[nde]]+length[nde];
      
   if (nde<=numobject)            (* if nde is a leaf *)
   then begin
         numleaf:=numleaf+1;
         yposition[nde]:=(numleaf*2)-1
      end
   else begin                  (* if nde is an interior node *)
         for i:=1 to 2 do
         begin
            descend(child[nde,i]);
            yposition[nde]:=yposition[nde]+
                (yposition[child[nde,i]]*leafcount[child[nde,i]]);
         end;
         yposition[nde]:=yposition[nde] div leafcount[nde];
         topkid[nde]:=yposition[child[nde,1]];
         bottomkid[nde]:=yposition[child[nde,2]]
      end;

end; (* descend *)



begin  (* drawtree *)

   height[rootnode]:=0.0; numleaf:=0;
   for i:=(numobject+1) to numnode do yposition[i]:=0;

   descend(rootnode);

   if not(numobject=numleaf)
    then writeln(outfile,'error in descend, numleaf=',numleaf);
   maxheight:=0.0;
   for i:=1 to numobject do
     if (height[i]>maxheight) then maxheight:=height[i];
   stretch:=(linesize-namelength-8)/maxheight;


(* write tree arcs (horizontal lines) *)
   for y:=1 to (2*numobject) do
   begin
      maxstop:=0;
      for i:=1 to linesize do line[i]:=' ';
      for node:=1 to numnode do
      begin
      if (yposition[node]=y) 
      then begin
            if (parent[node]<>0) 
              then start:=trunc(stretch*height[parent[node]])+1
              else start:=1;
            stop:=trunc(stretch*height[node])+1;
            if (stop>maxstop) then maxstop:=stop;
            if (stop>linesize) then stop:=linesize;
            for x:=start to stop do line[x]:=horizchar
           end;

(* place vertical conecting lines *)
      if (node>numobject)
      then if ((y>topkid[node]) and (y<bottomkid[node]))
           then begin
                   stop:=trunc(stretch*height[node])+1;
                   if (stop>maxstop) then maxstop:=stop;
                   line[stop]:=vertchar;
                end;

      end; (* loop on node *)

      for i:=1 to maxstop do write(outfile,line[i]);
      for i:=1 to numobject do 
      if (yposition[i]=y) 
      then begin
               if (printnumbers) then write(outfile,i:4,'  ')
                            else write(outfile,'  ');
            for j:=1 to namelength do write(outfile,name[i,j]);
           end;
      writeln(outfile);
   end;

end; (* drawtree *)


begin  (* report *)

writeln(outfile); writeln(outfile); writeln(outfile);
writeln(outfile); writeln(outfile,'node     length    children');
for i:=1 to numnode do
begin
   write(outfile,i:4,'    ');
   write(outfile,length[i]:field:decplaces,'     ');
   if (i<=numobject) 
    then begin
         write(outfile,'                  ');
         for j:=1 to 20 do write(outfile,name[i,j])
         end
    else for j:=1 to 2 do write(outfile,child[i,j]:3);
   writeln(outfile);
end;
writeln(outfile); writeln(outfile); writeln(outfile); writeln(outfile);

if (printtree) then drawtree;
writeln(outfile); writeln(outfile); writeln(outfile); writeln(outfile);

if (printheight)
then begin
      writeln(outfile,'distance of each node from the root:');
      writeln(outfile,'node   height');
      for i:=1 to numnode do
        writeln(outfile,i:3,'   ',height[i]:field:decplaces);
      writeln(outfile); writeln(outfile); writeln(outfile); writeln(outfile);
     end;

end; (* report *)





(************************************************************************)
(* stats: calculate stress(1), stress(2), squared monotonic and linear  *)
(*        correlations                                                  *)
(************************************************************************)
procedure stats;
var modeldis: matrixarray;

procedure derivedistances;
   var i,j,k,ij: integer;
begin

   ij:=0;
for i:=2 to numobject do
begin
   for j:=1 to (i-1) do
   begin
      ij:=ij+1;
      modeldis[ij]:=0.0;
      for k:=1 to numnode do
      if (((i in branchset[k]) and not(j in branchset[k]))
          or ((j in branchset[k]) and not(i in branchset[k])))
      then modeldis[ij]:=modeldis[ij]+length[k];
   end;
end;

end; (* derivedistances *)


         
procedure quicksort(lowerbound,upperbound: integer);
begin
 if (upperbound>lowerbound+1)
 then begin
       x:=lowerbound+1; y:=upperbound;
       while (x<=y) do
          if (prox[c[x]]<=prox[c[lowerbound]])
          then x:=x+1
          else begin
                while (prox[c[y]]>prox[c[lowerbound]])
                  do y:=y-1;
                if (x<y) 
                then begin
                       hold:=c[x]; c[x]:=c[y]; c[y]:=hold;
                       x:=x+1; y:=y-1;
                     end;
               end;
       hold:=c[lowerbound]; c[lowerbound]:=c[y]; c[y]:=hold;
       quicksort(lowerbound,y-1);
       quicksort(y+1,upperbound);
      end
 else if (upperbound=lowerbound+1)      (* prox has only two elements *)
      then if (prox[c[lowerbound]]>prox[c[upperbound]])
           then begin
                  hold:=c[lowerbound]; 
                  c[lowerbound]:=c[upperbound]; 
                  c[upperbound]:=hold;
                end;
end; (* quicksort *)


procedure lsmonotransform(var modeldis,d: matrixarray);
var i,j,place,ordered:  integer;
    bin:  real;
   endcondition: integer;
begin
repeat
ordered:=1;
   i:=1;
   repeat
      if (d[c[i+1]]<d[c[i]])
      then begin
        ordered:=0;
      endcondition:=0;
        bin:=d[c[i]];
        place:=i;
        repeat
          place:=place+1;
        bin:=bin+d[c[place]];
        if (place=numprox) then endcondition:=1
                  else if (d[c[place+1]]>(bin/(place-i+1)))
                       then endcondition:=1;
        until (endcondition=1);
        for j:=i to place do d[c[j]]:=(bin/(place-i+1));
        i:=place
      end;
      i:=i+1
   until (i>=numprox);
until (ordered=1);
end;  (* lsmonotransform *)


procedure stresscalc(modeldis,d: matrixarray);
var i: integer;
    stress,stress1,stress2,denom,
    varn,varob,nmean,obmean,propor,rmonsq: real;
begin
      stress:=0.0; denom:=0.0; 
      varn:=0.0; varob:=0.0; 
      nmean:=0.0; obmean:=0.0; propor:=0.0; 
   for i:=1 to numprox do
   begin
      obmean:=obmean+prox[i];
      nmean:=nmean+modeldis[i]
      end;
   nmean:=nmean/numprox;
   obmean:=obmean/numprox;
   for i:=1 to numprox do
   begin
     stress:=stress+sqr(d[i]-modeldis[i]);
     propor:=propor+(prox[i]*modeldis[i]);
     denom:=denom+sqr(modeldis[i]);
     varn:=varn+sqr(modeldis[i]-nmean);
     varob:=varob+sqr(prox[i]-obmean);
   end;
   stress1:=sqrt(stress/denom);
   stress2:=sqrt(stress/varn);
   rmonsq:=1.0-(stress/varn);
   varn:=varn/numprox;  varob:=varob/numprox;
   propor:=(sqr(propor/numprox - nmean*obmean))/(varn*varob);
   writeln(outfile,'stress formula 1 =',stress1:7:4);
   writeln(outfile,'stress formula 2 =',stress2:7:4);
   writeln(outfile,'r(monotonic) squared=',rmonsq:6:4);
   writeln(outfile,'r-squared (p.v.a.f.)=',propor:6:4);
   writeln(outfile); writeln(outfile); writeln(outfile); writeln(outfile);
end; (* stresscalc *)


begin (* stats *)
   derivedistances;
(* re-use arrays c & d for lsmt transform of model distances *)
     for ij:=1 to numprox do begin d[ij]:=modeldis[ij]; c[ij]:=ij end;
   quicksort(1,numprox);
   lsmonotransform(modeldis,d);
   stresscalc(modeldis,d);

      if (printmodeldis)
      then begin
              writeln(outfile,'model distances:');
              writematrix(modeldis,numobject,field,decplaces);
           end;

      if (printresiduals)
      then begin
              for int:=1 to numprox do modeldis[int]:=prox[int]-modeldis[int];
              writeln(outfile,'residual distances:');
              writematrix(modeldis,numobject,field,decplaces);
           end;

end; (* stats *)




(*********************** start of main program *****************************)
begin (* main *)

   writeln; writeln('addtree/p version 1.5'); writeln;

(* VAX VMS PASCAL file definition statements:  *)
(*  open(infile,'addtree.raw',old); *)
(*  open(outfile,'addtree.out',new); *)

(* IBM-PC Turbo PASCAL open statements *)
  writeln('enter input file name:');
  readln(infilnam);
  writeln('enter output file name:');
  readln(outfilnam);
  assign(infile,infilnam);
  assign(outfile,outfilnam);
(* IBM-PC Turbo PASCAL compiler directive to increase stack size *)
{$M 65520,0,655360}

  reset(infile);
  rewrite(outfile);

     writeln; writeln('reading data');

   readin;
   
      if (lowerhalf and printdata)
      then begin
             writeln(outfile,'data matrix:');
             writematrix(prox,numobject,field,decplaces);
           end;

   chkmetric;

      if (printtransformed)
      then begin
             writeln(outfile,'transformed data distances:');
             writematrix(prox,numobject,field,decplaces);
           end;

   initialize;

      write('fitting tree..');

(* main clustering loop **********************************)
   while (n>=4) do
   begin

      quadruples;

         numgroup:=0; nreduce:=0;
      findgroups;

      for int:=1 to n do use[int]:=true;
      for whichgroup:=1 to numgroup do makenode;

      cleanup

   end;
(* end main clustering loop ******************************)


(* choose a root for the tree (now there are n=3 or n=2 nodes left) *)
if (n=3)
then begin
      numgroup:=1; whichgroup:=1;
      if (height[index[1]]>height[index[2]])
       and (height[index[1]]>height[index[3]])
       then group[1]:=[2]+[3]
      else if (height[index[2]]>height[index[1]])
       and (height[index[2]]>height[index[3]])
       then group[1]:=[1]+[3]
      else group[1]:=[1]+[2];
      for int:=1 to 3 do use[int]:=true;
      makenode;
      cleanup;
     end;

(* now n=2 *)
   makeroot;
   hangroot;
   if not(rooting=asis)
   then begin
          findbestroot;   (* specified rooting or minvarroot *)
          hangroot
        end;
(* end root-choosing section *)
      writeln;


   report;
      writeln('calculating fit statistics'); writeln;
   stats;


close(infile);
close(outfile);

   writeln('end addtree/p run');

end. (* main *)

