(********************* EXTREE  version 1.5 *******************************)
(*                     September 1991                                    *)
(*                                                                       *)
(* EXTREE is a program in the PASCAL language for fitting the extended   *)
(* tree model (Corter & Tversky, 1986) to proximity data. The data may   *)
(* be either similarity or distance data. A listing of parameters and    *)
(* their estimates, drawing of the extended tree, and summary statistics *)
(* are reported. Various other output options are available. A help file *)
(* accompanies this source file. See it for more information on using    *)
(* the program.  This program was written by James E. Corter.            *)
(* Questions or comments regarding the program should be addressed him   *)
(* at the address given below.                                           *)
(*                                                                       *)
(*   Please note that the ADDTREE/P and EXTREE programs are the property *)
(* of James Corter and Amos Tversky. The programs are freely available to*)
(* be used for research or academic purposes, and may be re-distributed  *)
(* for these uses.  However, the programs should not be modified and     *)
(* re-distributed, or resold, or used for any business or commercial     *)
(* purpose without the express written consent of the author.  Persons   *)
(* desiring to use the programs for commercial purposes should contact   *)
(* the author, James Corter, for information on obtaining a version of   *)
(* the programs licensed for such uses, at this address:  James Corter,  *)
(* Box 41, Teachers College, New York NY 10027.  tel.(212) 678-3843.     *)
(*                                                                       *)
(* EXTREE copyright <c> 1982-2014 by James E. Corter                     *)
(*                                                                       *)
(********************* EXTREE  version 1.5 *******************************)


program extree(input,output,infile,outfile);

(* change these constants for bigger datasets, or for different compilers  *)
const maxobject = 32;          (* = maximum number of objects *)
      twiceobject = 64;        (* = 2n = max # of tree nodes *)
      thriceobject = 96;      (* = 3n = max # of tree + marked features *)
      maxsize = 496;           (* = n*(n-1)/2  (number of distances) *)
      linelength = 80;         (* set to local printer linelength *)

type grouptype = set of 1..maxobject;
     matrixarray = array[1..maxsize] of real;
     nodearray = array[1..thriceobject] of real;
     rootingtype = (asis,minvar,specified);

var   prox: matrixarray;      (* raw input data *)
      d: matrixarray;            (* transformed data *)
      c: array[1..maxsize] of integer;
      numobject: integer;      (* number of objects, n *)
      numprox: integer;      (* number of distances (=n*(n-1)/2) *)
      n,size: integer;      (* same as numobject, numprox *)
      proxcode: real;           (* =-1.0 for similarity data, 1.0 for dis*)
      field,decplaces: integer;      (* describes output data fields *)
      addconstant,maxdis,diff,sum,sum1,sum2:  real;
      group: array[1..maxobject] of grouptype;
      numgroup,nreduce: integer;
      numnode,rootnode: integer;
      child: array[1..thriceobject,1..2] of 0..thriceobject;
      leafcount: array[1..thriceobject] of integer;
      height: array[1..thriceobject] of real;
      parent: array[1..thriceobject] of 0..thriceobject;
      length: nodearray;
      index: array[1..thriceobject] of 1..thriceobject;
      use: array[1..thriceobject] of boolean;
      name: array[1..thriceobject,1..20] of char;
      branchset: array[1..thriceobject] of grouptype;
      leafss: nodearray;
      numfeat: integer;
      setzero,zeroleft: integer;
      iteration: integer;
      spillover: array[1..thriceobject] of boolean;
      host: array[1..thriceobject,1..6] of 0..thriceobject;
      printdata,printtransformed,printnumbers,
      specifyorder,specifytree,specifymarked,printpatternm,
       printheight,printtree,printmodeldis,printresiduals: boolean;
            (* output options requests *)
      similarities,lowerhalf: boolean;   (* similarities or distances;
                                   lowerhalf or full matrix *)
      subtractconst: boolean;  (* is data adjusted to EXACTLY
                              satisfy triangle inequality? *)
      rooting: rootingtype; (* three options are available for selection of
                               root: asis=just put root at last two nodes
                                     minvar=find root with min. variance of
                                             root-to-leaf distances
                                     specified=root at specified nodes *)
      rootchild: array[1..2] of integer; (* "children" of root node *)
      linesize: integer;       (* output linelength for crt, etc. *)
      namelength: integer; (* length of longest name *)
      symbol: array[1..thriceobject] of char;
      feed: integer;  (* signals when output is about to run off line *)
      column,fld: integer;    (* hold column & data field info *)
      threshold,  (* threshold for eliminating small marked segmentss *)
      tthreshold,  (*     "      "       "       small tree branches *)
      cthreshold: real; (* relative threshold for defining cliques *)
      numberhosts: array[1..thriceobject] of integer;
      maxspillover,newconstant,rootarclength: real;
      nummarked: integer;
      depthofsearch: integer;
      saveroot,rootd: real;
      nodeposition: nodearray;
      flagzero: boolean;
      i,j,k,l,m,ij,ik,jk,il,jl,kl,q,x,y,int,whichgroup,hold: integer;
      ch: char;
      infile,outfile: text;
      infilnam,outfilnam: string[24];


(***********************************************************************)
(* ijfunction: converts the row and column of a lowerhalf matrix       *)
(* to the ordinal position of that entry in a vector (matrixarray)     *)
(***********************************************************************)
function ijfunction(i:integer; j:integer): integer;
begin
   if (i>j) then ijfunction:=(((i-1)*(i-2)) div 2) + j
   else if (j>i) then ijfunction:=(((j-1)*(j-2)) div 2) + i
   else writeln('error: i=j in ijfunction');
end; (* ijfunction *)



(***********************************************************************)
(* writematrix: write a matrixarray as lowerhalf matrix;               *)
(*   n = size of the matrix,                                           *)
(*   field = number of digits for each entry, and                      *)
(*   decplaces = number of digits after the decimal place              *)
(***********************************************************************)
procedure writematrix(mtx: matrixarray; n,field,decplaces: integer);
   var row,col: integer;
begin
      ij:=0;
   for row:=2 to n do
   begin
      for col:=1 to (row-1) do
      begin
        ij:=ij+1;
        write(outfile,mtx[ij]:field:decplaces);
        if (col=feed) then writeln(outfile);
      end;
      writeln(outfile);
   end;
writeln(outfile); writeln(outfile);
end; (* writematrix *)



(***********************************************************************)
(* qsort: does quicksort of array dd, saves new order in array c       *)
(***********************************************************************)
procedure qsort(var dd: matrixarray; lower,upper: integer);
begin
 if (upper>lower+1)
 then begin
       x:=lower+1; y:=upper;
       while (x<=y) do
          if (dd[c[x]]>=dd[c[lower]])
          then x:=x+1
          else begin
                while (dd[c[y]]<dd[c[lower]])
                  do y:=y-1;
                if (x<y) 
                then begin
                       hold:=c[x]; c[x]:=c[y]; c[y]:=hold;
                       x:=x+1; y:=y-1;
                     end;
               end;
       hold:=c[lower]; c[lower]:=c[y]; c[y]:=hold;
       qsort(dd,lower,y-1);
       qsort(dd,y+1,upper);
      end
 else if (upper=lower+1)      (* dd has only two elements *)
      then if (dd[c[lower]]>dd[c[upper]])
           then begin
                  hold:=c[lower];
                  c[lower]:=c[upper];
                  c[upper]:=hold;
                end;
end; (* qsort *)




(************************************************************************)
(* readin:      read program control information and data               *)
(************************************************************************)
procedure readin;   
var   i,j,k,ij,count: integer;
      dumreal: real;
      ch: char;
      word: array[1..3] of char;


procedure readword;  (* read a word of input control keywords *)
begin
   ch:=' ';
   while ((ch=' ') or (ch=',')) do if not(eoln(infile)) then read(infile,ch);
   i:=0;
   while not((ch=' ') or (ch=',') or (eoln(infile))) do
   begin
      i:=i+1;
      if not(i=1) then read(infile,ch);
      j:=ord(ch);
      if ((j>=65) and (j<=90)) then j:=j+32;
      ch:=chr(j);
      if not(i>3) then word[i]:=ch;
   end;
end; (* readword *)


begin (* readin *)

(* initialize & set defaults *)
printdata:=false; printtransformed:=false; 
printheight:=false; printmodeldis:=false; printresiduals:=false;
printtree:=true; printnumbers:=true; printpatternm:=true;
specifyorder:=false; specifytree:=false; specifymarked:=false;
linesize:=linelength;
rooting:=asis; subtractconst:=true;
cthreshold:=0.5;
threshold:=-9999.9; tthreshold:=-9999.9; nummarked:=-9999; (* reset later *)


(* read analysis & output option line *)
while not (eoln(infile)) do
begin
  readword;
if ((word[1]='d')and(word[2]='a')and(word[3]='t')) then printdata:=true;
if ((word[1]='t')and(word[2]='r')and(word[3]='a')) 
      then printtransformed:=true;
if ((word[1]='m')and(word[2]='o')and(word[3]='d')) then printmodeldis:=true;
if ((word[1]='r')and(word[2]='e')and(word[3]='s')) then printresiduals:=true;
if ((word[1]='n')and(word[2]='o')and(word[3]='t')) then printtree:=false;
if ((word[1]='c')and(word[2]='r')and(word[3]='t')) then linesize:=80;
if ((word[1]='n')and(word[2]='o')and(word[3]='n')) then printnumbers:=false;
if ((word[1]='s')and(word[2]='p')and(word[3]='e')) then specifyorder:=true;
if ((word[1]='n')and(word[2]='o')and(word[3]='s')) 
      then subtractconst:=false;
if ((word[1]='h')and(word[2]='e')and(word[3]='i')) then printheight:=true;
if ((word[1]='t')and(word[2]='r')and(word[3]='e')) then specifytree:=true;
if ((word[1]='m')and(word[2]='a')and(word[3]='r')) then specifymarked:=true;
if ((word[1]='n')and(word[2]='o')and(word[3]='s'))
      then subtractconst:=false;
if ((word[1]='n')and(word[2]='o')and(word[3]='p')) then printpatternm:=false;
if ((word[1]='l')and(word[2]='i')and(word[3]='n')) then read(infile,linesize);
if ((word[1]='t')and(word[2]='h')and(word[3]='r')) then read(infile,threshold);
if ((word[1]='t')and(word[2]='t')and(word[3]='h'))
 then read(infile,tthreshold);
if ((word[1]='r')and(word[2]='h')and(word[3]='o'))
 then read(infile,cthreshold);
if ((word[1]='p')and(word[2]='a')and(word[3]='r'))
     then read(infile,nummarked);
if ((word[1]='m')and(word[2]='i')and(word[3]='n')) then rooting:=minvar;
if ((word[1]='d')and(word[2]='e')and(word[3]='e'))
 then writeln(outfile,'deepest rooting no longer supported: option ignored');
if ((word[1]='r')and(word[2]='o')and(word[3]='o'))
 then begin
      rooting:=specified;
      read(infile,rootchild[1],rootchild[2]);
      end;
end;
readln(infile);

(* if order of leaves is to be specified, then read that (optional) line *)
      count:=0;
if (specifyorder)
then begin
      while not(eoln(infile)) do
       begin count:=count+1; read(infile,i); nodeposition[i]:=count end;
      readln(infile);
     end;

writeln(outfile);
writeln(outfile,'extree analysis (EXTREE version 1.5):');

(* read & write comment line *)
for i:=1 to linelength do
if not(eoln(infile))   
 then begin read(infile,ch); write(outfile,ch) end;
readln(infile); writeln(outfile); writeln(outfile); writeln(outfile);

(* begin reading data description line *)
read(infile,numobject); 
if (numobject>maxobject) then
 writeln(outfile,'error: maximum number of objects allowed is ',maxobject:4);
if (nummarked>maxobject)
 then writeln('error: no more than',maxobject:4,
              ' overlapping features can be specified');
if ((specifyorder) and not(count=numobject))
 then writeln(outfile,'error: optional line specifying order of leaves ',
                  'has wrong number of entries');
readword;
   if ((word[1]='s')and(word[2]='i')and(word[3]='m')) 
   then similarities:=true
   else if ((word[1]='d')and(word[2]='i')and(word[3]='s')) 
      then similarities:=false
   else writeln(outfile,'  can''t tell if similarities or distances');
readword;
   if ((word[1]='l')and(word[2]='o')and(word[3]='w')) 
      then lowerhalf:=true
   else if ((word[1]='f')and(word[2]='u')and(word[3]='l')) 
      then lowerhalf:=false
   else writeln(outfile,'  can''t tell if full or lowerhalf matrix');
if not(eoln(infile)) then read(infile,field) 
           else writeln(outfile,'  can''t find field-width specification');
if not(eoln(infile)) 
 then read(infile,decplaces)
 else writeln(outfile,' can''t find digits-after-decimal-point specification');
readln(infile);
numprox:=(numobject*(numobject-1)) div 2;
feed:= linesize div field;


(* begin reading actual data *)
(* read full matrix, treating as asymmetric; print data if requested *)
if (lowerhalf=false) then
begin
   if (printdata=true) then writeln(outfile,'data matrix:');
   for ij:=1 to maxsize do prox[ij]:=0.0;
   for i:=1 to numobject do
   begin
      for j:=1 to numobject do
      begin
      if (eoln(infile)) then readln(infile);
      read(infile,dumreal); 
      if (printdata) then write(outfile,dumreal: field: decplaces);
      if ((printdata) and (j=feed)) then writeln(outfile);
      if not(i=j) 
      then begin
         ij:=ijfunction(i,j);
         prox[ij]:=prox[ij]+(dumreal/2)
      end;
      end;
      readln(infile); if (printdata) then writeln(outfile);
   end;
writeln(outfile); writeln(outfile,'full matrix symmetrized');
writeln(outfile); writeln(outfile);
end;

(* read lowerhalf matrix *)
if (lowerhalf) then
begin
      ij:=0;
   for i:=2 to numobject do
   begin
      if (eoln(infile)) then readln(infile);
      for j:=1 to (i-1) do
      begin
        ij:=ij+1;
        if (eoln(infile)) then readln(infile);
        read(infile,prox[ij]);
      end;
   end;
   readln(infile);

end;

(* read in names if they are given *)
j:=1; namelength:=0;
while not((eof(infile)) or (j>numobject)) do
begin
    i:=1;
   while not((eoln(infile)) or (i>20)) do
     begin read(infile,name[j,i]); i:=i+1; end;
   k:=i-1;
   if not(k=0) then while (name[j,k]=' ') do k:=k-1;
   if (namelength<k) then namelength:=k;
   if (k<20) then for i:=(k+1) to 20 do name[j,i]:=' ';
   j:=j+1;
   if not(eof(infile)) then readln(infile);
end;

end; (* readin *)



(*************************************************************************)
(* chkmetric: check if proximities satisfy metric axioms, fix if not     *)
(*************************************************************************)
procedure chkmetric;
var i,j,k,ij,ik,jk,xxij,xxjk: integer;
    diff,min: real;
begin

   addconstant:=0.0; min:=0.0; maxdis:=0.0;

(* convert to dissimilarities if necessary *)
if (similarities) 
then begin  
      proxcode:=-1.0; 
      writeln(outfile,'similarities linearly transformed into distances')  
     end
else proxcode:=1.0;

(* check for positivity of distances *)
for ij:=1 to numprox do
begin
   prox[ij]:=proxcode*prox[ij];
   if (prox[ij]<min) then min:=prox[ij];
   if (prox[ij]>maxdis) then maxdis:=prox[ij];
end;
if (min<0.0) then addconstant:=-min;
writeln(outfile); writeln(outfile);
writeln(outfile,'(',addconstant:field:decplaces,
      '  needed for positivity of distances )');

(* check every triple of objects for triangle inequality *)
  min:=maxdis;
      xxij:=1;
   for i:=1 to (numobject-2) do
   begin
      xxij:=xxij+i-1;
      ij:=xxij;
      xxjk:=xxij;
   for j:=(i+1) to (numobject-1) do
   begin
      ij:=ij+(j-2);
      ik:=ij;
      xxjk:=xxjk+j-1;
      jk:=xxjk;
   for k:=(j+1) to (numobject) do
   begin
      ik:=ik+(k-2);
      jk:=jk+(k-2);
      if ((prox[jk]>=prox[ij]) and (prox[jk]>=prox[ik]))
      then diff:=(prox[ij]+prox[ik])-prox[jk]
      else if ((prox[ik]>=prox[ij]) and (prox[ik]>=prox[jk]))
           then diff:=(prox[ij]+prox[jk])-prox[ik]
      else diff:=(prox[ik]+prox[jk])-prox[ij];
      if (diff<min) then min:=diff
      end;
   end;
end;
addconstant:=-min;
if ((subtractconst=false) and (addconstant<=0))
 then writeln(outfile,'triangle inequality already satisfied')
 else begin
      writeln(outfile,' ',addconstant:field:decplaces,
      '  added to exactly satisfy triangle inequality');
      for ij:=1 to numprox do prox[ij]:=prox[ij]+addconstant
      end;

writeln(outfile); writeln(outfile);
end; (* procedure chkmetric *)




(************************************************************************)
(* initialize: initialize variables                                     *)
(************************************************************************)
procedure initialize;
  var i,ij: integer;
begin
n:=numobject;
size:=numprox;
numnode:=numobject;
depthofsearch:=3*numobject;
iteration:=0;
for ij:=1 to numprox do d[ij]:=prox[ij];

(* these symbols are used to label marked feature segments in extended tree *)
symbol[1]:='C'; symbol[2]:='D'; symbol[3]:='E'; symbol[4]:='H';
symbol[5]:='I'; symbol[6]:='N'; symbol[7]:='O'; symbol[8]:='U';
symbol[9]:='X'; symbol[10]:='Z'; symbol[11]:='A'; symbol[12]:='D';
symbol[13]:='G'; symbol[14]:='J'; symbol[15]:='K'; symbol[16]:='L';
symbol[17]:='M'; symbol[18]:='S'; symbol[19]:='T'; symbol[20]:='V';
symbol[21]:='a'; symbol[22]:='b'; symbol[23]:='c'; symbol[24]:='d';
symbol[25]:='e'; symbol[26]:='f'; symbol[27]:='g'; symbol[28]:='h';
symbol[29]:='i'; symbol[30]:='j'; symbol[31]:='j'; symbol[32]:='k';
symbol[33]:='l'; symbol[34]:='m'; symbol[35]:='n'; symbol[36]:='o';
symbol[37]:='p'; symbol[38]:='q'; symbol[39]:='r'; symbol[40]:='s';

for i:=1 to numobject do
begin
   use[i]:=true;
   index[i]:=i;
   height[i]:=0.0;
   branchset[i]:=[i];
   child[i,1]:=0; child[i,2]:=0;
   leafcount[i]:=1;
   leafss[i]:=0.0;
   if not(specifyorder) then nodeposition[i]:=i
end;
for i:=(numobject+1) to thriceobject do
begin
   height[i]:=0.0;
   branchset[i]:=[];
   leafcount[i]:=0;
   leafss[i]:=0.0;
end;
(* check values of certain parameters *)
if specifytree then writeln(outfile,'user-specified tree structure');
if specifymarked then writeln(outfile,'user-specified marked features');
if (linesize>linelength) then begin linesize:=linelength;
writeln(outfile,'warning: linesize must be <= ',linelength:3,' (resetting it)')
  end;
if (nummarked=-9999) then nummarked:=(numobject div 2);
if (cthreshold<0.0) then cthreshold:=0.0;
if (cthreshold>1.0) then cthreshold:=1.0;
if (threshold=-9999.9)
 then threshold:=(maxdis+addconstant)/50.0
 else if (threshold<0.0)
      then begin threshold:=0.0;
            writeln(outfile,'warning: threshold must be >= 0.0 (resetting it)')
           end;
if (tthreshold=-9999.9) then tthreshold:=threshold
  else if (tthreshold<0.0) then tthreshold:=0.0;
end; (* initialize *)



(************************************************************************)
(* quadruples: check every quadruple of objects, finding the pairs that *)
(*             are closest; increment their "neighbor score", c         *)
(************************************************************************)
procedure quadruples;
   var i,j,k,l,ij,ik,jk,il,jl,kl: integer;
       xxij,xxjk,xxkl: integer;
       sum1,sum2,sum3: real;
begin

   for ij:=1 to size do c[ij]:=0;

(* check every quadruple of objects *)
      xxij:=1;
   for i:=1 to (n-3) do
   begin
      xxij:=xxij+i-1;
      ij:=xxij;
      xxjk:=xxij;

   for j:=(i+1) to (n-2) do
   begin
      ij:=ij+(j-2);
      ik:=ij;
      xxjk:=xxjk+j-1;
      jk:=xxjk;
      xxkl:=xxjk;

   for k:=(j+1) to (n-1) do
   begin
      ik:=ik+(k-2);
      jk:=jk+(k-2);
      il:=ik;
      jl:=jk;
      xxkl:=xxkl+k-1;
      kl:=xxkl;

   for l:=(k+1) to n do
   begin
      il:=il+(l-2);
      jl:=jl+(l-2);
      kl:=kl+(l-2);

      sum1:=d[ij]+d[kl];
      sum2:=d[ik]+d[jl];
      sum3:=d[il]+d[jk];

(* object pairs in smallest of these sums receive a nearest-neighbor
score of 2,those in the next smallest 1, those in largest 0  *)

      if (sum1<sum2) then begin c[ij]:=c[ij]+1; c[kl]:=c[kl]+1 end
       else if (sum2<sum1) then begin c[ik]:=c[ik]+1; c[jl]:=c[jl]+1 end;
      if (sum1<sum3) then begin c[ij]:=c[ij]+1; c[kl]:=c[kl]+1 end
       else if (sum3<sum1) then begin c[il]:=c[il]+1; c[jk]:=c[jk]+1 end;
      if (sum2<sum3) then begin c[ik]:=c[ik]+1; c[jl]:=c[jl]+1 end
       else if (sum3<sum2) then begin c[il]:=c[il]+1; c[jk]:=c[jk]+1 end;

   end;
   end;
   end;
   end;

end; (* quadruples *)



(************************************************************************)
(* findgroups: using the matrix of neighbor scores, c, find those pairs *)
(*             of objects that are mutual nearest-neighbors.  Resolve   *)
(*             ties and non-reciprocity by least-squares test in        *)
(*             "findcloserneighbor"                                     *)
(************************************************************************)
procedure findgroups;
var  i,j,k,ij:  integer;
     rowsumdistance: array[1..maxobject] of real;
     neighbors: array[1..maxobject] of integer;
     pair: array[1..maxsize,1..2] of integer;
     goodone: array[1..maxsize] of boolean;
     holdcount,neighbor,com,oth1,oth2,new,numnn,numnew,rowmaxcount,
       maxloser,maxgood: integer;
     pairi,pairj,common,otheri,otherj: grouptype;


(* chooses one neighbor as closer; returns neighbor & neighbordistance *)
procedure findcloserneighbor(obj,n1,n2:integer);
   var pair1,pair2,pair12: integer;
       n1distance,n2distance: real;
       losercount: integer;
begin
   pair1:=ijfunction(obj,n1);
   pair2:=ijfunction(obj,n2);
   pair12:=ijfunction(n1,n2);
   n1distance:=d[pair1]+((rowsumdistance[n2]-d[pair2]-d[pair12])/(n-3));
   n2distance:=d[pair2]+((rowsumdistance[n1]-d[pair1]-d[pair12])/(n-3));
   if (n1distance<n2distance)
    then begin 
         neighbor:=n1; 
         losercount:=c[pair2];
       end
    else begin 
         neighbor:=n2;
         losercount:=c[pair1];
       end;
if (losercount>maxloser) then maxloser:=losercount;
end;  (* findcloserneighbor *)


begin (* findgroups *)

(* find nearest-neighbors of each object & calc. rowsumdis. for tiebreaking *)
     holdcount:=0;
for i:=1 to n do
begin
   rowmaxcount:=-1;
   rowsumdistance[i]:=0.0;
   for j:=1 to n do
   if not(i=j)
   then begin
          ij:=ijfunction(i,j);
          rowsumdistance[i]:=rowsumdistance[i]+d[ij];
          if (c[ij]>rowmaxcount)
          then begin rowmaxcount:=c[ij];
                  numnn:=1;
                  neighbors[1]:=j;
            end
          else if (c[ij]=rowmaxcount)
          then begin
                  numnn:=numnn+1;
                  neighbors[numnn]:=j;
               end;
        end; (* loop on j *)

   (* check if any pair made from a new neighbor of i is already on list *)
   for new:=1 to numnn do
   begin
        goodone[new]:=true;
      for k:=1 to holdcount do
       if (([i]+[neighbors[new]])=([pair[k,1]]+[pair[k,2]]))
       then goodone[new]:=false;
   end;

        numnew:=0;
   for new:=1 to numnn do
   begin
      if (goodone[new])
      then begin
              numnew:=numnew+1;
              pair[holdcount+numnew,1]:=i;
              pair[holdcount+numnew,2]:=neighbors[new];
           end;
   end;

   holdcount:=holdcount+numnew;
end; (* loop on each object, i *)

   for i:=1 to holdcount do goodone[i]:=true;
   maxloser:=0;

(* compare every pair of pairs with a common member to see which is better *)
for j:=2 to holdcount do
for i:=1 to (j-1) do
begin
   pairi:=[pair[i,1]]+[pair[i,2]];
   pairj:=[pair[j,1]]+[pair[j,2]];
   common:= pairi*pairj;

   if (common<>[])   (* if pairs overlap, then resolve by l.s. test *)
   then begin

         otheri:=pairi-common;
         otherj:=pairj-common;
         for int:=1 to n do
         begin
            if (int in common) then com:=int;
            if (int in otheri) then oth1:=int;
            if (int in otherj) then oth2:=int;
         end;

         findcloserneighbor(com,oth1,oth2);

         if (neighbor=oth1)
          then goodone[j]:=false
          else goodone[i]:=false;

       end;
end; (* loop on pairs j,i of object pairs *)
(* now each object i should have only a single nearest neighbor *)

   numgroup:=0; maxgood:=0;
for i:=1 to holdcount do
begin
    ij:=ijfunction(pair[i,1],pair[i,2]);
  if (goodone[i] and (c[ij]>maxgood)) then maxgood:=c[ij];
  if ((goodone[i]) and (c[ij]>=maxloser))
  then begin
         numgroup:=numgroup+1;
         group[numgroup]:=[pair[i,1]]+[pair[i,2]];
       end;
end;
if (numgroup=0) (* if no good pair was > maxloser, take max good one(s) *)
then for i:=1 to holdcount do
if goodone[i]
then begin
       ij:=ijfunction(pair[i,1],pair[i,2]);
       if (c[ij]=maxgood)
       then begin
              numgroup:=numgroup+1;
              group[numgroup]:=[pair[i,1]]+[pair[i,2]]
            end;
     end;

end; (* findgroups *)



(*************************************************************************)
(* makenode: using list of groups from "findgroups", make new nodes from *)
(*           groups.  Compute length of arcs between new node and its    *)
(*           children, and define distances from new node as weighted    *)
(*           average of distances from children.                         *)
(*************************************************************************)
procedure makenode;
var  i,j,k,ij,node,holdcount,sumcount: integer;
     nodegroup: array[1..2] of integer;  (* holds pair being made into node *)



procedure estimate;   (* compute arc lengths *)
var      sum,avedis,diff: array[1..2] of real;
      dis,avelength: real;
      i,j,holdcount: integer;
begin
ij:=ijfunction(nodegroup[1],nodegroup[2]);
dis:=d[ij];
for i:=1 to 2 do
begin
   holdcount:=0; sum[i]:=0.0;
   for j:=1 to n do
   if (use[j]) then
   begin
      ij:=ijfunction(nodegroup[i],j);
      sum[i]:=sum[i]+leafcount[index[j]]*d[ij];
      holdcount:=holdcount+leafcount[index[j]];
   end;
   avedis[i]:=sum[i]/holdcount;
end;

avelength:=(avedis[1]+avedis[2])/2;
for i:=1 to 2 do
begin
   diff[i]:=avedis[i]-avelength;
   length[index[nodegroup[i]]]:=dis/2+diff[i]-height[index[nodegroup[i]]];
   if (length[index[nodegroup[i]]]<0.0) then length[index[nodegroup[i]]]:=0.0;
end;

end; (* estimate *)



begin   (* makenode *)
   numnode:=numnode+1;
   node:=numnode;
   k:=0;
   for i:=1 to n do
      if (i in group[whichgroup]) 
      then begin k:=k+1;
            use[i]:=false;
            parent[index[i]]:=node;
            nodegroup[k]:=i
         end;
if not(k=2)
 then writeln(outfile,'error: wrong number of children: node',node:4);

for i:=1 to k do 
begin
  leafcount[node]:=leafcount[node]+leafcount[index[nodegroup[i]]];
  branchset[node]:=branchset[node]+branchset[index[nodegroup[i]]];
end;
   

(* order children according to specified or input order *)
if (nodeposition[index[nodegroup[2]]]<nodeposition[index[nodegroup[1]]])
then begin
      child[node,1]:=index[nodegroup[2]]; 
      child[node,2]:=index[nodegroup[1]]
     end
else begin 
      child[node,1]:=index[nodegroup[1]]; 
      child[node,2]:=index[nodegroup[2]]
     end;

   
estimate;


(* calculate mean length of leaves under node ("height") *)
 sum1:=height[child[node,1]]+length[child[node,1]];
 sum2:=height[child[node,2]]+length[child[node,2]];
 height[node]:=((sum1*leafcount[child[node,1]])
              +(sum2*leafcount[child[node,2]])) / leafcount[node];

(* calculate sum of squares due to leaf-length for later choosing of root *)
leafss[node]:=leafss[child[node,1]]+leafss[child[node,2]]
           +leafcount[child[node,1]]*sqr(height[node]-sum1)
           +leafcount[child[node,2]]*sqr(height[node]-sum2);

(* calculate average y-position of node for printing *)
nodeposition[node]:=(nodeposition[child[node,1]]*leafcount[child[node,1]]
                  +nodeposition[child[node,2]]*leafcount[child[node,2]])
                / leafcount[node];

(* recompute distances to other nodes *)
   for i:=1 to n do
   if (use[i]) then 
   begin
      sumcount:=0; sum:=0.0;
      for j:=1 to 2 do
      begin
      ij:=ijfunction(i,nodegroup[j]);
      holdcount:=leafcount[index[nodegroup[j]]]*leafcount[index[i]];
      sum:=sum+holdcount*d[ij];
      sumcount:=sumcount+holdcount;
      end;
      ij:=ijfunction(i,nodegroup[1]);
      d[ij]:=sum/sumcount;
   end;


   use[nodegroup[1]]:=true;
   index[nodegroup[1]]:=node;
   nreduce:=nreduce+1;

     write('.');
end; (* makenode *)





(************************************************************************)
(* cleanup: collapse d matrix & prepare for next iteration            *)
(************************************************************************)
procedure cleanup;
   var  i,j,k,ij,nij: integer;
begin

  (* repack d matrix *)
    ij:=0; nij:=0;
  for j:=2 to n do
     for i:=1 to (j-1) do
     begin
        ij:=ij+1;
        if((use[i]) and (use[j]))
        then begin
          nij:=nij+1;
          d[nij]:=d[ij]
        end;
     end;
  
  for i:= (n-1) downto 1 do
        if (use[i]=false) then for k:=i to (n-1) do index[k]:=index[k+1];

   n:=n-nreduce;
   size:=(n*(n-1)) div 2;

end; (* cleanup *)




(************************************************************************)
(* makeroot: make a fake node, rootnode, with the last two nodes as its *)
(*           children.  This node is of degree two.                  *)
(************************************************************************)
procedure makeroot;     (* when n=2 *)
var i: integer;
begin
   rootd:=d[1];
   numnode:=numnode+1;
   rootnode:=numnode;
   parent[rootnode]:=0;
   length[rootnode]:=0.0;
   leafcount[rootnode]:=numobject;
      branchset[rootnode]:=[];
   for i:=1 to 2 do
   begin
      parent[index[i]]:=rootnode;
      branchset[rootnode]:=branchset[rootnode]+branchset[index[i]];
   end;
if (nodeposition[index[2]]<nodeposition[index[1]])
 then begin child[rootnode,2]:=index[1]; child[rootnode,1]:=index[2] end
 else begin child[rootnode,1]:=index[1]; child[rootnode,2]:=index[2] end;
rootarclength:=rootd-height[child[rootnode,1]]-height[child[rootnode,2]];
if (rootarclength<0.0) then rootarclength:=0.0;
end; (* makeroot *)


procedure hangroot; (* hang root at correct point on root arc *)
begin
   diff:=height[child[rootnode,1]]-height[child[rootnode,2]];
   if (diff>rootarclength) then diff:=rootarclength;
   if (diff<-rootarclength) then diff:=-rootarclength;
   length[child[rootnode,1]]:=(rootarclength-diff)/2.0;
   length[child[rootnode,2]]:=(rootarclength+diff)/2.0;
   sum1:=height[child[rootnode,1]]+length[child[rootnode,1]];
   sum2:=height[child[rootnode,2]]+length[child[rootnode,2]];
   height[rootnode]:=((sum1*leafcount[child[rootnode,1]])
              +(sum2*leafcount[child[rootnode,2]])) / leafcount[rootnode];
   leafss[rootnode]:=leafss[child[rootnode,1]]+leafss[child[rootnode,2]]
           +leafcount[child[rootnode,1]]*sqr(height[rootnode]-sum1)
           +leafcount[child[rootnode,2]]*sqr(height[rootnode]-sum2);
if (rooting=minvar)
then begin
       writeln(outfile);
       writeln(outfile,'var. of dis. from root to leaves =',
               leafss[rootnode]:9:4);
     end;
if (rootarclength>0.0)
  then saveroot:=length[child[rootnode,1]]/rootarclength
  else saveroot:=0.0;
end; (* hangroot *)



(************************************************************************)
(* findbestroot: find optimal rooting of the tree                       *)
(*               (for rooting=minvar or rooting=specified)              *)
(************************************************************************)
procedure findbestroot;
var parentlength,parentheight,parentss,minscore,psum,bsum,othersum,fooheight,
     newparentheight,newparentss,foo1,foo2: real;
    score: nodearray;
    parentcount,newparentcount,goodnode,badnode,goodchild,badchild,
     ch1,ch2: integer;
    foundroot: boolean;


procedure checkchild(gp,bp,nde,othernde: integer);
begin
if ((nde>0) and (othernde>0))
then begin

if (rooting=specified)
then if ((branchset[rootchild[1]]*branchset[nde]<>[])
         and (branchset[rootchild[2]]*branchset[nde]<>[]))
     then score[nde]:=-1.0 else score[nde]:=0.0;

if (rooting=minvar)
then begin
  psum:=parentheight+parentlength;
  othersum:=height[othernde]+length[othernde];
  newparentcount:=parentcount+leafcount[othernde];
  newparentheight:=((psum*parentcount) + (othersum*leafcount[othernde]))
                    / newparentcount;
  newparentss:=parentss+leafss[othernde]
            +(parentcount*sqr(psum-newparentheight))
            +(leafcount[othernde]*sqr(othersum-newparentheight));

  diff:=height[nde]-newparentheight;
  if (diff>length[nde]) then diff:=length[nde];
  if (diff<-length[nde]) then diff:=-length[nde];
  foo1:=(length[nde]-diff)/2.0;
  foo2:=(length[nde]+diff)/2.0;
  sum1:=height[nde]+foo1;
  sum2:=newparentheight+foo2;
  fooheight:=((sum1*leafcount[nde])+(sum2*newparentcount))/numobject;
  score[nde]:=leafss[nde]+newparentss
          +leafcount[nde]*sqr(fooheight-sum1)
          +newparentcount*sqr(fooheight-sum2);
  writeln(outfile,'node',nde:3,'  score=',score[nde]:8:3);
end; (* minvar section *)

if (score[nde]<minscore)
then begin
        minscore:=score[nde];
        goodnode:=gp; badnode:=bp; goodchild:=nde; badchild:=othernde
     end;
end;
end; (* checkchild *)


procedure changeroot(gp,bp,nde,othernde: integer);
begin
writeln(outfile,'changeroot(',gp:3,' =>',nde:3,')');
parent[nde]:=rootnode;
parent[bp]:=gp;
length[bp]:=rootarclength;
branchset[gp]:=branchset[bp]+branchset[othernde];
leafcount[gp]:=leafcount[bp]+leafcount[othernde];
height[gp]:=((leafcount[bp]*(height[bp]+length[bp]))
             +(leafcount[othernde]*(height[othernde]+length[othernde])))
             / leafcount[gp];
othersum:=height[bp]+rootarclength;
bsum:=height[othernde]+length[othernde];
leafss[gp]:=leafss[bp]+leafss[othernde]
             +(leafcount[bp]*sqr(othersum-height[gp]))
             +(leafcount[othernde]*sqr(bsum-height[gp]));
nodeposition[gp]:=((leafcount[bp]*nodeposition[bp])
                +(leafcount[othernde]*nodeposition[othernde])) / leafcount[gp];
if (nodeposition[gp]<nodeposition[nde])
 then begin child[rootnode,1]:=gp; child[rootnode,2]:=nde end
 else begin child[rootnode,1]:=nde; child[rootnode,2]:=gp end;
if (nodeposition[bp]<nodeposition[othernde])
 then begin child[gp,1]:=bp; child[gp,2]:=othernde end
 else begin child[gp,1]:=othernde; child[gp,2]:=bp end;
if (rooting=minvar)
 then begin
        score[gp]:=score[rootnode];
        score[rootnode]:=score[nde];
      end
 else begin  (* if (rooting=specified) *)
        score[gp]:=0.0;
        score[rootnode]:=0.0;
      end;
rootarclength:=length[nde];
end; (* changeroot *)



begin (* findbestroot *)

  foundroot:=false;
  if (rooting=specified) then score[rootnode]:=0.0
    else if (rooting=minvar) then score[rootnode]:=leafss[rootnode];
repeat
  minscore:=score[rootnode];
  parentlength:=rootarclength;
  ch1:=child[rootnode,1];
  ch2:=child[rootnode,2];

  parentcount:=leafcount[ch2];
  parentheight:=height[ch2];
  parentss:=leafss[ch2];

if not(ch1<=numobject)
then begin
        checkchild(ch1,ch2,child[ch1,1],child[ch1,2]);
        checkchild(ch1,ch2,child[ch1,2],child[ch1,1]);
     end;

  parentcount:=leafcount[ch1];
  parentheight:=height[ch1];
  parentss:=leafss[ch1];

if not(ch2<=numobject)
then begin
        checkchild(ch2,ch1,child[ch2,1],child[ch2,2]);
        checkchild(ch2,ch1,child[ch2,2],child[ch2,1]);
     end;

if (minscore>=score[rootnode])
 then foundroot:=true
 else changeroot(goodnode,badnode,goodchild,badchild);

until (foundroot);

end; (* findbestroot *)




(*********************************************************************)
(* report: describe nodes, tree structure, and draw tree             *)
(*********************************************************************)
procedure report;


procedure drawtree;
const vertchar = '|';    (* characters used to draw tree; change if desired *)
      horizchar = '-';
var   start,stop,maxstop,linestop,middle,node,numleaf: integer;
      maxxposition,stretch:  real;
      line: array[1..linelength] of char;
      xposition:  array[1..thriceobject] of real;
      yposition:  array[1..thriceobject] of integer;
      topkid,bottomkid: array[1..thriceobject] of integer;
      writej: boolean;




(* descend calculates:
      vertical position of a branch ("yposition")
      horizontal position of branch ("xposition")
      endpoints of vertical connecting lines ("topkid","bottomkid")  *)
procedure descend(nde: integer);
var i: integer;
begin

   if not(nde=rootnode)
   then xposition[nde]:=xposition[parent[nde]]+length[nde];

   if (nde<=numobject)            (* if nde is a leaf *)
   then begin
         numleaf:=numleaf+1;
         yposition[nde]:=(numleaf*2)-1
      end
   else begin                  (* if nde is an interior node *)
         for i:=1 to 2 do
         begin
            descend(child[nde,i]);
            yposition[nde]:=yposition[nde]+
                (yposition[child[nde,i]]*leafcount[child[nde,i]]);
         end;
         yposition[nde]:=yposition[nde] div leafcount[nde];
         topkid[nde]:=yposition[child[nde,1]];
         bottomkid[nde]:=yposition[child[nde,2]]
      end;

end; (* descend *)




begin  (* drawtree *)

   xposition[rootnode]:=0.0; numleaf:=0;
   for i:=(numobject+1) to numnode do yposition[i]:=0;


   descend(rootnode);


   if not(numleaf=numobject)
      then writeln(outfile,'error in tree structure:  numleaf=',numleaf);
   maxxposition:=0.0;
   for i:=1 to numobject do
     if (xposition[i]>maxxposition) then maxxposition:=xposition[i];
   maxstop:=(linesize-namelength-7);
   if ((printpatternm=true) and (numfeat>numnode))
   then begin
        maxstop:=maxstop-2-(2*nummarked);
        for i:=1 to nummarked do if not(use[numnode+i])
                                 then maxstop:=maxstop+2;
        for i:=1 to (linesize-15) do write(outfile,' ');
        writeln(outfile,'marked feature');
        for i:=1 to (linesize-15) do write(outfile,' ');
        writeln(outfile,'pattern matrix');
        for i:=1 to (linesize-15) do write(outfile,' ');
        writeln(outfile,'--------------');
        writeln(outfile);
      end;
   stretch:=maxstop/maxxposition;


(* begin loop on printer lines *)
   for y:=1 to ((2*numobject)-1) do
   begin
      linestop:=0;
      for i:=1 to linesize do line[i]:=' ';


      for node:=1 to numnode do
      begin


(* place tree features *)
      if (yposition[node]=y)
      then begin
            if (parent[node]<>0)
               then start:=trunc(stretch*xposition[parent[node]])+1
               else start:=1;
            stop:=trunc(stretch*xposition[node])+1;
            if (stop>linestop) then linestop:=stop;
            if (start<=stop)
             then for x:=start to stop do line[x]:=horizchar;


(* place marked features *)
            if (numfeat>numnode)
            then begin
               for j:=1 to (numfeat-numnode) do
               for k:=1 to numberhosts[j] do
               if (host[j,k]=node)
               then begin
                     if (length[j+numnode]>0.0)
                      then middle:=start+trunc(stretch*length[j+numnode])
                      else middle:=start;
                     if (use[j+numnode]=true)
                     then for x:=start to middle do
                           if (x<=maxstop) then line[x]:=symbol[j];
                     if (length[j+numnode]>0.0) then start:=middle+1;
                    end; (* loop when host=node *)

              end;  (* marked feature section *)
      end;  (* loop that is performed only when yposition[node] equals y *)

(* place vertical connecting lines *)
      if (node>numobject)
      then if ((y>topkid[node]) and (y<bottomkid[node]))
           then begin
                   stop:=trunc(stretch*xposition[node])+1;
                   if (stop>linestop) then linestop:=stop;
                   line[stop]:=vertchar;
                end;

   end; (* loop on node *)

(* write out computed line *)
   for i:=1 to linestop do if not(i>maxstop) then write(outfile,line[i]);
   if ((numfeat>numnode) and not(odd(y)) and printpatternm)
   then begin
          for i:=1 to (maxstop-linestop+4+namelength) do write(outfile,' ');
          if (printnumbers=true) then write(outfile,'    ');
          for i:=1 to nummarked do
            if (use[numnode+i]) then write(outfile,' .');
        end;
   for i:=1 to numobject do
   if (yposition[i]=y)
   then begin
          if (printnumbers=true) then write(outfile,i:4);
          write(outfile,'  ');
          for j:=1 to namelength do write(outfile,name[i,j]);
          if ((numfeat>numnode) and (printpatternm))
          then begin
                 write(outfile,'  ');
                 if (maxstop>linestop)
                  then for j:=1 to (maxstop-linestop) do write(outfile,' ');
                 for j:=1 to (numfeat-numnode) do
                 begin
                  writej:=false;
                  for k:=1 to numberhosts[j] do
                  if ((i in branchset[host[j,k]]) and (use[numnode+j]=true))
                   then writej:=true;
                  if (writej=true)
                   then write(outfile,' ',symbol[j])
                   else if (use[numnode+j]=true) then write(outfile,' .');
                 end;
               end;
        end; (* loop on i *)
      writeln(outfile);
   end;
writeln(outfile); writeln(outfile);

end; (* drawtree *)


begin  (* report *)

writeln(outfile);
writeln(outfile,' node     length    children    label'); writeln(outfile);
for i:=1 to numfeat do
begin
   write(outfile,i:4,'    ');
   write(outfile,length[i]:field:decplaces,'   ');
   if (i<=numobject)             (* if i is a leaf *)
   then begin
          write(outfile,'                ');
          for j:=1 to 20 do write(outfile,name[i,j])
        end
   else if (i<=numnode)         (* if i is an internal tree node *)
        then for j:=1 to 2 do write(outfile,child[i,j]:4)
   else begin                   (* if i is a marked feature *)
        k:=i-numnode;
          for j:=1 to numberhosts[k] do
             write(outfile,host[k,j]:4);
          write(outfile,'      "',symbol[k],'"');
        end;
   writeln(outfile);
end;
writeln(outfile); writeln(outfile); writeln(outfile); writeln(outfile);

if (printtree=true) then drawtree;

end; (* report *)



(***************************************************************************)
(* stats: calculate stress and r-squared between data & model distances    *)
(***************************************************************************)
procedure stats;
var modeldis: matrixarray;


procedure derivedistances;
begin
      ij:=0;
for i:=2 to numobject do
begin
   for j:=1 to (i-1) do
   begin
      ij:=ij+1;
      modeldis[ij]:=0.0;
      for k:=1 to numnode do
      if (((i in branchset[k]) and not(j in branchset[k]))
          or ((j in branchset[k]) and not(i in branchset[k])))
      then modeldis[ij]:=modeldis[ij]+length[k];
if (numfeat>numnode)
then for k:=1 to (numfeat-numnode) do
  for l:=1 to (numberhosts[k]-1) do
  for m:=(l+1) to numberhosts[k] do
      if (((i in branchset[host[k,l]]) and (j in branchset[host[k,m]]))
        or((j in branchset[host[k,l]]) and (i in branchset[host[k,m]])))
      then modeldis[ij]:=modeldis[ij]-2.0*length[k+numnode];
end;
end;
end; (* derivedistances *)


procedure lsmonotransform(var modeldis,d: matrixarray);
var i,j,place,ordered:  integer;
    bin:  real;
   endcondition: integer;
begin
repeat
ordered:=1;
   i:=1;
   repeat
      if (d[c[i+1]]>d[c[i]])
      then begin
        ordered:=0;
      endcondition:=0;
        bin:=d[c[i]];
        place:=i;
        repeat
          place:=place+1;
        bin:=bin+d[c[place]];
        if (place=numprox) then endcondition:=1
                  else if (d[c[place+1]]<(bin/(place-i+1)))
                       then endcondition:=1;
        until (endcondition=1);
        for j:=i to place do d[c[j]]:=(bin/(place-i+1));
        i:=place
      end;
      i:=i+1
   until (i>=numprox);
until (ordered=1);
end;  (* lsmonotransform *)


procedure stresscalc(modeldis,d: matrixarray);
var i: integer;
    stress,stress1,stress2,denom,
    varn,varob,nmean,obmean,propor,rmonsq: real;
begin
      stress:=0.0; denom:=0.0; 
      varn:=0.0; varob:=0.0; 
      nmean:=0.0; obmean:=0.0; propor:=0.0; 
   for i:=1 to numprox do
   begin
      obmean:=obmean+prox[i];
      nmean:=nmean+modeldis[i]
      end;
   nmean:=nmean/numprox;
   obmean:=obmean/numprox;
   for i:=1 to numprox do
   begin
     stress:=stress+sqr(d[i]-modeldis[i]);
     propor:=propor+(prox[i]*modeldis[i]);
     denom:=denom+sqr(modeldis[i]);
     varn:=varn+sqr(modeldis[i]-nmean);
     varob:=varob+sqr(prox[i]-obmean);
   end;
   stress1:=sqrt(stress/denom);
   stress2:=sqrt(stress/varn);
   rmonsq:=1.0-(stress/varn);
   varn:=varn/numprox;  varob:=varob/numprox;
   propor:=(sqr(propor/numprox - nmean*obmean))/(varn*varob);
   writeln(outfile,'stress formula 1 =',stress1:7:4);
   writeln(outfile,'stress formula 2 =',stress2:7:4);
   writeln(outfile,'r(monotonic) squared=',rmonsq:6:4);
   writeln(outfile,'r-squared (p.v.a.f.)=',propor:6:4);
   writeln(outfile); writeln(outfile); writeln(outfile); writeln(outfile);
end; (* stresscalc *)


begin (* stats *)

   derivedistances;
(* re-use arrays c & d for lsmt transform of model distances *)
     for ij:=1 to numprox do begin d[ij]:=modeldis[ij]; c[ij]:=ij end;
   qsort(prox,1,numprox);
   lsmonotransform(modeldis,d);
   stresscalc(modeldis,d);

(* now re-use array d to store (untransformed) tree distances *)
for ij:=1 to numprox do d[ij]:=modeldis[ij];

end; (* stats *)



(****************************************************************************)
(* beginning of extree section -- look for marked features                  *)
(****************************************************************************)
(* findfeatures: check all possible marked features, save those > threshold *)
(****************************************************************************)
procedure findfeatures;
var  est: matrixarray;  (* use to store estimates of marked segments *)
     row,col: array[1..maxsize] of 1..twiceobject;
     leaf,ileaf,jleaf,icount,jcount,count,branch1,branch2: integer;
     ileaflist,jleaflist: array[1..maxobject] of 1..maxobject;
     avedisij,avedisik,avedisil,avedisjk,avedisjl,diskl: real;
     treedisij,treedisik,treedisil,treedisjk,treedisjl,treediskl: real;
     node,othernode,nmarked,hold,pq,ii,jj,iijj: integer;
     sum,sum1,sum2,sum3,diff,estimate,
     treesum,treesum1,treesum2,treesum3: real;
     nerf: array[1..2] of integer;



procedure findbestset;
type intarray = array[0..thriceobject] of integer;
var  useit: array[1..thriceobject] of boolean;
     connected: array[1..thriceobject,1..thriceobject] of boolean;
     all,compsub: intarray;
     weight: array[1..thriceobject] of real;
     feature,group,othergroup,clique,r,cc,nc,ig,io,nh: integer;
     groupset,othergroupset: grouptype;
     hght1,hght2,test: real;


(* cliques is the recursive part of the clique-finding algorithm *)
(* uses Bron-Kerbosch algorithm (Alg. 457, CACM) *)
procedure cliques(old: intarray; ne,ce: integer);
var new: intarray;
nod, fixp: integer;
    newne,newce,i,j,count,pos,p,s,sel,minnod: integer;
    loc: integer;

begin
  minnod:=ce; i:=0; nod:=0;
(* determine each counter value and look for minimum *)
  while ((i<ce) and (minnod<>0)) do
  begin
    i:=i+1;
    p:=old[i];
    count:=0;
    j:=ne;
(* count points not connected to p *)
    while ((j<ce) and (count<minnod)) do
    begin
      j:=j+1;
      if not (connected[p,old[j]])
       then begin
              count:=count+1;
(* save position of potential candidate *)
              pos:=j;
            end;
    end;
(* test new minimum *)
    if (count<minnod)
    then begin
           fixp:=p;
           minnod:=count;
           if (i<=ne) then s:=pos
             else begin
                    s:=i;
                    nod:=1
                  end;
         end; (* new minimum *)
  end; (* loop on i *)

(* backtrackcycle *)
  for nod:=(minnod+nod) downto 1 do
  begin
(* interchange *)
    p:=old[s];
    old[s]:=old[ne+1];
    sel:=p;
    old[ne+1]:=p;
(* fill new set "not" *)
    newne:=0; i:=0;
    while (i<ne) do
    begin
      i:=i+1;
      if connected[sel,old[i]]
      then begin
             newne:=newne+1;
             new[newne]:=old[i]
           end;
    end;
(* fill new set "candidates" *)
    newce:=newne;
    i:=ne+1;
    while (i<ce) do
    begin
      i:=i+1;
      if connected[sel,old[i]]
      then begin
             newce:=newce+1;
             new[newce]:=old[i]
           end;
  end;
(* add to "compsub" *)
  cc:=cc+1;
  compsub[cc]:=sel;
  if ((newce=0) and (cc>2)) (* report only cliques with more than 2 members *)
  then begin
          clique:=clique+1;
          write(outfile,' clique = ');
          for loc:=1 to cc do
          begin
            host[clique,loc]:=compsub[loc];
            write(outfile,compsub[loc]:3)
          end;
          numberhosts[clique]:=cc;
          writeln(outfile)
       end
   else if ((newce<>0) and (newne<newce)) then cliques(new,newne,newce);
(* remove from "compsub" *)
  cc:=cc-1;
(* add to "not" *)
  ne:=ne+1;
  if (nod>1)
  then begin
(* select a candidate not connected to the fixed point *)
         s:=ne;
         repeat s:=s+1 until not(connected[fixp,old[s]]);
       end;
  end; (* backtrackcycle *)
end; (* procedure cliques *)


begin  (* procedure findbestset *)
  for i:=1 to depthofsearch do useit[i]:=true;

(* throw out "false" children & parents *)
if (depthofsearch>=2) then
 for group:=1 to (depthofsearch-1) do
  for othergroup:=(group+1) to depthofsearch do
  begin
      ig:=c[group]; io:=c[othergroup];
    if (row[ig]=row[io])
    then if (((parent[col[ig]]=col[io]) and (est[io]<(0.5*est[ig])))
             or (col[ig]=parent[col[io]])) then useit[othergroup]:=false;
    if (col[ig]=col[io])
    then if (((parent[row[ig]]=row[io]) and (est[io]<(0.5*est[ig])))
             or (row[ig]=parent[row[io]])) then useit[othergroup]:=false;
    if (row[ig]=col[io])
    then if (((parent[col[ig]]=row[io]) and (est[io]<(0.5*est[ig])))
             or (col[ig]=parent[row[io]])) then useit[othergroup]:=false;
    if (col[ig]=row[io])
    then if (((parent[row[ig]]=col[io]) and (est[io]<(0.5*est[ig])))
             or (row[ig]=parent[col[io]])) then useit[othergroup]:=false;
 end;

for q:=1 to numnode do    (* prepare for clique-finding *)
begin
   for r:=1 to numnode do connected[q,r]:=false;
   connected[q,q]:=true
end;
for i:=1 to depthofsearch do
if useit[i]
then begin
       pq:=c[i];
       connected[row[pq],col[pq]]:=true;
       connected[col[pq],row[pq]]:=true;
     end;
(* look through binary feature list for maximal-weight complete subgraphs *)
  clique:=0; cc:=0;
  for r:=1 to numnode do all[r]:=r;
cliques(all,0,numnode);

if (clique>0) then  (* estimate clique weights *)
for nc:=1 to clique do
begin
    weight[nc]:=0.0;
  for i:=1 to depthofsearch do
  begin
      count:=0;
    for j:=1 to numberhosts[nc] do
     if ((row[c[i]]=host[nc,j]) or (col[c[i]]=host[nc,j]))
     then count:=count+1;
    if (count>1) then weight[nc]:=weight[nc]+est[c[i]]
  end;
  nh:=numberhosts[nc];
  test:=weight[nc]/(nh*(nh-1)/2);
  writeln(outfile,'clique',nc:3,'   ave. weight =',test:field:decplaces);

(* eliminate each pairwise feature i that went into clique,
    unless it's twice as large as clique *)
  for i:=1 to depthofsearch do
  begin
      count:=0;
    for j:=1 to numberhosts[nc] do
     if ((row[c[i]]=host[nc,j]) or (col[c[i]]=host[nc,j]))
     then count:=count+1;
    if (count>1)
    then begin
           nh:=numberhosts[nc];
           test:=(weight[nc]-est[c[i]])/((nh*(nh-1)/2)-1);
           if ((est[c[i]]*cthreshold)<test) then useit[i]:=false
         end
  end  (* loop on i *)
end; (* loop for clique nc *)

(* put cliques & useable features into "host" & "useit" *)
  feature:=clique;
for i:=1 to depthofsearch do
if useit[i] 
then begin
        feature:=feature+1;
        host[feature,1]:=row[c[i]]; host[feature,2]:=col[c[i]];
        numberhosts[feature]:=2;
     end;
depthofsearch:=feature;
for i:=1 to depthofsearch do useit[i]:=true;

(* remove complementary-set features *)
if (depthofsearch>=2) then
for group:=1 to (depthofsearch-1) do
if useit[group]
then begin
     groupset:=[];
   for i:=1 to numberhosts[group] do
     groupset:=groupset+branchset[host[group,i]];
 for othergroup:=(group+1) to (depthofsearch) do
 if useit[othergroup]
   then begin
        othergroupset:=[];
      for i:=1 to numberhosts[othergroup] do
        othergroupset:=othergroupset+branchset[host[othergroup,i]];
      if ((groupset+othergroupset)=branchset[rootnode])
      then begin
(* represent as feature of objects that are nearer in tree approx. of data *)
          hght1:=0.0; hght2:=0.0;
        for i:=1 to numberhosts[group] do
         hght1:=hght1+height[host[group,i]]+length[host[group,i]];
        for i:=1 to numberhosts[othergroup] do
         hght2:=hght2+height[host[othergroup,i]]+length[host[othergroup,i]];
        if ((hght1/numberhosts[group])<(hght2/numberhosts[othergroup]))
         then useit[othergroup]:=false
         else useit[group]:=false;
        end;
   end;
end;

(* clean up arrays, throwing out features deleted as redundant *)
if (depthofsearch>=1)
then begin
         feature:=0;
       for i:=1 to depthofsearch do
        if (useit[i] and ((feature+numnode)<thriceobject))
        then begin
              feature:=feature+1;
              use[feature+numnode]:=true;
              numberhosts[feature]:=numberhosts[i];
              for k:=1 to numberhosts[feature] do host[feature,k]:=host[i,k]
             end;
       if (feature<nummarked) then nummarked:=feature;
     end;

end; (* findbestset *)


(***************************************************************************)
(* for each pair of internal nodes in the tree ("node" and "othernode"),   *)
(* look at the four subtrees determined by those two nodes, and calculate  *)
(* the size of the marked feature shared by each pair of subtrees          *)
(***************************************************************************)
begin (* findfeatures *)
   nmarked:=0;     (* current number of marked features > threshold *)
for node:=(numobject+1) to (numnode-2) do        (* don't consider rootnode *)
begin
      write('.');
   for othernode:=(node+1) to (numnode-1) do                       (* ditto *)
   begin

     for i:=1 to 2 do
     for j:=1 to 2 do
     if ((branchset[child[node,i]]*branchset[child[othernode,j]])=[])
     then begin

      branch1:=child[node,i];
      branch2:=child[othernode,j];
      count:=0; estimate:=0.0; sum:=0.0; treesum:=0.0;

        icount:=0; jcount:=0;
        for leaf:=1 to numobject do
        begin
          if (leaf in branchset[branch1])
           then begin icount:=icount+1; ileaflist[icount]:=leaf end;
          if (leaf in branchset[branch2])
           then begin jcount:=jcount+1; jleaflist[jcount]:=leaf end
        end;

      for ii:=1 to icount do
      for jj:=1 to jcount do
      begin
         iijj:=ijfunction(ileaflist[ii],jleaflist[jj]);
         sum:=sum+prox[iijj];
         treesum:=treesum+d[iijj];
      end;

      avedisij:=sum/(icount*jcount);
      treedisij:=treesum/(icount*jcount);

for k:=1 to (numobject-1) do
if not((k in branchset[branch1]) or (k in branchset[branch2]))
then begin

         avedisik:=0.0; treedisik:=0.0;
      for ileaf:=1 to icount do
      begin
      avedisik:=avedisik+prox[ijfunction(ileaflist[ileaf],k)];
      treedisik:=treedisik+d[ijfunction(ileaflist[ileaf],k)];
      end;
      avedisik:=avedisik/icount;
      treedisik:=treedisik/icount;

         avedisjk:=0.0; treedisjk:=0.0;
      for jleaf:=1 to jcount do
      begin
      avedisjk:=avedisjk+prox[ijfunction(jleaflist[jleaf],k)];
      treedisjk:=treedisjk+d[ijfunction(jleaflist[jleaf],k)];
      end;
      avedisjk:=avedisjk/jcount;
      treedisjk:=treedisjk/jcount;

for l:=(k+1) to numobject do
if not((l in branchset[branch1]) or (l in branchset[branch2]))
then begin

         avedisil:=0.0; treedisil:=0.0;
      for ileaf:=1 to icount do
      begin
      avedisil:=avedisil+prox[ijfunction(ileaflist[ileaf],l)];
      treedisil:=treedisil+d[ijfunction(ileaflist[ileaf],l)];
      end;
      avedisil:=avedisil/icount;
      treedisil:=treedisil/icount;

         avedisjl:=0.0; treedisjl:=0.0;
      for jleaf:=1 to jcount do
      begin
      avedisjl:=avedisjl+prox[ijfunction(jleaflist[jleaf],l)];
      treedisjl:=treedisjl+d[ijfunction(jleaflist[jleaf],l)];
      end;
      avedisjl:=avedisjl/jcount;
      treedisjl:=treedisjl/jcount;

      diskl:=prox[ijfunction(k,l)];
      treediskl:=d[ijfunction(k,l)];


sum1:=avedisij+diskl;     treesum1:=treedisij+treediskl;
sum2:=avedisik+avedisjl;  treesum2:=treedisik+treedisjl;
sum3:=avedisil+avedisjk;  treesum3:=treedisil+treedisjk;

(* if tree structure is ((i j)(k l)), don't use to estimate feat. for i&j *)
if not((treesum1<treesum2) and (treesum1<treesum3))
then begin
      if (treesum2<treesum3)
        then diff:=sum3-sum1
        else diff:=sum2-sum1;
      estimate:=estimate+diff;
      count:=count+1;
     end;

end; (* loop on l *)
end; (* loop on k *)

if (count>0) then estimate:=estimate/(count*2);

if (nmarked<maxsize)
then begin

        if (estimate>threshold)
        then begin
               nmarked:=nmarked+1;
   (* re-use array c to sort marked segments *)
               c[nmarked]:=nmarked; est[nmarked]:=estimate;
               row[nmarked]:=branch1; col[nmarked]:=branch2;
             end
     end
else begin writeln(outfile,'WARNING:  too many marked features found;',
             ' increase threshold and re-run'); writeln(outfile) end;

end; (* loop on children of i and j *)
end; (* loop on j *)
end; (* loop on i *)

if (nmarked<depthofsearch) then depthofsearch:=nmarked;
if (nmarked<nummarked) then nummarked:=nmarked;

   writeln(outfile);
if (nmarked=0)
 then writeln(outfile,'no marked features were greater than threshold')
else begin

qsort(est,1,nmarked);

   writeln(outfile,'  i  j     estimate     [ set(i) ]   [ set(j) ]');
   writeln(outfile,'  -  -     --------     -----------------------');
for q:=1 to depthofsearch do
begin
   pq:=c[q];
   write(outfile,row[pq]:3,col[pq]:3,'     ',est[pq]:field:decplaces);
   column:=14+field;
   nerf[1]:=row[pq]; nerf[2]:=col[pq];
   for i:=1 to 2 do
   begin
      write(outfile,'       ['); column:=column+8;
      for j:=1 to numobject do
        if (j in branchset[nerf[i]])
        then begin
               if ((column<=linesize) and ((column+3)>linesize))
               then begin writeln(outfile); column:=18+field;
                          for l:=1 to column do write(outfile,' ')
                    end;
               write(outfile,j:3); column:=column+3;
             end;
   write(outfile,' ]'); column:=column+1;
   end;
   writeln(outfile);
end;
writeln(outfile); writeln(outfile);

(* find cliques & eliminate redundant features *)
writeln(outfile,
    'checking for cliques & redundant patterns of marked features');
  if (nmarked>thriceobject) then nmarked:=thriceobject;
findbestset

end; (* if nmarked > 0 *)
writeln(outfile); writeln(outfile);

end;  (* findfeatures *)



(*********************************************************************)
(* lsq: find least-squares solution of model equations                 *)
(*********************************************************************)
procedure lsq(var xvar: nodearray);
var   ata: array[1..thriceobject,1..thriceobject] of real;
      arow,perm: array[1..thriceobject] of integer;
      atb,scale: array[1..thriceobject] of real;
      rowmax,big,sizepivot: real;
      pivotrow,hold: integer;
      sum,pivot,rowfactor: real;
      numparms,kk: integer;
      usetwo: boolean;

(* set up one row of model matrix, a, and use to make aTa *)
procedure setup;
begin

(* can't estimate a features shared by both children of root *)
if ((use[child[rootnode,1]]=true) and (use[child[rootnode,2]]=true))
then begin
      usetwo:=true;
      use[child[rootnode,2]]:=false;
      setzero:=setzero+1
     end
else usetwo:=false;
  numparms:=numfeat-setzero;

(* initialize for setup of A'A matrix and A'b array *)
for k:=1 to numparms do
begin
   for l:=1 to numparms do ata[k,l]:=0.0;
   atb[k]:=0.0;
end;

(* now loop on each distance, ij, to see which parameters are involved *)
   ij:=0;
for i:=2 to numobject do
for j:=1 to (i-1) do
begin
  ij:=ij+1;
(* first set up tree features *)
    for l:=1 to numfeat do arow[l]:=0;
    kk:=0;
  for k:=1 to numnode do
  if (use[k]=true)
  then begin
          kk:=kk+1;
          if (((i in branchset[k]) and not(j in branchset[k]))
              or ((j in branchset[k]) and not(i in branchset[k])))
          then arow[kk]:=1 else arow[kk]:=0;
       end;
(* now set up marked features *)
  for k:=1 to (numfeat-numnode) do
  if (use[k+numnode]=true)
  then begin
         kk:=kk+1;
         arow[kk]:=0;
         for l:=1 to numberhosts[k] do
         if ((spillover[host[k,l]]=true)
          and (((i in branchset[host[k,l]]) and not(j in branchset[host[k,l]]))
          or ((j in branchset[host[k,l]]) and not(i in branchset[host[k,l]]))))
         then arow[kk]:=1;
         for l:=1 to (numberhosts[k]-1) do
          for m:=(l+1) to numberhosts[k] do
           if (((i in branchset[host[k,l]]) and (j in branchset[host[k,m]]))
            or((j in branchset[host[k,l]]) and (i in branchset[host[k,m]])))
           then arow[kk]:=arow[kk]-2;
       end;
(* now use both tree and marked features to make A'A matrix *)
(* also set up A'b array *)
  for k:=1 to numparms do
  begin
    for l:=1 to numparms do ata[k,l]:=ata[k,l]+(arow[k]*arow[l]);
    atb[k]:=atb[k]+(prox[ij]*arow[k])
  end;
end; (* loop on ij *)

end; (* setup *)


procedure eliminate;
begin

(* invert ata *)
for i:=1 to numparms do
begin
  perm[i]:=i;
  rowmax:=0.0;
  for j:=1 to numparms do
   if (rowmax<abs(ata[i,j])) then rowmax:=abs(ata[i,j]);
  if (rowmax>0.0) then scale[i]:=1.0/rowmax
        else writeln(outfile,' zero row encountered',i:4);
end;

for k:=1 to (numparms-1) do
begin
   big:=0.0;
   for i:=k to numparms do
   begin
     sizepivot:=abs(ata[perm[i],k]*scale[perm[i]]);
     if (sizepivot>big)
      then begin big:=sizepivot; pivotrow:=i end;
   end;
   if (big<=0.0) then writeln(outfile,' zero pivot, col:',k:4);
   if (pivotrow<>k)
   then begin
          hold:=perm[k];
          perm[k]:=perm[pivotrow];
          perm[pivotrow]:=hold
        end;
   pivot:=ata[perm[k],k];
   for i:=(k+1) to numparms do
   begin
      rowfactor:=ata[perm[i],k]/pivot;
      ata[perm[i],k]:=rowfactor;
      for j:=(k+1) to numparms do
        ata[perm[i],j]:=ata[perm[i],j]-rowfactor*ata[perm[k],j];
   end;
end;  (* loop on k *)
if (ata[perm[numparms],numparms]=0.0)
 then writeln(outfile,' last pivot=0');
end; (* eliminate *)


procedure backsubstitute;
begin

xvar[1]:=atb[perm[1]];
for i:=2 to numparms do
begin
   sum:=0.0;
   for j:=1 to (i-1) do
      sum:=sum+ata[perm[i],j]*xvar[j];
   xvar[i]:=atb[perm[i]]-sum;
end;

xvar[numparms]:=xvar[numparms]/ata[perm[numparms],numparms];
for i:=(numparms-1) downto 1 do
begin
   sum:=0.0;
   for j:=(i+1) to numparms do
      sum:=sum+ata[perm[i],j]*xvar[j];
   xvar[i]:=(xvar[i]-sum)/ata[perm[i],i];
end;

end; (* backsubstitute *)


procedure adjustlengths;
var h1,h2: real;
begin

zeroleft:=setzero;

for k:=numfeat downto 1 do
if (use[k]=true)
 then length[k]:=length[k-zeroleft]
 else begin length[k]:=0.0; zeroleft:=zeroleft-1 end;


for i:=(numobject+1) to numnode do
if (spillover[i]=true)
then for j:=1 to (numfeat-numnode) do
     for k:=1 to numberhosts[j] do
        if (host[j,k]=i)
        then length[i]:=length[i]+length[j+numnode];


rootarclength:=length[child[rootnode,1]]+length[child[rootnode,2]];
length[child[rootnode,1]]:=saveroot*rootarclength;
length[child[rootnode,2]]:=(1.0-saveroot)*rootarclength;
h1:=height[child[rootnode,1]]+length[child[rootnode,1]];
h2:=height[child[rootnode,2]]+length[child[rootnode,2]];
if (h1>h2) then height[rootnode]:=h1
           else height[rootnode]:=h2;
if (usetwo=true)
then begin use[child[rootnode,2]]:=true; setzero:=setzero-1 end;

end; (* adjustlengths *)


begin (* lsq *)

   setup;
   eliminate;
   backsubstitute;
   adjustlengths;

end; (* lsq *)



(*********************************************************************)
(* checkspillover: check for small and negative parameter estimates  *)
(*********************************************************************)
procedure checkspillover;
var   markedfsum: array[1..thriceobject] of real;
      branch,which,markedf: integer;
      thresh: real;
begin
flagzero:=false;

(* check if marked features overflow their host arc *)
  for branch:=1 to numnode do markedfsum[branch]:=0.0;
for markedf:=1 to nummarked do
if ((use[markedf+numnode]=true) and (length[markedf+numnode]>=threshold))
then for which:=1 to numberhosts[markedf] do
     begin
       branch:=host[markedf,which];
       markedfsum[branch]:=markedfsum[branch]+length[markedf+numnode];
     end;
write(outfile,'spillover of marked features on arcs: ');
for branch:=(numobject+1) to numnode do
  if (markedfsum[branch]>length[branch])
  then begin
         spillover[branch]:=true;
         write(outfile,branch:4);
         if (use[branch]=true)
         then begin
                flagzero:=true;
                setzero:=setzero+1;
                use[branch]:=false;
              end;
       end;
writeln(outfile);

(* eliminate interior tree arcs & marked features smaller than threshold *)
write(outfile,'features smaller than threshold: ');
for i:=(numobject+1) to numfeat do
begin
  if (i<=numnode) then thresh:=tthreshold else thresh:=threshold;
  if ((use[i]=true) and (length[i]<thresh))
  then begin
         use[i]:=false;
         length[i]:=0.0;
         flagzero:=true;
         setzero:=setzero+1;
         write(outfile,i:4);
       end;
end;
writeln(outfile);

maxspillover:=0.0;
for branch:=1 to numobject do
 if (maxspillover<(markedfsum[branch]-length[branch]))
 then maxspillover:=markedfsum[branch]-length[branch];
newconstant:=2*maxspillover;
writeln(outfile,'maximum leaf spillover= ',maxspillover:field:decplaces);
writeln(outfile); writeln(outfile);

end; (* checkspillover *)




(**************************************************************************)
(* describemarked: describe final set of marked features                  *)
(**************************************************************************)
procedure describemarked;
var feature: integer;
begin

writeln(outfile); writeln(outfile);
writeln(outfile,'final set of marked features:');
writeln(outfile);
writeln(outfile,'feature  objects sharing feature');
writeln(outfile,'-------  -----------------------');

if (namelength>0) then fld:=namelength else fld:=3;

for feature:=1 to nummarked do
  if use[feature+numnode]
  then begin
            writeln(outfile);
         write(outfile,'   ',symbol[feature],'     [ ');
         column:=12;
         for j:=1 to numberhosts[feature] do
           for k:=1 to numobject do
             if (k in branchset[host[feature,j]])
              then begin
                  if ((column+fld)>linesize)
                   then begin writeln(outfile); column:=12;
                            for l:=1 to column do write(outfile,' ')
                            end;
                  if (namelength=0)
                   then begin write(outfile,k:3); column:=column+3 end
                   else for l:=1 to namelength do if (name[k,l]<>' ')
                     then begin write(outfile,name[k,l]); column:=column+1 end;
                  write(outfile,', '); column:=column+2;
                end;
         writeln(outfile,']');
      end;
end; (* describemarked *)




(****************************************************************************)
(****************  start of main program  ***********************************)
(****************************************************************************)
begin (* main *)

  writeln; writeln('extree version 1.5'); writeln;

(* PC TURBO PASCAL FILE DEFINITION STATEMENTS *)
  writeln('enter input file name:');
  readln(infilnam);
  writeln('enter output file name:');
  readln(outfilnam);
  assign(infile,infilnam);
  assign(outfile,outfilnam);
(* Turbo PASCAL compiler directive to increase stack size *)
{$M 65520,0,655360}

   reset(infile);
   rewrite(outfile);

      writeln; writeln('reading data');

   readin;

      if (lowerhalf and printdata)
      then begin
             writeln(outfile,'data matrix:');
             writematrix(prox,numobject,field,decplaces);
           end;

   chkmetric;

      if printtransformed (* prox has been transformed to satisfy m. axioms *)
      then begin
            writeln(outfile,'transformed data distances:');
            writematrix(prox,numobject,field,decplaces);
           end;

   initialize;

      write('fitting tree..');


(***** begin addtree section *********************************************)

while (n>=4) do
begin

if (specifytree=false)
then begin            (* find objects (branches) to be joined *)

      quadruples;

        numgroup:=0; nreduce:=0;
      findgroups;

     end

else begin            (* only if user-specified tree structure *)
      readln(infile,i,j);
      numgroup:=1; nreduce:=0; group[1]:=[];
      for int:=1 to n do if ((index[int]=i) or (index[int]=j))
                     then group[1]:=group[1]+[int];
     end;

      for int:=1 to n do use[int]:=true;
   for whichgroup:=1 to numgroup do makenode;

   cleanup;

end;
(* end main addtree loop ****************************************************)


(* choose a root for the tree (there are now only 2 or 3 nodes left) *)
if (n=3)
then begin
      numgroup:=1; whichgroup:=1;
if (height[index[1]]>height[index[2]]) and (height[index[1]]>height[index[3]])
 then group[1]:=[2]+[3];
if (height[index[2]]>height[index[1]]) and (height[index[2]]>height[index[3]])
 then group[1]:=[1]+[3];
if (height[index[3]]>height[index[1]]) and (height[index[3]]>height[index[2]])
 then group[1]:=[1]+[2];
      for int:=1 to 3 do use[int]:=true;
      makenode;
      cleanup;
     end;
writeln;

(* now n=2 *)
makeroot;
hangroot;
if not(rooting=asis)  
 then begin
        findbestroot;   (* specified rooting or minvarroot *)
        hangroot
      end;
(* end root-choosing section *)

      numfeat:=numnode;

   report;            (* draw addtree *)
      writeln('calculating fit statistics');
   stats;            (* & report fit statistics *)



(****** begin extree section ************************************************)
      writeln(outfile); writeln(outfile);
      writeln(outfile,'extree analysis:'); writeln(outfile);
      if not(specifymarked)
       then writeln(outfile,nummarked:4,' marked features will be tried');
      writeln(outfile,'features smaller than ',threshold:field:decplaces,
                    ' will be eliminated'); writeln(outfile);

      for int:=1 to numnode do use[int]:=true;
      for int:=(numnode+1) to thriceobject do use[int]:=false;

      write('estimating marked segments...');

if not(specifymarked)
then findfeatures      (* check & estimate marked features *)

else begin            (* (unless they are to be specified by user) *)
        numgroup:=0;
        repeat readln(infile,ch) until ch='*';
      while not(eof(infile)) do
      begin
        numgroup:=numgroup+1; int:=0;
        while not(eoln(infile)) do
        begin
          int:=int+1;
          read(infile,host[numgroup,int]);
        end;
        numberhosts[numgroup]:=int;
          use[numgroup+numnode]:=true;
        if (numberhosts[numgroup]<2)
         then writeln(outfile,'error in reading marked feature # ',numgroup:3);
        readln(infile);
      end;
      nummarked:=numgroup;
     end;
       writeln;

(* describe initial set of marked features *)
if (nummarked>0)
then begin
      for i:=1 to nummarked do
      begin
        writeln(outfile,'feature ',symbol[i]);
        for j:=1 to numberhosts[i] do
        begin
         write(outfile,host[i,j]:4);
         write(outfile,'   (');
         for k:=1 to numobject do
              if (k in branchset[host[i,j]]) then write(outfile,k:3);
         writeln(outfile,' )');
        end;
      end;
      writeln(outfile); writeln(outfile);

      numfeat:=numfeat+nummarked;

      for int:=1 to numnode do spillover[int]:=false;
      use[rootnode]:=false;
      setzero:=1;


      writeln('simultaneous estimation of parameters');
   repeat (* estimate & prune: loop till all feature estimates >= 0.0 *)
      iteration:=iteration+1;
      writeln(outfile); writeln(outfile);
      writeln(outfile,'iteration:',iteration:4); writeln(outfile);

   lsq(length);
   checkspillover;

   until (flagzero=false);
       for int:=1 to numobject do length[int]:=length[int]+maxspillover;

   report;            (* draw extree *)
      writeln('calculating fit statistics');
   stats;            (* & report fit statistics *)

end; (* if nummarked > 0 *)


(* describe any marked features that have been retained *)
if (nummarked>0) then describemarked;
writeln(outfile); writeln(outfile); writeln(outfile); writeln(outfile);


      if printmodeldis  (* note that array d now contains tree distances *)
      then begin
            writeln(outfile,'model distances:');
            writematrix(d,numobject,field,decplaces);
           end;

      if printresiduals  (* note that array d now contains tree distances *)
      then begin
            for int:=1 to numprox do 
                  prox[int]:=prox[int]-d[int]+newconstant;
            writeln(outfile,'residual distances:');
            writematrix(prox,numobject,field,decplaces);
           end;


close(infile);
close(outfile);

writeln; writeln('end extree run');
end. (* main *)


