#!/usr/local/bin/kermit +
#
# s t a t i s t i c s
#
# Given a file in which each line contains a pair of numbers, X and Y,
# computes and prints the max, min, mean, variance, and standard deviation of
# the X's and Y's, and the correlation coefficient of X and Y.  The numbers in
# file may (but need not) have decimal points and fractional parts.
#
# Usage:       statistics <filename>
# Requires:    C-Kermit 7.0 or later.
# Illustrates: Floating-point computation and formatting, file i/o.
#
# Note that floating-point arithmetic, unlike integer arithmetic, is done with 
# function calls.  The floating point function names all begin with "ffp".
# Also note the use of \fcontent() in the FOPEN statement; this to allow for
# DOS filenames with backslash path-separators.
#
# Author: F. da Cruz, The Kermit Project, Columbia University, July 1999.
# Version 2: Adds IF FLOAT checking.
#
if < \v(argc) 2 exit 1 Usage: \%0 filename  ; A filename is required

; Output function for X and Y statistics.
; Args: Label, X value, Y value.
;
define xxout {
  echo {\frpad(\%1:,16)\flpad(\ffprou(\%2,2),10)\%9\flpad(\ffprou(\%3,2),10)}
}

fopen /read \%c \fcontents(\%1)         ; Open the data file
if fail exit 1                          ; Check
.\%n = 0                                ; Number of X,Y pairs
.x:sum = 0                              ; Initialize sums
.y:sum = 0                              ; (not strictly necessary)
.x:sum2 = 0
.y:sum2 = 0
.xy:sum = 0

while not \f_eof(\%c) {                 ; Until end of input file...
    fread \%c line                      ; Read a line (record)
    if fail break                       ; Check
    if not def line continue            ; Ignore empty lines
    .\%x := \fword(\m(line),1,{ })      ; Extract X and Y from the record
    .\%y := \fword(\m(line),2,{ })
    if ( not ( float \%x && float \%y ) ) exit 1 Bad record: "\m(line)"
    .x:sum := \ffpadd(\m(x:sum),\%x)    ; Get sums and check validity
    .y:sum := \ffpadd(\m(y:sum),\%y)
    if = \%n 0 { .x:max := \%x, .x:min := \%x, .y:max := \%y, .y:min := \%y }
    incr \%n                            ; Count this record
    .x:max  := \ffpmax(\m(x:max),\%x)   ; Accumulate max, min, other stats
    .x:min  := \ffpmin(\m(x:min),\%x)
    .y:max  := \ffpmax(\m(y:max),\%y)
    .y:min  := \ffpmin(\m(y:min),\%y)
    .x:sum2 := \ffpadd(\m(x:sum2),\ffpmult(\%x,\%x)) ; Sum of squares (X)
    .y:sum2 := \ffpadd(\m(y:sum2),\ffpmult(\%y,\%y)) ; Sum of squares (Y)
    .xy:sum := \ffpadd(\m(xy:sum),\ffpmult(\%x,\%y)) ; Sum of products (XY)
}
fclose \%c                              ; Finished reading data - close file
echo
echo Records: \%n
if < \%n 2 exit 1 {Sorry, at least two data pairs are required.}

; Compute means, intermediate quantities, variances.
;
.x:mean := \ffpdiv(\m(x:sum),\%n)
.y:mean := \ffpdiv(\m(y:sum),\%n)
.x:ss   := \ffpsub(\m(x:sum2),\ffpdiv(\ffpmult(\m(x:sum),\m(x:sum)),\%n))
.y:ss   := \ffpsub(\m(y:sum2),\ffpdiv(\ffpmult(\m(y:sum),\m(y:sum)),\%n))
.xy:ss  := \ffpsub(\m(xy:sum),\ffpdiv(\ffpmult(\m(x:sum),\m(y:sum)),\%n))
.x:var  := \ffpdiv(\m(x:ss),\%n)
.y:var  := \ffpdiv(\m(y:ss),\%n)

; Print the results

echo {                       X         Y}
xxout Sum \m(x:sum) \m(y:sum)
xxout Minimum \m(x:min) \m(y:min)
xxout Maximum \m(x:max) \m(y:max)
xxout Mean \m(x:mean) \m(y:mean)
xxout Variance \m(x:var) \m(y:var)
xxout {Std deviation} \ffpsqrt(\m(x:var)) \ffpsqrt(\m(y:var))
echo
echo  Correlation coefficient:   -
\ffpround(\ffpdiv(\m(xy:ss),\ffpsqrt(\ffpmul(\m(x:ss),\m(y:ss)))),2)

exit 0
