/*
   File MSBPCT.C

   Author: Robert Weiner, Programming Plus, rweiner@watsun.cc.columbia.edu

   Synopsis: Translates a BOO-encoded file (produced by MSBMKB) back into
             its original form.

   Modification History:

   29-APR-92    Initial Beta Release
                Ideas taken from old msbpct.c (versions before 01-may-92)
                and new msbmkb.c
   01-MAY-92    Added files="-", Added -q
   05-MAY-92    Release after outside testing
                Added void usage() proto
                Thanks to Christian Hemsing for OS-9 testing & defs.
                Thanks to Steve Walton for Amiga testing & defs.
   08-MAY-92    Prepare for general release
                Modified _CDECL define, Added uchar defs,
                Fixed up for MSDOS GNU CC
                Use gcc -DMSDOS to compile.  This MSDOS GCC defines "unix"
                which doesn't help us at all!
   17-MAY-92    Add AtariST defs & Improved __STDC__ check from Bruce Moore
                I think I'm going to leave off the old program "does output
                file exist? overwrite y/n?" check.  Please let me know if
                you think it's really reqd.  Actually, I would like to
                force the user to specify the output file name always since
                embedded output names can be used maliciously.
                Removed string fns so don't need string.h.
                Added Check for ~0 removing non-nulls.
                Next general release now ready...
                Thanks to those listed in the directory below:
   12-JUL-92    Near Final release...??
                Added portability items, cmd line overrides ifdef
                UCHAR, VOID, NOANSI
                Shortened lines to 79 max (got them all?)
                Only thing not done is checking #ifdef NOUCHAR and adding
                any anding off bits which signed chars may introduce in
                unboo().

   Beta Testing Information, Supported Systems Directory:
   =====================================================================
   ( Tester / Operating System / O.S. Version / Compiler )

   Rob Weiner, rweiner@watsun.cc.columbia.edu:
        MSDOS           5.0     MSC 5.1
        MSDOS           5.0     GCC (DJGPP DOS 386/G++ 1.05)
        VAX/VMS         5.4-2   VAXC 3.2
        SUNOS           4.1
        UNIXPC          3.51
   Christian Hemsing, chris@v750.lfm.rwth-aachen.de:
        OS-9
   Stephen Walton, swalton@solaria.csun.edu:
        AMIGA                   MANX C (defines MCH_AMIGA)
   Bruce J. Moore, moorebj@icd.ab.com:
        AtariST TOS/GEMDOS      MWC 3.7

   Fun stuff such as my favorite testing shell command is now possible:

        $ for i in *
          do
                echo $i:
                cat $i | msbmkb -q - - | msbpct -q - - | cmp -l - $i
          done

   This properly implements the Lasner ~0 fixes.

   Synopsis:

   The en-booer writes out printable text from binary text via a 3 input
   char to 4 output char conversion (called "triple to quad" conversion).
   Since the input text can run out before the last triple can be formed,
   all en-booers (msbmkb) would add 1 or 2 nulls to the input stream to
   complete the triple such that a valid quad can be output.  Thus the
   problem where often a de-booer (msbpct) will create an output file from
   a boo encoded file, but the output file is larger than the input file
   by 1 or 2 nulls.

   Charles Lasner documented this problem and offered a fix...  For each
   1 or 2 extra null pad chars added to the input stream, the en-booer
   should add a trailing ~0 to the created boo file.  ~X (where X-'0' is a
   repeat value which indicates a number of "repeated nulls") does not
   have a value for the sequence "~0" which would imply: ``decode into a
   series of 0 nulls,'' a noop for "old" debooers.  Hence ~0 can be used
   as a flag that the input text had a "padding null" added to it and then
   the de-booer can know NOT to add these padding chars to the output
   stream.  This allows the en-boo/de-boo programs to finally always
   guarantee that you get what you started with after passing through the
   en-boo then de-boo process.

   Some bugs/facts with the MSBPCT/MSBMKB programs which popped up or were
   discovered recently (January through March 1992):

   - CURRENT msbpct will NOT make a correct output file from the boo file
     THIS msbmkb creates.  It loses or adds a char.  Comes from improper
     implementation of Lasner changes.  Note: CURRENT enbooer with CURRENT
     unbooer make the same mistakes encoding/uncoding hence files come out
     more or less ok.
   - OLD msbpct will create a proper output file from a boo file created
     from THIS en-booer.
   - Current msbpct also screws up output column checking and can override
     the max (usually ~0~0 at eof) and undercut the standard value.
   - Current msbpct doesn't correctly implement lasner fixes.
   - Current msbpct tells of "using an old booer" at times it can
     determine that that statement is meaningless.
   - Addtl improper implementation of Lasner change yields (quite often)
     an additional 2 nulls in the output file which are removed by an
     additional 2 ~0 sequence... to break even.  ie. where old & this
     enbooer at eof writes "~A", the current (bad) booer writes "~C~0~0".

   (other items not listed).

   This new msbpct replaces the old one (msbpct's dated before Mar1992).
   Credit should be given to the maintainers of the old msbpct:

        Original by Howie Kaye -- Columbia University 3/11/86
        Robert Weiner of Programming Plus,
        Frank da Cruz of Columbia University,
        Davide P. Cervone of University of Rochester,
        Martin Knoblauch of TH-Darmstadt, Germany,
        John Matthews of U of Delaware,
        L. John Junod of DTNSRDC,
        Christian Hemsing, RWTH Aachen, Germany.

   Sorry, this seems a bit slower than previous msbpct.  Not sure why yet.
*/

#include <stdio.h>      /* FILE, getc, fopen, fwrite, fprintf, BUFSIZ */

/*
   Version Dependencies...
   Give each new special case its own defs:
*/

#ifdef VAX11C                           /* VAXC032 */
#define SYSTEM          "VAX/VMS"
#define EXIT_GOOD       1
#define EXIT_INFO       3
#define EXIT_BAD        5
#define FOPEN_ROPTS     "r"
/* open it VMS/RMS Fixed 512 - VMS Executable Format */
#define FOPEN_WOPTS     "wb","ctx=rec","mrs=512","rfm=fix"
#define YES_PROTOS
#endif

#ifdef MSDOS                            /* MSC 5.1 */
#define SYSTEM          "MSDOS"
#define EXIT_GOOD       0
#define EXIT_INFO       1
#define EXIT_BAD        2
#define FOPEN_ROPTS     "r"
#define FOPEN_WOPTS     "wb"
#define YES_PROTOS
#endif

#ifdef GEMDOS                           /* AtariST - TOS - MWC v3.7 */
#define SYSTEM          "AtariST/TOS"
#define EXIT_GOOD       0
#define EXIT_INFO       1
#define EXIT_BAD        2
/* NOTE(review): binary read / text write looks inherited from MSBMKB
   (which reads binary and writes text); confirm these modes are right
   for a decoder on this platform before changing them. */
#define FOPEN_ROPTS     "rb"
#define FOPEN_WOPTS     "w"
/* CASE_CHANGE is not referenced anywhere in this file */
#define CASE_CHANGE     CHANGE_LOWER    /* lowercase boo file name */
#define YES_PROTOS
#endif

#ifdef OSK
#define SYSTEM          "OS-9"
#define EXIT_GOOD       0
#define EXIT_INFO       1
#define EXIT_BAD        1
#define FOPEN_ROPTS     "r"
#define FOPEN_WOPTS     "w"
#define CASE_CHANGE     CHANGE_NONE     /* leave filename case sensitive */
/* #undef YES_PROTOS     * default OS9 to noprotos * */
#endif

#ifndef FOPEN_ROPTS             /* No system found, use unix defaults */
#define SYSTEM          "UNIX/Amiga/Generic"
#define EXIT_GOOD       0
#define EXIT_INFO       1
#define EXIT_BAD        2
#define FOPEN_ROPTS     "r"
#define FOPEN_WOPTS     "w"
/* #undef YES_PROTOS     * default UNIX/generic to noprotos * */
#endif

#ifndef NOANSI                  /* allow cmd line override to STDC */
#ifdef __STDC__                 /* Ansi likes prototypes */
#if __STDC__                    /* MWC sets this defined but 0 valued */
#define YES_PROTOS
#include <stdlib.h>             /* exit(); only assumed to exist on ANSI */
#endif
#endif /* __STDC__ */
#endif /* NOANSI */

#ifndef VOID                    /* allow cmd line override to VOID */
#define VOID void               /* assume system likes void */
#endif

#ifndef _CDECL
#define _CDECL
#endif

#ifndef __DATE__
#define __DATE__ "01-MAY-1992"
#endif

#ifndef __TIME__
#define __TIME__ "00:00:00"
#endif

/*
   Typedefs
*/
#ifndef UCHAR                   /* allow cmd line override */
typedef unsigned char uchar;    /* possible portability concern */
#define UCHAR uchar
#else
#define NOUCHAR 1               /* flag saying cmd line changed uchar */
#endif

/*
   BOO Decoder Defs:
*/
#define unchar(c)       ( (c) - '0' )   /* printable boo char -> value */

/*
   Here are the function prototypes...
   If your 'C' don't like prototypes, don't declare YES_PROTOS.
   The function definitions below follow the same switch, so the file
   builds both with pre-ANSI compilers and with compilers that no longer
   accept old-style (K&R) definitions.
*/
#ifdef YES_PROTOS
VOID _CDECL convert (FILE *, FILE *);
int  _CDECL get4    (FILE *, UCHAR *);
VOID _CDECL output  (FILE *, UCHAR *, int);
VOID _CDECL unboo   (UCHAR *, UCHAR *);
VOID        usage   (VOID);
#else
VOID convert ();
int  get4    ();
VOID output  ();
VOID unboo   ();
VOID usage   ();
#endif

long count_in = 0, count_out = 0;       /* character counts */
int  quiet = 0;                         /* set by -q: suppress chatter */

/*
   main: parse options, open the boo file, read the embedded output file
   name from its first line (optionally overridden by a second command
   line argument), decode the remainder and report byte counts.
   A file name of "-" selects stdin / stdout.
*/
#ifdef YES_PROTOS
int main(int argc, char **argv)
#else
int main(argc, argv)
int argc;
char **argv;
#endif
{
    FILE *fpin, *fpout;
    char outfile[BUFSIZ], *outfilptr;

    /* leading "-x" options; a lone "-" is a file name, not an option */
    while (argc > 1 && *argv[1] == '-')
    {
        if (argv[1][1] == '\0')
            break;

        switch (argv[1][1])
        {
        case 'v':                       /* version */
            fprintf(stderr,
                    "MSBPCT.C, Date=\"%s, %s\", System=\"%s\"\n",
                    __DATE__, __TIME__, SYSTEM);
            fprintf(stderr,
                    "Email comments to \"rweiner@kermit.columbia.edu\" ");
            fprintf(stderr, "(Rob Weiner/Programming Plus)\n");
            fprintf(stderr, "\n");
            break;

        case 'q':                       /* quiet */
            quiet = 1;
            break;

        default:
            usage();
        }
        argc--;
        argv++;
    }

    if (argc < 2 || argc > 3)
        usage();

    if (argv[1][0] == '-' && argv[1][1] == '\0')
    {
        fpin = stdin;
    }
    else if ((fpin = fopen(argv[1], FOPEN_ROPTS)) == NULL)
    {
        fprintf(stderr, "Error, cannot open input file \"%s\"\n", argv[1]);
        exit(EXIT_BAD);
    }

    /* first line of a boo file is the original file's name */
    if (fgets(outfile, BUFSIZ, fpin) == NULL)
    {
        fprintf(stderr, "Error, cannot read boo filename line\n");
        exit(EXIT_BAD);
    }

    /* cut the name at the first CR or LF (done by hand: no string.h) */
    outfilptr = outfile;
    while (*outfilptr && (*outfilptr != '\n') && (*outfilptr != '\r'))
        outfilptr++;
    *outfilptr = '\0';

    outfilptr = outfile;
    if (argc == 3)              /* override on internally stored filename */
    {
        outfilptr = argv[2];
        if (!quiet)
        {
            fprintf(stderr,
                    "BOO Internally stored output filename = \"%s\"\n",
                    outfile);
            fprintf(stderr,
                    "Command line output filename override = \"%s\"\n",
                    outfilptr);
        }
    }

    if (!quiet)
        fprintf(stderr,
                "Creating Binary File \"%s\" from BOO File \"%s\"...\n",
                outfilptr, argv[1]);

    if (outfilptr[0] == '-' && outfilptr[1] == '\0')
    {
        fpout = stdout;
    }
    else if ((fpout = fopen(outfilptr, FOPEN_WOPTS)) == NULL)
    {
        fprintf(stderr, "Error, cannot open output file \"%s\"\n",
                outfilptr);
        exit(EXIT_BAD);
    }

    convert(fpin, fpout);
    output(fpout, (UCHAR *)"", 0);      /* flush output buffering */

    fclose(fpin);
    /* the stdio buffer is written out here, so a failure means data loss */
    if (fclose(fpout) != 0)
    {
        fprintf(stderr, "Error, cannot close output file \"%s\"\n",
                outfilptr);
        exit(EXIT_BAD);
    }

    if (!quiet)
    {
        fprintf(stderr, "Data bytes in: %ld, ", count_in);
        fprintf(stderr, "Data bytes out: %ld, ", count_out);
        fprintf(stderr, "Difference: %ld bytes\n", count_in - count_out);
    }

    exit(EXIT_GOOD);
    return EXIT_GOOD;                   /* not reached */
}

/*
   usage: print the command synopsis on stderr and exit with EXIT_INFO.
*/
#ifdef YES_PROTOS
VOID usage(VOID)
#else
VOID usage()
#endif
{
    fprintf(stderr,
            "MSBPCT = Decode Ascii BOO Encoded File into Binary File\n");
    fprintf(stderr,
"Usage: MSBPCT [-v(version) -q(quiet)] input_boo_file [output_file_override]\n"
            );
    fprintf(stderr,
            " Note: Filenames of '-' are supported for stdin & stdout\n");
    exit(EXIT_INFO);
}

/*
   convert: decode the body of a boo file, turning every 4 input chars
   into 3 output bytes and expanding ~X null runs.

   The most recently decoded triple is held back (must_output) until the
   next item is seen, because trailing ~0 markers say how many padding
   nulls must be dropped from the final triple.
*/
#ifdef YES_PROTOS
VOID convert(FILE *fpin, FILE *fpout)
#else
VOID convert(fpin, fpout)
FILE *fpin, *fpout;
#endif
{
    int n;
    int fill_nulls = 0;                 /* # of ~0 seen since last data */
    UCHAR inbuf[10], outbuf[10];
    int must_output = 0;                /* bytes of outbuf still pending */

    while ((n = get4(fpin, inbuf)) != 0)
    {
        if (n < 0)              /* -n is 1 more than # repeated nulls */
        {
            if (n == -1)                /* ~0 found */
            {
                fill_nulls++;           /* count #nulls to back up */
            }
            else
            {                           /* ~X null compression found */
                if (must_output)        /* output last triple */
                {
                    output(fpout, outbuf, must_output);
                    must_output = 0;
                }
                while (++n < 0)
                    output(fpout, (UCHAR *)"", 1);
                fill_nulls = 0;         /* ~0 must be after all data */
            }
        }
        else
        {
            if (must_output)            /* output last triple */
                output(fpout, outbuf, must_output);
            unboo(inbuf, outbuf);
            /* output these chars the next time around */
            fill_nulls = 0;             /* ~0 must be after all data */
            must_output = 3;            /* must output last triple */
        }
    }

    if (fill_nulls > 0)
    {
        if (!quiet)
            fprintf(stderr, "Fill Nulls = %d\n", fill_nulls);

        /* by definition, if there are ~0, there must be a triple */
        if (must_output < 3)    /* we expect a triple when see ~0s */
        {
            fprintf(stderr,
            "WARNING: Detected Invalid Boo Format (~0 after non-triple)\n");
            fprintf(stderr,
"WARNING: Output File is probably %d nulls greater than original input file\n",
                    fill_nulls);
        }
        else
        {
            must_output -= fill_nulls;
            /* the bytes being dropped should have been padding nulls */
            if (((fill_nulls > 0) && (outbuf[2] != '\0'))
             || ((fill_nulls > 1) && (outbuf[1] != '\0')))
            {
                fprintf(stderr,
    "WARNING: Detected Invalid Boo Format (Non-Null Chars Removed by ~0)\n");
            }
        }
    }

    if (must_output > 0)    /* output last, possibly ~0 reduced, triple */
        output(fpout, outbuf, must_output);
}

/*
   get4: read the next item from the boo stream.

   Returns  > 0  number of data chars read into buf (4 for a full quad;
                 a short final quad is padded with '0' so buf[0..3] is
                 always defined),
            < 0  a ~X sequence was read: the value is -(X-'0' + 1), so
                 -1 means ~0 and -(k+1) means a run of k nulls,
            0    end of file with nothing pending.

   CR and LF are line framing only and are skipped.  Exits the program on
   a read error or on a ~X whose count character is below '0'.
*/
#ifdef YES_PROTOS
int get4(FILE *fp, UCHAR *buf)
#else
int get4(fp, buf)
FILE *fp;
UCHAR *buf;
#endif
{
    int i = 0;                          /* data chars stored so far */
    int nulls = 0;                      /* 1 once a '~' prefix was seen */
    int c;
    int pad;

    do
    {
        if ((c = getc(fp)) == EOF)      /* hit eof */
        {
            if (ferror(fp))             /* quick check */
            {
                fprintf(stderr, "get4(): fread error on input file\n");
                exit(EXIT_BAD);
            }
            break;                      /* stop */
        }
        count_in++;

        /* \n means nothing; \r is skipped too so CRLF boo files decode
           on systems whose text mode does not strip it */
        if (c == '\n' || c == '\r')
            continue;

        if (i == 0)                     /* not in quad yet */
        {
            if (nulls == 1)             /* this char IS #nulls now */
            {
                if (c < '0')
                {
                    /* a negative count would be returned as 0 or as a
                       positive value and be mistaken for EOF or data */
                    fprintf(stderr,
                        "get4(): invalid null repeat count in input file\n");
                    exit(EXIT_BAD);
                }
                nulls = unchar(c) + 1;  /* add 1 as a ~0 flag */
                return (-nulls);        /* got it, return */
            }
            else if (c == '~')          /* null repeat prefix */
            {
                nulls = 1;
                continue;
            }
        }

        i++;                            /* count till 4 */
        *buf++ = (UCHAR) c;             /* save chars */
    } while (i <= 3);

    if (i > 0 && i < 4)
    {
        /* truncated input: make the rest of the quad decode as zero bits
           instead of whatever the caller's buffer held before */
        fprintf(stderr,
    "WARNING: Detected Invalid Boo Format (Incomplete Quad at End of File)\n");
        for (pad = i; pad < 4; pad++)
            *buf++ = '0';
    }

    return (i);
}

/*
   output: buffered writer for decoded bytes.

   n > 0   append n bytes from s to the internal buffer, writing the
           buffer to fp first whenever it is full,
   n == 0  flush: write whatever is buffered to fp,
   n < 0   back up: discard the last -n buffered bytes (exits if fewer
           than that are buffered).

   Exits the program on a write error.  Bytes actually written are added
   to count_out.
*/
#ifdef YES_PROTOS
VOID output(FILE *fp, UCHAR *s, int n)
#else
VOID output(fp, s, n)
FILE *fp;
UCHAR *s;
int n;
#endif
{
    static char buf[BUFSIZ];
    static char *p = buf;               /* next free slot in buf */
    unsigned count;

    if (n < 0)                          /* ~0 backup */
    {
        if ((p - buf) < -n)     /* ensure there is stuff to delete */
        {
            fprintf(stderr,
            "output(): Error, no chars in buffer to backup output stream\n");
            exit(EXIT_BAD);
        }
        p += n;                         /* backup ptr */
        return;
    }

    /* compare against the space left rather than forming p+n, which
       could point past the end of buf */
    if ((n != 0) && (n <= ((buf + sizeof(buf)) - p)))
    {
        /* will fit in current buffer */
        while (n-- > 0)
            *p++ = *s++;
        return;
    }

    /* flush request, or won't fit in current buffer: */
    /* take what we can, write current, load next */
    while ((n > 0) && (p < (buf + sizeof(buf))))
    {
        *p++ = *s++;
        n--;
    }

    /* this must be "p-buf,1" ordered here for VMS varying recs
       to come out right, probably helps fixed 512 too */
    count = p - buf;
    if ((count > 0) && (fwrite(buf, count, 1, fp) != 1))
    {
        fprintf(stderr, "output(): fwrite error on output file\n");
        exit(EXIT_BAD);
    }
    count_out += count;

    p = buf;
    while (n-- > 0)                     /* don't forget leftovers */
        *p++ = *s++;
}

/*
   unboo: decode one quad.  inbuf[0..3] are four boo chars, each carrying
   6 bits as (char - '0'); outbuf[0..2] receive the three 8-bit bytes.
*/
#ifdef YES_PROTOS
VOID unboo(UCHAR *inbuf, UCHAR *outbuf)
#else
VOID unboo(inbuf, outbuf)
UCHAR *inbuf, *outbuf;
#endif
{
    UCHAR x, y, z, a, b, c, d;

    /* get a,b,c,d the 4 booed bytes */
    a = unchar(*inbuf++);
    b = unchar(*inbuf++);
    c = unchar(*inbuf++);
    d = unchar(*inbuf);

    /* calc x,y,z the 3 unbooed bytes */
    /* the &ands keep each piece to its own bit field even if the input
       was not valid 6 bit data or UCHAR is not an 8 bit unsigned type */
    x = (UCHAR) (((a << 2) & 0374) | ((b >> 4) & 003));
    y = (UCHAR) (((b << 4) & 0360) | ((c >> 2) & 017));
    z = (UCHAR) (((c << 6) & 0300) | (d & 077));

    *outbuf++ = x;
    *outbuf++ = y;
    *outbuf   = z;
}

/* [EOF] */