#include #include #include #include // Huffman algorithm written using structures // // usage for encoding and decoding: // huffman probfile M // huffman probfile M decode // e.g. // huffman probs8 8 < sourcefile > zipfile // huffman probs8 8 decode < zipfile > uncompressedfile // // This program reads in a file called probfile which // should contain M positive numbers on M lines. // These will be run through the Huffman algorithm // to generate a tree structure in memory which is then // immediately used for encoding or decoding (decoding if // three arguments are given, otherwise encoding). // The sourcefile is assumed to consist of the characters // {1,2,3,4,5....,M}, one per line. // The zipfile is "binary" (ie uses {1,0}). // #define MAXSTRING 1000 // maximum length of codeword string expected #define VERBOSE 1 // "states" are objects that embody the leaves and internal // nodes and root node of the Huffman tree. struct state { int leaf ; // If this is >=1, then this state is a leaf, and // // the number leaf is the character number in the alphabet struct state * n0 ; struct state * n1 ; char codeword[MAXSTRING] ; // if a leaf node, this contains the word. // Also used when building codewords }; int readindvector(double *w, int lo, int hi, char *file); double *dvector(int nl,int nh); static struct state *recurse( double* p , struct state ** s , int M ) ; static int extendallwordswith ( struct state * s , int c ) ; int main( int argc , char *argv[] ){ struct state **s , **tmps , *currentlocation , *root ; char probfile[100] , spacer[10] , spacerS[10] ; int i ; int encode , decode = 0 ; ; int M = 4 ; double *p ; strcpy(spacer,""); strcpy(spacerS,"\n"); // Read the number of alphabet elements from command line if(argc>2) { sscanf(argv[2], "%d", &M ) ; } else { fprintf(stderr,"warning: Number of alphabet elements should be provided on command line\nUsage for encoding and decoding:\n huffman probfile M\n huffman probfile M decode\n"); } if(argc>1) { sscanf(argv[1], "%s", probfile ) ; } else { fprintf(stderr,"ERROR: probability filename required on command line\n"); exit(0); } if(argc>3) { fprintf(stderr,"decoding\n"); decode=1; } encode = 1 - decode ; // Allocate some memory p = dvector(1,M); // Read the probabilities from the input if ( readindvector( p , 1, M, probfile ) < 0 ) { exit(0); } // allocate the leaves. s = (struct state **)malloc( (M+1)*sizeof(struct state *)); tmps = (struct state **)malloc( (M+1)*sizeof(struct state *)); for ( i=1; i<=M ; i++ ) { if (VERBOSE>1) fprintf(stderr,"%d\t%9g\n", i,p[i]); s[i] = (struct state *)malloc(sizeof(struct state)); s[i]->leaf=i ; tmps[i] = s[i] ; strcpy( s[i]->codeword, "" ); // ensure codeword is blank } // use a copy of s, tmps, because we want to preserve the original s's root = recurse( p , tmps , M ); // report the huffman tree for ( i=1; i<=M ; i++ ) { if (VERBOSE>-1) fprintf(stderr,"%d\t%s\n", i,s[i]->codeword); } // Now read the source file if ( encode ) { while ( fscanf(stdin, "%d", &i) != EOF ) { // printf("%d->",i); if(s[i]) printf("%s%s",s[i]->codeword,spacer); } } else { // decode currentlocation = root ; while ( (i=fgetc(stdin)) > 0 ) { // use it to determine which way to go. if( i == '0' ) { currentlocation = currentlocation->n0 ; } else if( i == '1' ) { currentlocation = currentlocation->n1 ; } else { continue ; // ignore this character, get another } if ( currentlocation->leaf ) { printf("%d%s", (currentlocation->leaf) , spacerS ) ; currentlocation = root ; // back to root node } } } return(0); } // // recursive function to build Huffman tree // static struct state *recurse( double* p , struct state ** s , int M ) { // receives a positive vector of size M = 2 or more. // creates a new state by combining two of them; and // returns the value of the state's pointer. struct state *news ; int i1 = 1 , i2 = 2 , i ; int vacate , remain ; double p1 ; double p2 ; // set up space for a new state news = (struct state *)malloc(sizeof(struct state)); news->leaf=0 ; strcpy( news->codeword, "" ); // ensure codeword is blank // establish which are the two smallest elements, // and put their indices in i1 and i2. if(M>2) { p1 = p[i1] ; p2 = p[i2] ; for ( i = 3 ; i <= M ; i ++ ) { if ( p[i] < p1 ) { i1 = i ; p1 = p[i] ; } else if ( p[i] < p2 ) { i2 = i ; p2 = p[i] ; } } } vacate = (i2>i1) ? i2 : i1 ; remain = (i2>i1) ? i1 : i2 ; news->n0 = s[remain] ; news->n1 = s[vacate] ; if ( ( extendallwordswith( s[remain] , 0 ) <0 ) || ( extendallwordswith( s[vacate] , 1 ) <0 ) ) { fprintf(stderr,"problem at level %d\n", M) ; } if(M>2) { // whichever index out of i1 and i2 is the larger, that // one moves in with the other. // everyone after "vacate" shuffles along. p[remain] += p[vacate] ; s[remain] = news ; for ( ; vacate < M ; vacate++ ) { p[vacate] = p[vacate+1] ; s[vacate] = s[vacate+1] ; } p[M] = -1; // stop using the clipped leaf s[M] = 0; // p and s return recurse( p, s, M-1 ) ; } else { return news ; } } // recursive function to propagate a character down the tree, // to all children of this point. static int extendallwordswith ( struct state * s , int c ) { int status = 0 ; char tmp[MAXSTRING]; if( s>0 ) { // prepend c to the codeword sprintf( tmp , "%d%s", c , s->codeword ); strcpy( s->codeword, tmp ); if( !(s->leaf) ) { status += extendallwordswith ( s->n0 , c ) ; status += extendallwordswith ( s->n1 , c ) ; } } else { fprintf(stderr, "impossible state node %d\n", c ); status-- ; } return status; } int readindvector(double *w, int lo, int hi, char *file) { int i, status = 0 ; FILE *fp; fp = fopen( file, "r" ); if( !fp ) fprintf( stderr, "No such file: %s\n", file ), exit(0); for (i=lo;i<=hi;i++) { if ( fscanf(fp,"%lf ",&w[i]) == EOF ) { status = -1 ; break ; } } fclose( fp ); if ( status < 0 ) fprintf( stderr, "Warning: readindvector failed at component %d\n",i); return status ; } double *dvector(int nl,int nh) { double *v; v=(double *)malloc((unsigned) (nh-nl+1)*sizeof(double)); if (!v) fprintf(stderr,"allocation failure in dvector()" ); return v-nl; }