# Central limit theorem demonstration. # # Pick any discrete distribution and # visualize the distribution of a sum of # t iid variables. # For t greater than 20 or so, the # distbn looks very Gaussian indeed. # However a close look (for example on # log scale) shows persisting and # interesting differences. These graphs # would also be good for a discussion of # large deviation theory. # # David MacKay 22 November 2003 # To use, run octave, then type convolve gset autos xy; gset linestyle 8 lt 1 lw 0.3 gset tics out verbose=0; ### one lop sided probability distribution p0 = [ 0.30000 0.10000 0.15000 0.00000 0.00000 0.20000 0.00000 0.00000 0.00000 0.00000 0.25000 ]; ### another lop-sided probability distribution, lop-sided the other way p0 = [ 0.25000 0.00000 0.00000 0.00000 0.05000 0.0000 0.00000 0.15000 0.00000 0.20000 0.35000 ]; gset lmargin 6 gset bmargin 1 gset rmargin 1 gset tmargin 1 gset nokey T = 300 ; # maximum number of variables from p0 to add together Tmax = 50 ; # when to start skipping Tperiod = 20 ; # multiple y = p0' ; gset size 1,1 for t=1:T if ( (t<= Tmax) || ( round(t/Tperiod) * Tperiod == t ) ) s=size(y); ## r = 1:(s(2)); r = 0:(s(2)-1); ## the list of possible values of r, where ## r = sum_{n=1}^N x_n mean = r * y' ; rsquared = r.*r ; meansquare = rsquared * y' ; myvar = meansquare - mean *mean ; g = exp( - (r-mean).**2 / (2.0 * myvar) ) / sqrt(2.0*3.14159 * myvar \ ) ; command = sprintf ( "gset title 'sum of %d' " , t ) ; command = sprintf ( "gset title '' " ) ; eval(command) ; command = sprintf ( "gset xrange [ -1 :%d ] " , (s(2)-1) ) ; eval(command) ; ycommand = sprintf ( "gset yrange [ %g:%g] " , 0.0 , max(y) ) ; eval(ycommand) ; gset size 1,0.7 gset origin 0,0 gset nologscale y ; command = sprintf ( "gset term postscript \"Helvetica\" 30 ; gset output 'ps/sum%03d.ps' " , t ) ; eval(command) ; gplot g' , y' w impulse 1; gset logscale y ; lycommand = sprintf ( "gset yrange [ %g:%g] " , min(g)/3.0 , max(y) ) ; eval(lycommand) ; command = sprintf ( "gset term postscript 'Helvetica' 30 ; gset output 'ps/lsum%03d.ps' " , t ) ; eval(command) ; gplot g' , y' w impulse 1; gset size 1,1 command = sprintf ( "gset term postscript \"Helvetica\" 30 ; gset output 'ps/bsum%03d.ps' " , t ) ; eval(command) ; gset multiplot gset origin 0,0.5 gset size 1,0.5 gset nologscale y ; gplot g' , y' w impulse 1; gset logscale y ; eval(lycommand) ; gset origin 0,0 gset size 1,0.5 gplot g' , y' w impulse 1; gset nomultiplot gset origin 0,0 gset size 1,1 endif y = conv( y , p0 ) ; if((t<0)) disp ( "This is the distribution of one variable, and its \ approximation by a Gaussian. (Upper figure: same, on log scale)") ; disp ( "The remaining pictures show the distbn of the sum of t such variables") ; endif if(verbose || (t<0)) ans = input ( "ready" ) ; endif endfor