From 74f4b1bc3b627ba4c7e03498234d88cacdfbe97b Mon Sep 17 00:00:00 2001
From: Aki
Date: Wed, 29 Sep 2021 22:52:49 +0200
Subject: Squashed 'vorbis/' content from commit d22c3ab5f

git-subtree-dir: vorbis
git-subtree-split: d22c3ab5f633460abc2532feee60ca0892134cbf
---
 vq/vqgen.c | 566 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 566 insertions(+)
 create mode 100644 vq/vqgen.c

diff --git a/vq/vqgen.c b/vq/vqgen.c
new file mode 100644
index 0000000..934d264
--- /dev/null
+++ b/vq/vqgen.c
@@ -0,0 +1,566 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggVorbis SOFTWARE CODEC SOURCE CODE.  *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS    *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.      *
+ *                                                                  *
+ * THE OggVorbis SOURCE CODE IS (C) COPYRIGHT 1994-2001             *
+ * by the Xiph.Org Foundation http://www.xiph.org/                  *
+ *                                                                  *
+ ********************************************************************
+
+ function: train a VQ codebook
+
+ ********************************************************************/
+
+/* This code is *not* part of libvorbis. It is used to generate
+   trained codebooks offline and then spit the results into a
+   pregenerated codebook that is compiled into libvorbis. It is an
+   expensive (but good) algorithm. Run it on big iron. */
+
+/* There are so many optimizations to explore in *both* stages that
+   considering the undertaking is almost withering. For now, we brute
+   force it all */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <math.h>
+#include <string.h>
+
+#include "vqgen.h"
+#include "bookutil.h"
+
+/* Codebook generation happens in two steps:
+
+   1) Train the codebook with data collected from the encoder: We use
+   one of a few error metrics (which represent the distance between a
+   given data point and a candidate point in the training set) to
+   divide the training set up into cells representing roughly equal
+   probability of occurring.
+
+   2) Generate the codebook and auxiliary data from the trained data set
+*/
+
+/* Codebook training ****************************************************
+ *
+ * The basic idea here is that a VQ codebook is like an m-dimensional
+ * foam with n bubbles. The bubbles compete for space/volume and are
+ * 'pressurized' [biased] according to some metric. The basic alg
+ * iterates through allowing the bubbles to compete for space until
+ * they converge (if the damping is done properly) on a steady-state
+ * solution. Individual input points, collected from libvorbis, are
+ * used to train the algorithm monte-carlo style. */
+
+/* internal helpers *****************************************************/
+#define vN(data,i) (data+v->elements*i)
+
+/* default metric; squared 'distance' from desired value. */
+float _dist(vqgen *v,float *a, float *b){
+  int i;
+  int el=v->elements;
+  float acc=0.f;
+  for(i=0;i<el;i++){
+    float val=(a[i]-b[i]);
+    acc+=val*val;
+  }
+  return sqrt(acc);
+}
+
+float *_weight_null(vqgen *v,float *a){
+  return a;
+}
+
+void _vqgen_seed(vqgen *v){
+  long i;
+  for(i=0;i<v->entries;i++)
+    memcpy(_now(v,i),_point(v,i),sizeof(float)*v->elements);
+  v->seeded=1;
+}
+
+int directdsort(const void *a, const void *b){
+  float av=*((float *)a);
+  float bv=*((float *)b);
+  return (av<bv)-(av>bv);
+}
+
+void vqgen_cellmetric(vqgen *v){
+  int j,k;
+  float min=-1.f,max=-1.f,mean=0.f,acc=0.f;
+  long dup=0,unused=0;
+#ifdef NOISY
+  int i;
+  char buff[80];
+  float spacings[v->entries];
+  int count=0;
+  FILE *cells;
+  sprintf(buff,"cellspace%d.m",v->it);
+  cells=fopen(buff,"w");
+#endif
+
+  /* minimum, maximum, cell spacing */
+  for(j=0;j<v->entries;j++){
+    float localmin=-1.;
+
+    for(k=0;k<v->entries;k++){
+      if(j!=k){
+        float this=_dist(v,_now(v,j),_now(v,k));
+        if(this>0){
+          if(v->assigned[k] && (localmin==-1 || this<localmin))
+            localmin=this;
+        }else
+          if(k<j){
+            dup++;
+            break;
+          }
+      }
+    }
+    if(k<v->entries)continue;
+
+    if(v->assigned[j]==0){
+      unused++;
+      continue;
+    }
+
+    localmin=v->max[j]+localmin/2; /* this gives us rough diameter */
+    if(min==-1 || localmin<min)min=localmin;
+    if(max==-1 || localmin>max)max=localmin;
+    mean+=localmin;
+    acc++;
+#ifdef NOISY
+    spacings[count++]=localmin;
+#endif
+  }
+
+  fprintf(stderr,"cell diameter: %.03g::%.03g::%.03g (%ld unused/%ld dup)\n",
+          min,mean/acc,max,unused,dup);
+
+#ifdef NOISY
+  qsort(spacings,count,sizeof(float),directdsort);
+  for(i=0;i<count;i++)
+    fprintf(cells,"%g\n",spacings[i]);
+  fclose(cells);
+#endif
+}
+
+void vqgen_quantize(vqgen *v,quant_meta *q){
+
+  float maxdel;
+  float mindel;
+
+  float delta;
+  float maxquant=((1<<q->quant)-1);
+
+  int j,k;
+
+  mindel=maxdel=_now(v,0)[0];
+
+  for(j=0;j<v->entries;j++){
+    float last=0.f;
+    for(k=0;k<v->elements;k++){
+      if(mindel>_now(v,j)[k]-last)mindel=_now(v,j)[k]-last;
+      if(maxdel<_now(v,j)[k]-last)maxdel=_now(v,j)[k]-last;
+      if(q->sequencep)last=_now(v,j)[k];
+    }
+  }
+
+
+  /* first find the basic delta amount from the maximum span to be
+     encoded. Loosen the delta slightly to allow for additional error
+     during sequence quantization */
+
+  delta=(maxdel-mindel)/((1<<q->quant)-1.5f);
+
+  q->min=_float32_pack(mindel);
+  q->delta=_float32_pack(delta);
+
+  mindel=_float32_unpack(q->min);
+  delta=_float32_unpack(q->delta);
+
+  for(j=0;j<v->entries;j++){
+    float last=0;
+    for(k=0;k<v->elements;k++){
+      float val=_now(v,j)[k];
+      float now=rint((val-last-mindel)/delta);
+
+      _now(v,j)[k]=now;
+      if(now<0){
+        /* be paranoid; this should be impossible */
+        fprintf(stderr,"fault; quantized value<0\n");
+        exit(1);
+      }
+
+      if(now>maxquant){
+        /* be paranoid; this should be impossible */
+        fprintf(stderr,"fault; quantized value>max\n");
+        exit(1);
+      }
+      if(q->sequencep)last=(now*delta)+mindel+last;
+    }
+  }
+}
+
+/* much easier :-). Unlike in the codebook, we don't un-log log
+   scales; we just make sure they're properly offset. */
+void vqgen_unquantize(vqgen *v,quant_meta *q){
+  long j,k;
+  float mindel=_float32_unpack(q->min);
+  float delta=_float32_unpack(q->delta);
+
+  for(j=0;j<v->entries;j++){
+    float last=0.f;
+    for(k=0;k<v->elements;k++){
+      float now=_now(v,j)[k];
+      now=fabs(now)*delta+last+mindel;
+      if(q->sequencep)last=now;
+      _now(v,j)[k]=now;
+    }
+  }
+}
+
+void vqgen_init(vqgen *v,int elements,int aux,int entries,float mindist,
+                float (*metric)(vqgen *,float *, float *),
+                float *(*weight)(vqgen *,float *),int centroid){
+  memset(v,0,sizeof(vqgen));
+
+  v->centroid=centroid;
+  v->elements=elements;
+  v->aux=aux;
+  v->mindist=mindist;
+  v->allocated=32768;
+  v->pointlist=_ogg_malloc(v->allocated*(v->elements+v->aux)*sizeof(float));
+
+  v->entries=entries;
+  v->entrylist=_ogg_malloc(v->entries*v->elements*sizeof(float));
+  v->assigned=_ogg_malloc(v->entries*sizeof(long));
+  v->bias=_ogg_calloc(v->entries,sizeof(float));
+  v->max=_ogg_calloc(v->entries,sizeof(float));
+  if(metric)
+    v->metric_func=metric;
+  else
+    v->metric_func=_dist;
+  if(weight)
+    v->weight_func=weight;
+  else
+    v->weight_func=_weight_null;
+
+  v->asciipoints=tmpfile();
+
+}
+
+void vqgen_addpoint(vqgen *v, float *p,float *a){
+  int k;
+  for(k=0;k<v->elements;k++)
+    fprintf(v->asciipoints,"%.12g\n",p[k]);
+  for(k=0;k<v->aux;k++)
+    fprintf(v->asciipoints,"%.12g\n",a[k]);
+
+  if(v->points>=v->allocated){
+    v->allocated*=2;
+    v->pointlist=_ogg_realloc(v->pointlist,v->allocated*(v->elements+v->aux)*
+                              sizeof(float));
+  }
+
+  memcpy(_point(v,v->points),p,sizeof(float)*v->elements);
+  if(v->aux)memcpy(_point(v,v->points)+v->elements,a,sizeof(float)*v->aux);
+
+  /* quantize to the density mesh if it's selected */
+  if(v->mindist>0.f){
+    /* quantize to the mesh */
+    for(k=0;k<v->elements+v->aux;k++)
+      _point(v,v->points)[k]=
+        rint(_point(v,v->points)[k]/v->mindist)*v->mindist;
+  }
+  v->points++;
+  if(!(v->points&0xff))spinnit("loading... ",v->points);
+}
+
+/* yes, not threadsafe. These utils aren't */
+static int sortit=0;
+static int sortsize=0;
+static int meshcomp(const void *a,const void *b){
+  if(((sortit++)&0xfff)==0)spinnit("sorting mesh...",sortit);
+  return(memcmp(a,b,sortsize));
+}
+
+void vqgen_sortmesh(vqgen *v){
+  sortit=0;
+  if(v->mindist>0.f){
+    long i,march=1;
+
+    /* sort to make uniqueness detection trivial */
+    sortsize=(v->elements+v->aux)*sizeof(float);
+    qsort(v->pointlist,v->points,sortsize,meshcomp);
+
+    /* now march through and eliminate dupes */
+    for(i=1;i<v->points;i++){
+      if(memcmp(_point(v,i),_point(v,i-1),sortsize)){
+        /* a new, unique entry. march it down */
+        if(i>march)memcpy(_point(v,march),_point(v,i),sortsize);
+        march++;
+      }
+      spinnit("eliminating density... ",v->points-i);
+    }
+
+    /* we're done */
+    fprintf(stderr,"\r%ld training points remaining out of %ld"
+            " after density mesh (%ld%%)\n",march,v->points,march*100/v->points);
+    v->points=march;
+
+  }
+  v->sorted=1;
+}
+
+float vqgen_iterate(vqgen *v,int biasp){
+  long i,j,k;
+
+  float fdesired;
+  long desired;
+  long desired2;
+
+  float asserror=0.f;
+  float meterror=0.f;
+  float *new;
+  float *new2;
+  long *nearcount;
+  float *nearbias;
+#ifdef NOISY
+  char buff[80];
+  FILE *assig;
+  FILE *bias;
+  FILE *cells;
+  sprintf(buff,"cells%d.m",v->it);
+  cells=fopen(buff,"w");
+  sprintf(buff,"assig%d.m",v->it);
+  assig=fopen(buff,"w");
+  sprintf(buff,"bias%d.m",v->it);
+  bias=fopen(buff,"w");
+#endif
+
+
+  if(v->entries<2){
+    fprintf(stderr,"generation requires at least two entries\n");
+    exit(1);
+  }
+
+  if(!v->sorted)vqgen_sortmesh(v);
+  if(!v->seeded)_vqgen_seed(v);
+
+  fdesired=(float)v->points/v->entries;
+  desired=fdesired;
+  desired2=desired*2;
+  new=_ogg_malloc(sizeof(float)*v->entries*v->elements);
+  new2=_ogg_malloc(sizeof(float)*v->entries*v->elements);
+  nearcount=_ogg_malloc(v->entries*sizeof(long));
+  nearbias=_ogg_malloc(v->entries*desired2*sizeof(float));
+
+  /* fill in nearest points for entry biasing */
+  /*memset(v->bias,0,sizeof(float)*v->entries);*/
+  memset(nearcount,0,sizeof(long)*v->entries);
+  memset(v->assigned,0,sizeof(long)*v->entries);
+  if(biasp){
+    for(i=0;i<v->points;i++){
+      float *ppt=v->weight_func(v,_point(v,i));
+      float firstmetric=v->metric_func(v,_now(v,0),ppt)+v->bias[0];
+      float secondmetric=v->metric_func(v,_now(v,1),ppt)+v->bias[1];
+      long firstentry=0;
+      long secondentry=1;
+
+      if(!(i&0xff))spinnit("biasing... ",v->points+v->points+v->entries-i);
+
+      if(firstmetric>secondmetric){
+        float temp=firstmetric;
+        firstmetric=secondmetric;
+        secondmetric=temp;
+        firstentry=1;
+        secondentry=0;
+      }
+
+      for(j=2;j<v->entries;j++){
+        float thismetric=v->metric_func(v,_now(v,j),ppt)+v->bias[j];
+        if(thismetric<secondmetric){
+          if(thismetric<firstmetric){
+            secondmetric=firstmetric;
+            secondentry=firstentry;
+            firstmetric=thismetric;
+            firstentry=j;
+          }else{
+            secondmetric=thismetric;
+            secondentry=j;
+          }
+        }
+      }
+
+      for(j=0;j<v->entries;j++){
+
+        float thismetric,localmetric;
+        float *nearbiasptr=nearbias+desired2*j;
+        long k=nearcount[j];
+
+        localmetric=v->metric_func(v,_now(v,j),ppt);
+        /* 'thismetric' is to be the bias value necessary in the current
+           arrangement for entry j to capture point i */
+        if(firstentry==j){
+          /* use the secondary entry as the threshold */
+          thismetric=secondmetric-localmetric;
+        }else{
+          /* use the primary entry as the threshold */
+          thismetric=firstmetric-localmetric;
+        }
+
+        /* support the idea of 'minimum distance'... if we want the
+           cells in a codebook to be roughly some minimum size (as with
+           the low resolution residue books) */
+
+        /* a cute two-stage delayed sorting hack */
+        if(k<desired){
+          nearbiasptr[k]=thismetric;
+          k++;
+          if(k==desired){
+            spinnit("biasing... ",v->points+v->points+v->entries-i);
+            qsort(nearbiasptr,desired,sizeof(float),directdsort);
+          }
+
+        }else if(thismetric>nearbiasptr[desired-1]){
+          nearbiasptr[k]=thismetric;
+          k++;
+          if(k==desired2){
+            spinnit("biasing... ",v->points+v->points+v->entries-i);
+            qsort(nearbiasptr,desired2,sizeof(float),directdsort);
+            k=desired;
+          }
+        }
+        nearcount[j]=k;
+      }
+    }
+
+    /* inflate/deflate */
+
+    for(i=0;i<v->entries;i++){
+      float *nearbiasptr=nearbias+desired2*i;
+
+      spinnit("biasing... ",v->points+v->entries-i);
+
+      /* due to the delayed sorting, we likely need to finish it off....*/
+      if(nearcount[i]>desired)
+        qsort(nearbiasptr,nearcount[i],sizeof(float),directdsort);
+
+      v->bias[i]=nearbiasptr[desired-1];
+
+    }
+  }else{
+    memset(v->bias,0,v->entries*sizeof(float));
+  }
+
+  /* Now assign with new bias and find new midpoints */
+  for(i=0;i<v->points;i++){
+    float *ppt=v->weight_func(v,_point(v,i));
+    float firstmetric=v->metric_func(v,_now(v,0),ppt)+v->bias[0];
+    long firstentry=0;
+
+    if(!(i&0xff))spinnit("centering... ",v->points-i);
+
+    for(j=0;j<v->entries;j++){
+      float thismetric=v->metric_func(v,_now(v,j),ppt)+v->bias[j];
+      if(thismetric<firstmetric){
+        firstmetric=thismetric;
+        firstentry=j;
+      }
+    }
+
+    j=firstentry;
+    firstmetric-=v->bias[j];
+    meterror+=firstmetric;
+
+    if(v->centroid==0){
+      /* set up midpoints for next iter */
+      if(v->assigned[j]++){
+        for(k=0;k<v->elements;k++)
+          vN(new,j)[k]+=ppt[k];
+        if(firstmetric>v->max[j])v->max[j]=firstmetric;
+      }else{
+        for(k=0;k<v->elements;k++)
+          vN(new,j)[k]=ppt[k];
+        v->max[j]=firstmetric;
+      }
+    }else{
+      /* centroid */
+      if(v->assigned[j]++){
+        for(k=0;k<v->elements;k++){
+          if(vN(new,j)[k]>ppt[k])vN(new,j)[k]=ppt[k];
+          if(vN(new2,j)[k]<ppt[k])vN(new2,j)[k]=ppt[k];
+        }
+        if(firstmetric>v->max[firstentry])v->max[j]=firstmetric;
+      }else{
+        for(k=0;k<v->elements;k++){
+          vN(new,j)[k]=ppt[k];
+          vN(new2,j)[k]=ppt[k];
+        }
+        v->max[firstentry]=firstmetric;
+      }
+    }
+  }
+
+  /* assign midpoints */
+
+  for(j=0;j<v->entries;j++){
+#ifdef NOISY
+    fprintf(assig,"%ld\n",v->assigned[j]);
+    fprintf(bias,"%g\n",v->bias[j]);
+#endif
+    asserror+=fabs(v->assigned[j]-fdesired);
+    if(v->assigned[j]){
+      if(v->centroid==0){
+        for(k=0;k<v->elements;k++)
+          _now(v,j)[k]=vN(new,j)[k]/v->assigned[j];
+      }else{
+        for(k=0;k<v->elements;k++)
+          _now(v,j)[k]=(vN(new,j)[k]+vN(new2,j)[k])/2.f;
+      }
+    }
+  }
+
+  asserror/=(v->entries*fdesired);
+
+  fprintf(stderr,"Pass #%d... ",v->it);
+  fprintf(stderr,": dist %g(%g) metric error=%g \n",
+          asserror,fdesired,meterror/v->points);
+  v->it++;
+
+  free(new);
+  free(new2);
+  free(nearcount);
+  free(nearbias);
+#ifdef NOISY
+  fclose(assig);
+  fclose(bias);
+  fclose(cells);
+#endif
+  return(asserror);
+}
--
cgit v1.1