Skip to content

Instantly share code, notes, and snippets.

@carlobaldassi
Created November 15, 2019 16:54
Show Gist options
  • Save carlobaldassi/d407efc5027273c4d849b0971bc659a1 to your computer and use it in GitHub Desktop.
Save carlobaldassi/d407efc5027273c4d849b0971bc659a1 to your computer and use it in GitHub Desktop.
CBModules customizations diff
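The CBModules.diff contains the output of `diff -r` performed between the original CBModules directory,
as downloaded from https://www.uef.fi/web/machine-learning/software (CBModules 2.0 package, dated
Jun 4, 2019), and my modified version. Since the original source code does not include licensing
information, I'm only uploading the diff, for documentation and reproducibility purposes.
Note that in the diff the modified version is the first operand (`CBModules`) and the original is the
second (`CBModules.orig`), so lines marked `<` belong to the modified version and lines marked `>`
belong to the original.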
The modifications consist in:
1. Disabling the data preprocessing step that scaled all dimensions individually, only allowing an
overall uniform scaling factor. Note that in all of my tests the data was shifted to start at the
origin and to contain only positive values. See also the additional option -Z.
2. Changing the reporting to output the scaled sum of squared errors rather than the rescaled mean
squared error.
3. Increasing the maximum population size. This was actually only applied as needed, but it does
not seem to make any significant difference in performance anyway.
diff -r CBModules/ga/cbga.c CBModules.orig/ga/cbga.c
42c42
< #define MaxGenerations 1000
---
> #define MaxGenerations 100
154,155c154
< long watch,
< double errorscale)
---
> long watch)
161,162c160,161
< PrevError = PrintableError(PrevError * errorscale, CB);
< NewError = PrintableError(Error[0] * errorscale, CB);
---
> PrevError = PrintableError(PrevError, CB);
> NewError = PrintableError(Error[0], CB);
169,170c168,169
< case 2: printf("%3i: %9.9f \n", gen, NewError); break;
< case 3: printf("%3i: %9.9f ", gen, NewError); PrintTime(watch); break;
---
> case 2: printf("%3i: %9.4f \n", gen, NewError); break;
> case 3: printf("%3i: %9.4f ", gen, NewError); PrintTime(watch); break;
173c172
< printf("%.9f ", PrintableError(Error[i],CB));
---
> printf("%.4f ", PrintableError(Error[i],CB));
176c175
< default: printf("Iter=%3i Error=%9.9f Change=%-9.9f ",
---
> default: printf("Iter=%3i Error=%9.4f Change=%-9.4f ",
180c179
< printf(" Temp=%8.9f ", SAS->CurrentTemperature);
---
> printf(" Temp=%8.4f ", SAS->CurrentTemperature);
198c197
< case 2: printf("%9.9f (%i)", error, iterations); break;
---
> case 2: printf("%9.4f (%i)", error, iterations); break;
200c199
< printf("Distortion: %-9.9f\n", error);
---
> printf("Distortion: %-9.4f\n", error);
1821d1819
< double errorscale;
1827d1824
< errorscale = Value(ErrorScale) / 1e3;
1837c1834
< PrintProgress(&Snew[0]->CB, &SAS, 0, PrevError, Error, watch, errorscale);
---
> PrintProgress(&Snew[0]->CB, &SAS, 0, PrevError, Error, watch);
1853c1850
< PrintProgress(&Snew[0]->CB, &SAS, g, PrevError, Error, watch, errorscale);
---
> PrintProgress(&Snew[0]->CB, &SAS, g, PrevError, Error, watch);
1861c1858
< PrintResult(watch, PrintableError(Error[0] * errorscale, &Snew[0]->CB), g-1 );
---
> PrintResult(watch, PrintableError(Error[0], &Snew[0]->CB), g-1 );
diff -r CBModules/ga/cbga.fac CBModules.orig/ga/cbga.fac
54c54
< 'Z', 1, INT, 0, 1, 1000, 8, YES,
---
> 'Z', 1, INT, 0, 1, 100, 8, YES,
170,177d169
<
< Fact( ErrorScale,
< "Apply this scale, divided by 1e3, to error reporting ",
< 'A', 1, INT, 1, 1,100000000, 1000, NO,
< es1, es2, es3, es4, es5, es6, es7, es8, es9, es10,
< 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
< NO )
<
diff -r CBModules/modules/cb.c CBModules.orig/modules/cb.c
2875c2875,2876
< return (double) totalerror / 1e12;
---
> return (double) totalerror /
> (double) ( (usefreqs? TotalFreq(TS) : BookSize(TS)) * VectorSize(TS));
diff -r CBModules/modules/textfile.c CBModules.orig/modules/textfile.c
322,331d321
< float overallscale;
< float maxwidth = 0.0F;
< for (j = 0; j < dim; j++) {
< float w = MinMax[j][1];
< if (w > maxwidth) {
< maxwidth = w;
< }
< }
< overallscale = maxval / maxwidth;
<
336,338c326,327
< else {
< scale = overallscale;
< }
---
> else
> scale = maxval / (MinMax[j][1] - MinMax[j][0]);
342c331
< VectorScalar(CB, i, j) = ROUND((Data[i][j]) * scale);
---
> VectorScalar(CB, i, j) = ROUND((Data[i][j] - MinMax[j][0]) * scale);
525,536d513
< float overallscale;
< float maxwidth = 0.0F;
< int dim = VectorSize(CB);
< int j1;
< for (j1 = 0; j1 < dim; j1++) {
< float w = MinMax[j1][1];
< if (w > maxwidth) {
< maxwidth = w;
< }
< }
< overallscale = maxwidth / maxval;
<
538,539c515,516
< scale = overallscale;
< val = VectorScalar(CB, i, j)*scale;
---
> scale = (MinMax[j][1] - MinMax[j][0]) / maxval;
> val = MinMax[j][0] + VectorScalar(CB, i, j)*scale;
diff -r CBModules/rs/rs.c CBModules.orig/rs/rs.c
63c63
< #define CALC_MSE(val) (double) (val) / 1e12
---
> #define CALC_MSE(val) (double) (val) / (TotalFreq(pTS) * VectorSize(pTS))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment