Skip to content

Instantly share code, notes, and snippets.

@p-i-
Created September 19, 2016 22:34
Show Gist options
  • Save p-i-/db802a6c4a105a297f9a0bf878d74250 to your computer and use it in GitHub Desktop.
Save p-i-/db802a6c4a105a297f9a0bf878d74250 to your computer and use it in GitHub Desktop.
% Version 1.000
%
% Code provided by Ruslan Salakhutdinov
%
% Permission is granted for anyone to copy, use, modify, or distribute this
% program and accompanying programs and documents for any purpose, provided
% this copyright notice is retained and prominently displayed, along with
% a note saying that the original programs are available from our
% web page.
% The programs and documents are distributed without any warranty, express or
% implied. As the programs were written for research purposes only, they have
% not been tested to the degree that would be advisable in any important
% application. All use of these programs is entirely at the user's own risk.
% This program reads raw MNIST files available at
% http://yann.lecun.com/exdb/mnist/
% and converts them to files in matlab format
% Before using this program you first need to download files:
% train-images-idx3-ubyte.gz train-labels-idx1-ubyte.gz
% t10k-images-idx3-ubyte.gz t10k-labels-idx1-ubyte.gz
% and gunzip them. You need to allocate some space for this.
% This program was originally written by Yee Whye Teh
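% RUN: converter
% (converter takes no arguments; it calls the local function convert once for
% the test set and once for the training set, e.g.
% convert('t10k-images-idx3-ubyte', 't10k-labels-idx1-ubyte', 1000, 'test', 10))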
function converter
% CONVERTER  Entry point: run convert on the MNIST test files, then on the
% MNIST training files.
%
% Each row of the table below is one call to convert:
%   { images file, labels file, output prefix, number of batches }
% Every batch holds imagesPerBatch images.
imagesPerBatch = 1000;
jobs = { ...
    't10k-images-idx3-ubyte',  't10k-labels-idx1-ubyte',  'test',  10; ...
    'train-images-idx3-ubyte', 'train-labels-idx1-ubyte', 'train', 60 };
for iJob = 1 : size(jobs, 1)
    [imagesFile, labelsFile, prefix, batchCount] = jobs{iJob, :};
    convert( imagesFile, labelsFile, imagesPerBatch, prefix, batchCount );
end
end
function convert(imagesFilepath, labelsFilepath, nChunkSize, strDataSet, nBatches)
% CONVERT  Split one raw MNIST image/label file pair into per-digit .mat files.
%
%   convert(imagesFilepath, labelsFilepath, nChunkSize, strDataSet, nBatches)
%
%   Reads nBatches chunks of nChunkSize images (28*28 bytes each) together
%   with their labels, appends every image as one text row to the temp file
%   of its digit class (tmp_0.ascii .. tmp_9.ascii in the current folder),
%   then loads each temp file back and saves it as [strDataSet <d> '.mat']
%   holding a matrix D with one image per row. The temp files are deleted
%   before returning.
%
%   Inputs:
%     imagesFilepath - path of the gunzipped raw image file
%     labelsFilepath - path of the gunzipped raw label file
%     nChunkSize     - number of images read per batch
%     strDataSet     - prefix of the output files, e.g. 'test' or 'train'
%     nBatches       - number of batches to read (one progress dot each)
%
%   If a source file or a temp file cannot be opened, a message is printed
%   to stderr and the function returns without converting anything. An
%   error is raised (after closing all files and removing the temp files)
%   if the source files hold fewer than nChunkSize*nBatches images/labels.
STDOUT = 1;
STDERR = 2;
nPixels  = 28*28;   % bytes per image
nClasses = 10;      % digits 0..9

% open source files
hImagesFile = fopen( imagesFilepath, 'r' );
hLabelsFile = fopen( labelsFilepath, 'r' );
if hImagesFile==-1; fprintf(STDERR, 'Failed to find/open file: %s\n', imagesFilepath); end
if hLabelsFile==-1; fprintf(STDERR, 'Failed to find/open file: %s\n', labelsFilepath); end
if hImagesFile==-1 || hLabelsFile==-1
    % do not leak the handle of whichever file did open
    if hImagesFile~=-1; fclose( hImagesFile ); end
    if hLabelsFile~=-1; fclose( hLabelsFile ); end
    return;
end

% skip the file headers: four 32-bit words for images, two for labels
fseek( hImagesFile, 4*4, 'bof' );
fseek( hLabelsFile, 2*4, 'bof' );

% create one temp file per digit class (e.g. tmp_5.ascii)
tempFilenames  = cell(1, nClasses);
hAsciiTempFile = cell(1, nClasses);
for d = 0 : nClasses-1
    tempFilenames{d+1} = ['tmp_' num2str(d) '.ascii'];
end
for c = 1 : nClasses
    hAsciiTempFile{c} = fopen( tempFilenames{c}, 'w' );
    if hAsciiTempFile{c} == -1
        fprintf(STDERR, 'Failed to create temp file: %s\n', tempFilenames{c});
        fclose( hImagesFile );
        fclose( hLabelsFile );
        closeAndDeleteTempFiles( hAsciiTempFile, tempFilenames );
        return;
    end
end

fprintf(STDOUT, 'Starting to convert %s MNIST images (prints %d dots) \n', strDataSet, nBatches);

% route every image to the temp file of its class, counting rows per class
nPerClass = zeros(1, nClasses);
try
    for i = 1 : nBatches
        fprintf('.');
        rawimages = fread( hImagesFile, nPixels*nChunkSize, 'uchar' );
        rawlabels = fread( hLabelsFile, nChunkSize, 'uchar' );
        if numel(rawimages) ~= nPixels*nChunkSize || numel(rawlabels) ~= nChunkSize
            error('convert:shortRead', ...
                'Unexpected end of data in batch %d of %s / %s', ...
                i, imagesFilepath, labelsFilepath);
        end
        rawimages = reshape( rawimages, nPixels, nChunkSize );
        for j = 1 : nChunkSize
            c = rawlabels(j) + 1;
            fprintf( hAsciiTempFile{c}, '%3d ', rawimages(:,j) );
            fprintf( hAsciiTempFile{c}, '\n' );
            nPerClass(c) = nPerClass(c) + 1;
        end
    end
catch err
    % release every handle and remove the partial temp files before failing
    fclose( hImagesFile );
    fclose( hLabelsFile );
    closeAndDeleteTempFiles( hAsciiTempFile, tempFilenames );
    rethrow( err );
end
fclose( hImagesFile );
fclose( hLabelsFile );
fprintf(STDOUT, '\n');

% Close the temp files BEFORE loading them so that everything written is
% on disk; mark them closed so the final cleanup does not close them twice.
for c = 1 : nClasses
    fclose( hAsciiTempFile{c} );
    hAsciiTempFile{c} = -1;
end

% load each class back as a numeric matrix and save it in .mat format
for d = 0 : nClasses-1
    if nPerClass(d+1) > 0
        D = load( tempFilenames{d+1}, '-ascii' );
    else
        % nothing was written for this class; avoid loading an empty file
        D = zeros(0, nPixels);
    end
    fprintf(STDOUT, '%5d Digits of class %d\n', size(D,1), d);
    save([strDataSet num2str(d) '.mat'], 'D', '-mat');
end

closeAndDeleteTempFiles( hAsciiTempFile, tempFilenames );
end

function closeAndDeleteTempFiles(handles, filenames)
% Close every temp-file handle that is still open (entries that are empty
% or -1 are skipped) and delete the corresponding files if they exist.
for k = 1 : numel(handles)
    if ~isempty(handles{k}) && handles{k} ~= -1
        fclose( handles{k} );
    end
    if exist(filenames{k}, 'file')
        delete( filenames{k} );
    end
end
end
function converterOld()
% CONVERTEROLD  Legacy conversion of the MNIST test and training files.
%
% Expects the four gunzipped MNIST files in the current folder. Writes
% test0.mat .. test9.mat (from the t10k files, 10 batches of 1000 images)
% and digit0.mat .. digit9.mat (from the train files, 60 batches of 1000
% images). Each .mat file holds a matrix D with one image per row.
fprintf(1,'You first need to download files:\n train-images-idx3-ubyte.gz\n train-labels-idx1-ubyte.gz\n t10k-images-idx3-ubyte.gz\n t10k-labels-idx1-ubyte.gz\n from http://yann.lecun.com/exdb/mnist/\n and gunzip them \n');

% Work with test files first
convertOldDataSet('t10k-images-idx3-ubyte', 't10k-labels-idx1-ubyte', 'test', 10, ...
    'Starting to convert Test MNIST images (prints 10 dots) \n');

% Work with training files second
convertOldDataSet('train-images-idx3-ubyte', 'train-labels-idx1-ubyte', 'digit', 60, ...
    'Starting to convert Training MNIST images (prints 60 dots)\n');
end

function convertOldDataSet(imagesFile, labelsFile, prefix, nBatches, startMessage)
% Convert one image/label file pair into [prefix <d> '.mat'] for d = 0..9.
%
%   imagesFile   - raw image file name
%   labelsFile   - raw label file name
%   prefix       - prefix of both the intermediate .ascii and the .mat files
%   nBatches     - number of 1000-image batches to read
%   startMessage - fprintf format string printed before conversion starts
n = 1000;   % images per batch

f = fopen(imagesFile,'r');
if f == -1
    error('converterOld:openFailed', 'Failed to find/open file: %s', imagesFile);
end
g = fopen(labelsFile,'r');
if g == -1
    fclose(f);
    error('converterOld:openFailed', 'Failed to find/open file: %s', labelsFile);
end

% names of the intermediate per-digit text files, e.g. test3.ascii
asciiNames = cell(1,10);
for d=0:9
    asciiNames{d+1} = [prefix num2str(d) '.ascii'];
end
Df = num2cell(-ones(1,10));   % -1 marks "not open"

try
    % skip the headers: four 32-bit words for images, two for labels
    fread(f,4,'int32');
    fread(g,2,'int32');
    fprintf(1,startMessage);
    for d=0:9
        Df{d+1} = fopen(asciiNames{d+1},'w');
        if Df{d+1} == -1
            error('converterOld:openFailed', 'Failed to create file: %s', asciiNames{d+1});
        end
    end
    for i=1:nBatches
        fprintf('.');
        rawimages = fread(f,28*28*n,'uchar');
        rawlabels = fread(g,n,'uchar');
        rawimages = reshape(rawimages,28*28,n);
        for j=1:n
            fprintf(Df{rawlabels(j)+1},'%3d ',rawimages(:,j));
            fprintf(Df{rawlabels(j)+1},'\n');
        end
    end
catch err
    % release every open handle before propagating the failure
    fclose(f);
    fclose(g);
    for d=1:10
        if Df{d} ~= -1
            fclose(Df{d});
        end
    end
    rethrow(err);
end

% the source files are no longer needed
fclose(f);
fclose(g);
fprintf(1,'\n');

for d=0:9
    fclose(Df{d+1});   % close before load so all rows are on disk
    D = load(asciiNames{d+1},'-ascii');
    fprintf('%5d Digits of class %d\n',size(D,1),d);
    save([prefix num2str(d) '.mat'],'D','-mat');
    % remove only the intermediate file created here (portable, and does
    % not touch unrelated .ascii files in the folder)
    delete(asciiNames{d+1});
end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment