Skip to content

Instantly share code, notes, and snippets.

@jugglinmike
Created October 30, 2013 00:47
Show Gist options
  • Save jugglinmike/7225370 to your computer and use it in GitHub Desktop.
Save jugglinmike/7225370 to your computer and use it in GitHub Desktop.
File hashing research

File Hashing Research

Tests in Mozilla-Central

$ time ./hash.sh `find ~/projects/mozilla/mozilla-central/dom/ -type f -path */test/*.js`
real  0m0.059s
user  0m0.032s
sys 0m0.020s

$ time ./hash.py `find ~/projects/mozilla/mozilla-central/dom/ -type f -path */test/*.js`
real  0m0.070s
user  0m0.048s
sys 0m0.020s

$ time ./hash.js `find ~/projects/mozilla/mozilla-central/dom/ -type f -path */test/*.js`
real  0m0.223s
user  0m0.172s
sys 0m0.080s

Gaia's .git directory

$ time ./hash.sh `find ~/projects/mozilla/gaia/.git/ -type f`
real  0m2.118s
user  0m1.948s
sys 0m0.168s

$ time ./hash.py `find ~/projects/mozilla/gaia/.git/ -type f`
real  0m1.896s
user  0m1.344s
sys 0m0.552s

$ time ./hash.js `find ~/projects/mozilla/gaia/.git/ -type f`
real  0m3.959s
user  0m3.676s
sys 0m1.664s
#!/usr/bin/env node
// Processing the files one at a time (in series) to be fair to the other implementations
var crypto = require('crypto');
var fs = require('fs');
/**
 * Compute the SHA-1 digest of every file in `files`, one file at a time.
 *
 * The next file is only opened once the current one has been fully read (or
 * has failed), so at most one read stream is open at any moment. Each digest
 * is computed and then discarded: this script exists to time the hashing.
 *
 * A file that cannot be opened or read is reported on stderr and skipped, so
 * a single bad path does not abort the rest of the run.
 *
 * @param {string[]} files Paths to hash. The array is consumed: entries are
 *   removed from the front as they are processed.
 * @returns {undefined}
 */
function hashIt(files) {
  var file, shasum, stream;
  if (!files.length) {
    return;
  }
  file = files.shift();
  shasum = crypto.createHash('sha1');
  stream = fs.createReadStream(file);
  stream.on('data', function(chunk) {
    shasum.update(chunk);
  });
  // Without an 'error' listener a failed open/read is thrown as an uncaught
  // exception and kills the process; report it and move on instead.
  stream.on('error', function(err) {
    console.error('Could not hash ' + file + ': ' + err.message);
    hashIt(files);
  });
  stream.on('end', function() {
    shasum.digest('hex');
    hashIt(files);
  });
}
// The first two argv entries are skipped; everything after them is treated
// as a file path to hash.
var inputPaths = process.argv.slice(2);
hashIt(inputPaths);
/*
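// This approach fails on huge files: readFileSync loads each file into memory
// in one piece. Decoding the contents as utf8 also alters the bytes of binary
// files, so the resulting digests would not match sha1sum's.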
process.argv.slice(2).forEach(function(fileName) {
crypto.createHash('sha1').update(
fs.readFileSync(fileName, { encoding: 'utf8' })
).digest('hex');
});
*/
#!/usr/bin/env python
import hashlib
import sys
# Hash each file named on the command line. The digest is computed and then
# discarded, since this script is only used to time the hashing.
for file_name in sys.argv[1:]:
    sha1 = hashlib.sha1()
    # Binary mode: the hash must cover the file's raw bytes, and hashlib
    # rejects text strings on Python 3. `with` closes the handle even if a
    # read fails part-way through.
    with open(file_name, 'rb') as handle:
        # Feed the hash in fixed-size chunks so a large file is never held
        # in memory all at once.
        for chunk in iter(lambda: handle.read(1 << 16), b''):
            sha1.update(chunk)
    sha1.hexdigest()
#!/usr/bin/env bash
# Hash every file passed as an argument and discard the output.
# "$@" is quoted so that paths containing spaces or glob characters are
# passed to sha1sum unchanged, one argument per file.
sha1sum "$@" > /dev/null
@lightsofapollo
Copy link

Great, so it looks like the Python option is not only reasonable but faster!

Here is a build on travis: https://travis-ci.org/mozilla-b2g/gaia/builds/13241326

And my modified version which runs node in parallel:

#! /usr/bin/env node

var crypto = require('crypto');
var fs = require('fs');

/**
 * Compute the SHA-1 digest of a single file by streaming its contents.
 *
 * The call returns immediately; reading and hashing happen asynchronously.
 * The digest is computed and then discarded. A file that cannot be opened or
 * read is reported on stderr instead of crashing the process.
 *
 * NOTE: nothing here limits how many streams are open at once; every call
 * creates its own read stream right away.
 *
 * @param {string} file Path of the file to hash.
 * @returns {undefined}
 */
function hashIt(file) {
  var stream, shasum;

  shasum = crypto.createHash('sha1');
  stream = fs.createReadStream(file);
  stream.on('data', function(chunk) {
    shasum.update(chunk);
  });

  // Without an 'error' listener a failed open/read is thrown as an uncaught
  // exception, which would abort the hashing of every other file too.
  stream.on('error', function(err) {
    console.error('Could not hash ' + file + ': ' + err.message);
  });

  stream.on('end', function() {
    shasum.digest('hex');
  });
}

// Start hashing every path given on the command line without waiting for the
// previous one to finish.
process.argv.slice(2).forEach(function(file) {
  hashIt(file);
});

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment