Created
January 20, 2015 06:48
-
-
Save PandaWhisperer/3576290fef258f8f0f5a to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Third-party and project-local modules used by this experiment script.
var async = require('async');
var cv = require('opencv');
var daoUtil = require('../dao/util');
var skybio = require('../lib/skybio');
var kairos = require('../lib/kairos');
var fre = require('../lib/fre');
var _ = require('underscore');
var stats = require('stats-lite');
var fs = require('fs');
var request = require('request');
var uuid = require('node-uuid').v4;
// File-level `done` is just console.log; callbacks further down declare their
// own `done` parameter.
var done = console.log;
var util = require('../lib/util');

// Storage handles: S3 bucket wrappers and the DynamoDB accessor.
var s3 = require('../lib/s3');
var db = require('../lib/dynamo').db;
var pkBucket = s3('photokharma-files');
var bucket = s3('photokharma-users');

// Run configuration. Alternative values from earlier runs are kept commented out.
var user_id = 'af0a8a88-f560-45db-b8a2-a47c29b49624';
//user_id= '4267ad25-48cc-4ad3-ad1f-2496171d4ed6',
//dir_out= '/home/ec2-user/skybio_exp/' + user_id + '/',
var dir_out = '/Users/agr/Documents/sm14/photokharma/tmp/skybio/' + user_id + '/';
var namespace = 'clustering_5';

// Scratch object used to hand intermediate results from one pipeline step to the next.
var context = {};
async.waterfall([ | |
function(done) | |
{ | |
if(!fs.existsSync(dir_out)) | |
fs.mkdirSync(dir_out); | |
done(); | |
}, | |
function(done) | |
{ | |
return done(); | |
db.face.queryIndex(['user_id','face_id'], user_id, | |
function (err, faces) | |
{ | |
if (err) return done(err); | |
context.pkFaces= faces; | |
console.log('faces.length=' + faces.length); | |
done(); | |
}, ['photo_id','face_id','hash','drop_reason','facebook','facebook_tag_date','facebook_tag_id','facebook_tag_name']); | |
}, | |
function(done) | |
{ | |
return done(); | |
async.eachLimit(_.range(context.pkFaces.length), 1, | |
function(idx, done) | |
{ | |
var face= context.pkFaces[idx]; | |
if(!face.drop_reason) | |
{ | |
pkBucket.getBuffer(['face',face.hash+'.jpg'], | |
function(err, buff) | |
{ | |
if(err) return done(err); | |
fs.writeFileSync(dir_out + face.hash + '.jpg', buff); | |
fs.writeFileSync(dir_out + face.hash + '.json', JSON.stringify(face)); | |
done(); | |
}); | |
} | |
else done(); | |
}, done); | |
}, | |
function(done) | |
{ | |
var fileNames= fs.readdirSync(dir_out); | |
fileNames= _.filter(fileNames, function(file_name) { return file_name.indexOf('.json') != -1; }); | |
context.fileNames= fileNames; | |
dir_out = dir_out + 'skybio/'; | |
if(!fs.existsSync(dir_out)) | |
fs.mkdirSync(dir_out); | |
if(!fs.existsSync(dir_out+'responses/')) | |
fs.mkdirSync(dir_out+'responses/'); | |
if(!fs.existsSync(dir_out+'low_confidence/')) | |
fs.mkdirSync(dir_out+'low_confidence/'); | |
if(!fs.existsSync(dir_out+'too_small/')) | |
fs.mkdirSync(dir_out+'too_small/'); | |
done(); | |
}, | |
function(done) | |
{ | |
return done(); | |
// detect and add faces | |
async.eachLimit(_.range(context.fileNames.length), 1, | |
function(idx, done) | |
{ | |
var dbface= JSON.parse(fs.readFileSync(dir_out + '../' + context.fileNames[idx]).toString()); | |
url= util.faceUrl(dbface); | |
console.log(url); | |
if(fs.existsSync(dir_out + 'responses/' + dbface.hash + '.json')) | |
{ | |
console.log('Skipping.'); | |
return done(); | |
} | |
var buff, im; | |
skybio.facesRecognize(namespace, ['all'], [url], 10000, true, | |
function(err,res) | |
{ | |
if(err) | |
{ | |
//return done(); | |
res= { photos: [{ tags: [] }] }; | |
} | |
fs.writeFileSync(dir_out + 'responses/' + dbface.hash + '.json', JSON.stringify(res)); | |
var detections= res.photos[0].tags; | |
console.log('detections.length=' + detections.length); | |
async.eachLimit(detections, 1, | |
function(detection, done) | |
{ | |
detection.url= res.photos[0].url; | |
async.waterfall([ | |
function(done) | |
{ | |
if(!buff) | |
{ | |
request.get({ url: url, encoding: null }, | |
function (err, res, body) | |
{ | |
if (err) return done(err); | |
buff= body; | |
cv.readImage(buff, | |
function(err, _im) | |
{ | |
if (err) return done(err); | |
im= _im; | |
done(); | |
}); | |
}); | |
} | |
else done(); | |
}, | |
function(done) | |
{ | |
var detection_box= { x: detection.center.x - detection.width/2, | |
y: detection.center.y - detection.height/2, | |
w: detection.width, | |
h: detection.height }; | |
var cd= fre.cropDetection(im, detection_box, 0.6), | |
crop= cd.crop; | |
crop.buff= crop.toBuffer(); | |
fs.writeFileSync(dir_out + detection.tid + '.jpg', crop.buff); | |
fs.writeFileSync(dir_out + detection.tid + '.json', JSON.stringify(detection)); | |
var matches= detection.uids ? detection.uids : []; | |
console.log('matches.length=' + matches.length); | |
matches= _.map(matches, | |
function(_match) | |
{ | |
var face_uid= _match.uid.substring(0, _match.uid.indexOf('@')), | |
match= { face_uid: face_uid, confidence: _match.confidence }; | |
return match; | |
}); | |
fs.writeFileSync(dir_out + detection.tid + '_matches.json', JSON.stringify(matches)); | |
done(); | |
}], done); | |
}, done); | |
}); | |
}, done); | |
}, | |
function(done) | |
{ | |
console.log('Generating similarity matrix...'); | |
var detection_uids= fs.readdirSync(dir_out); | |
detection_uids= _.filter(detection_uids, function(file_name) { return file_name.indexOf('.jpg') != -1; }); | |
var face_uids= [], | |
face_attributes= [], | |
uidToIndexMap= {}; | |
async.eachLimit(_.range(detection_uids.length), 1, | |
function(i, done) | |
{ | |
var detection_uid= detection_uids[i]; | |
detection_uid= detection_uid.substring(0, detection_uid.indexOf('.jpg')); | |
var detection= JSON.parse(fs.readFileSync(dir_out + detection_uid + '.json').toString()); | |
if(detection.attributes.face.confidence > 50) | |
{ | |
var buff= fs.readFileSync(dir_out + detection_uid + '.jpg'); | |
cv.readImage(buff, | |
function(err, _im) | |
{ | |
if (err) return done(err); | |
var size= _im.size(), h= size[0], w= size[1]; | |
if(Math.min(w,h) > 125) | |
{ | |
face_uids.push(detection_uid); | |
face_attributes.push(detection.attributes); | |
uidToIndexMap[detection_uid]= face_uids.length - 1; | |
} | |
else | |
{ | |
fs.createReadStream(dir_out + detection_uid + '.jpg').pipe(fs.createWriteStream(dir_out + 'too_small/' + detection_uid + '.jpg')); | |
} | |
done(); | |
}); | |
} | |
else | |
{ | |
fs.createReadStream(dir_out + detection_uid + '.jpg').pipe(fs.createWriteStream(dir_out + 'low_confidence/' + detection_uid + '.jpg')); | |
done(); | |
} | |
}, | |
function(err) | |
{ | |
if(err) return done(err); | |
context.face_uids= face_uids; | |
context.face_attributes= face_attributes; | |
context.uidToIndexMap= uidToIndexMap; | |
done(); | |
}); | |
}, | |
function(done) | |
{ | |
var face_uids= context.face_uids, | |
face_attributes= context.face_attributes, | |
uidToIndexMap= context.uidToIndexMap; | |
console.log('D'); | |
var npoints = face_uids.length, | |
D = []; | |
_.range(npoints-1).forEach( | |
function(i) | |
{ | |
D[i]= _.map(_.range(npoints-i-1), function(j) { return 0; }); | |
}); | |
console.log('D matches'); | |
async.eachLimit(_.range(face_uids.length), 1, | |
function(i, done) | |
{ | |
var face_uid= face_uids[i], | |
index1= uidToIndexMap[face_uid]; | |
//var matches= JSON.parse(fs.readFileSync(dir_out + 'matches/' + face_uid + '.json').toString()); | |
var detection= JSON.parse(fs.readFileSync(dir_out + face_uid + '.json').toString()), | |
matches= detection.uids ? detection.uids : []; | |
matches= _.map(matches, | |
function(_match) | |
{ | |
var face_uid= _match.uid.substring(0, _match.uid.indexOf('@')), | |
match= { face_uid: face_uid, confidence: _match.confidence }; | |
return match; | |
}); | |
matches.forEach( | |
function(match) | |
{ | |
var index2= uidToIndexMap[match.face_uid]; | |
if(index2 && index1 != index2) | |
{ | |
var minIndex= Math.min(index1, index2), | |
maxIndex= Math.max(index1, index2); | |
//if(D[minIndex][maxIndex - minIndex - 1] != 0) | |
// console.log(D[minIndex][maxIndex - minIndex - 1] + ' <= ' + match.confidence); | |
D[minIndex][maxIndex - minIndex - 1]= match.confidence; | |
} | |
}); | |
done(); | |
}, | |
function(err) | |
{ | |
if(err) return done(err); | |
console.log('D done.'); | |
context.D= D; | |
done(); | |
}); | |
}, | |
function(done) | |
{ | |
var face_uid= '001d0021_0060d7e5a738e'; | |
fre.printMatchScores(context.D, context.face_uids, context.uidToIndexMap[face_uid]); | |
done(); | |
}, | |
function(done) | |
{ | |
var params = { minMergeSimilarity1: 60, minMergeSimilarity2: 100, N: 10 }; | |
params.face_uids= context.face_uids; // for debugging only | |
params.uidToIndexMap= context.uidToIndexMap; // for debugging only | |
var clustering_0_fileName= dir_out + 'clustering_' + 68 + '_' + 100 + '_sz/clustering.json'; | |
//if(fs.existsSync(clustering_0_fileName)) params.clustering_0 = JSON.parse(fs.readFileSync(clustering_0_fileName).toString()); | |
context.params= params; | |
//context.D.forEach(function(row) { console.log(row.join(', ')); }); | |
console.time('Clustering.'); | |
var clustering = fre.clusterLibrary(context.D, params); | |
console.timeEnd('Clustering.'); | |
var clustering_dir= dir_out + 'clustering_' + params.minMergeSimilarity1 + '_' + params.minMergeSimilarity2 + '_sz'; | |
if(!fs.existsSync(clustering_dir)) | |
fs.mkdirSync(clustering_dir); | |
fs.writeFileSync(clustering_dir + '/clustering.json', JSON.stringify(clustering)); | |
done(); | |
}, | |
function(done) | |
{ | |
var params = context.params, | |
clustering_dir= dir_out + 'clustering_' + params.minMergeSimilarity1 + '_' + params.minMergeSimilarity2 + '_sz'; | |
var clustering= JSON.parse(fs.readFileSync(clustering_dir + '/clustering.json').toString()); | |
context.clustering_dir= clustering_dir; | |
console.log('_.unique(clustering.clusters).length=' + _.unique(clustering.clusters).length); | |
console.log('clustering.medoids.length=' + clustering.medoids.length); | |
console.log(clustering.clusters); | |
console.log(clustering.medoids); | |
// get cluster sizes | |
var clusterSizes= {}; | |
_.range(clustering.clusters.length).forEach( | |
function(i) | |
{ | |
var cluster_id= clustering.clusters[i]; | |
if(clusterSizes[cluster_id]) | |
clusterSizes[cluster_id]= clusterSizes[cluster_id] + 1; | |
else | |
clusterSizes[cluster_id]= 1; | |
}); | |
console.log('copying files...'); | |
//_.range(clustering.clusters.length).forEach( | |
//function(i) | |
async.eachLimit(_.range(clustering.clusters.length), 10, | |
function(i, done) | |
{ | |
var face_uid= context.face_uids[i], | |
cluster_id= clustering.clusters[i], | |
cluster_dir= clustering_dir + '/cluster_' + cluster_id, | |
unclustered_dir= clustering_dir + '/unclustered'; | |
if(clusterSizes[cluster_id] >= 3) | |
{ | |
if(!fs.existsSync(cluster_dir)) | |
fs.mkdirSync(cluster_dir); | |
//fs.createReadStream(dir_out + face_uid + '.jpg').pipe(fs.createWriteStream(cluster_dir + '/' + face_uid + '.jpg')); | |
var buff= fs.readFileSync(dir_out + face_uid + '.jpg'); | |
fs.writeFileSync(cluster_dir + '/' + face_uid + '.jpg', buff); | |
} | |
else | |
{ | |
if(!fs.existsSync(unclustered_dir)) | |
fs.mkdirSync(unclustered_dir); | |
//fs.createReadStream(dir_out + face_uid + '.jpg').pipe(fs.createWriteStream(unclustered_dir + '/' + face_uid + '.jpg')); | |
var buff= fs.readFileSync(dir_out + face_uid + '.jpg'); | |
fs.writeFileSync(unclustered_dir + '/' + face_uid + '.jpg', buff); | |
} | |
done(); | |
}, done); | |
/*_.range(clustering.medoids.length).forEach( | |
function(i) | |
{ | |
var face_uid= context.face_uids[clustering.medoids[i]], | |
cluster_id= i, | |
cluster_dir= clustering_dir + '/cluster_' + cluster_id; | |
if(!fs.existsSync(cluster_dir)) | |
fs.mkdirSync(cluster_dir); | |
fs.createReadStream(dir_out + face_uid + '.jpg').pipe(fs.createWriteStream(cluster_dir + '/' + face_uid + '_medoid.jpg')); | |
});*/ | |
//done(); | |
}, | |
function(done) | |
{ | |
// compute age and gender for each cluster | |
var cluster_dirs= fs.readdirSync(context.clustering_dir); | |
cluster_dirs= _.filter(cluster_dirs, function(file_name) { return file_name.indexOf('cluster_') == 0; }); | |
async.eachLimit(_.range(cluster_dirs.length), 1, | |
function(i, done) | |
{ | |
var cluster_dir= context.clustering_dir + '/' + cluster_dirs[i], | |
face_files= fs.readdirSync(cluster_dir); | |
face_files= _.filter(face_files, function(file_name) { return file_name.indexOf('.jpg') != -1; }); | |
var ages= [], | |
maleAcc= 0, femaleAcc= 0; | |
face_files.forEach( | |
function(file_name) | |
{ | |
var face_uid= file_name.substring(0, file_name.length-4), | |
detection= JSON.parse(fs.readFileSync(context.clustering_dir + '/../' + face_uid + '.json').toString()); | |
ages.push(+detection.attributes.age_est.value); | |
if(detection.attributes.gender.value == 'male') maleAcc+= detection.attributes.gender.confidence; | |
else femaleAcc+= detection.attributes.gender.confidence; | |
}); | |
var clusterAttributes= {}; | |
clusterAttributes.age= _.reduce(ages, function(memo, num) { return memo + num; }, 0) / ages.length; | |
clusterAttributes.male= maleAcc / ages.length; | |
clusterAttributes.female= femaleAcc / ages.length; | |
clusterAttributes.gender= maleAcc > femaleAcc ? 'male' : 'female'; | |
fs.writeFileSync(cluster_dir + '/clusterAttributes.json', JSON.stringify(clusterAttributes)); | |
done(); | |
}, done); | |
}], | |
function(err) | |
{ | |
console.log(err); | |
}); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment