PandaWhisperer · January 20, 2015 06:48
diff --git a/clustering_skybio_pk.js b/clustering_skybio_pk.js
 var async= require('async'),
    cv= require('opencv'),
    daoUtil= require('../dao/util'),
    skybio= require('../lib/skybio'),
    kairos= require('../lib/kairos'),
    fre= require('../lib/fre'),
    _= require('underscore'),
    stats= require('stats-lite'),
    fs = require('fs'),
    request = require('request'),
    uuid= require('node-uuid').v4,
    done= console.log,
    util= require('../lib/util');

 var s3= require('../lib/s3'),
    db= require('../lib/dynamo').db,
    pkBucket= s3('photokharma-files'),
    bucket= s3('photokharma-users');


 // Experiment configuration: the user whose faces are processed, the directory
 // that receives all intermediate files, and the namespace handed to
 // skybio.facesRecognize.
 var user_id= 'af0a8a88-f560-45db-b8a2-a47c29b49624';
 // Alternate user: '4267ad25-48cc-4ad3-ad1f-2496171d4ed6'
 // Alternate output root: '/home/ec2-user/skybio_exp/' + user_id + '/'
 var dir_out= '/Users/agr/Documents/sm14/photokharma/tmp/skybio/' + user_id + '/';
 var namespace= 'clustering_5';

 // Scratch object the waterfall steps use to hand results to later steps.
 var context= {};

 async.waterfall([
 function(done)
 {
    if(!fs.existsSync(dir_out))
        fs.mkdirSync(dir_out);
    done();
 },
 function(done)
 {
    return done();
    db.face.queryIndex(['user_id','face_id'], user_id,
    function (err, faces)
    {
        if (err) return done(err);

        context.pkFaces= faces;
        console.log('faces.length=' + faces.length);

        done();
    }, ['photo_id','face_id','hash','drop_reason','facebook','facebook_tag_date','facebook_tag_id','facebook_tag_name']);
 },
 function(done)
 {
    return done();
    async.eachLimit(_.range(context.pkFaces.length), 1,
    function(idx, done)
    {
        var face= context.pkFaces[idx];
        if(!face.drop_reason)
        {
            pkBucket.getBuffer(['face',face.hash+'.jpg'],
            function(err, buff)
            {
                if(err) return done(err);

                fs.writeFileSync(dir_out + face.hash + '.jpg', buff);
                fs.writeFileSync(dir_out + face.hash + '.json', JSON.stringify(face));
                done();
            });
        }
        else done();
    }, done);
 },
 function(done)
 {
    var fileNames= fs.readdirSync(dir_out);
    fileNames= _.filter(fileNames, function(file_name) { return file_name.indexOf('.json') != -1; });
    context.fileNames= fileNames;
    dir_out = dir_out + 'skybio/';
    if(!fs.existsSync(dir_out))
        fs.mkdirSync(dir_out);
    if(!fs.existsSync(dir_out+'responses/'))
        fs.mkdirSync(dir_out+'responses/');
    if(!fs.existsSync(dir_out+'low_confidence/'))
        fs.mkdirSync(dir_out+'low_confidence/');
    if(!fs.existsSync(dir_out+'too_small/'))
        fs.mkdirSync(dir_out+'too_small/');
    done();
 },
 function(done)
 {
    return done();
    // detect and add faces
    async.eachLimit(_.range(context.fileNames.length), 1,
    function(idx, done)
    {
        var dbface= JSON.parse(fs.readFileSync(dir_out + '../' + context.fileNames[idx]).toString());
            url= util.faceUrl(dbface);
        console.log(url);

        if(fs.existsSync(dir_out + 'responses/' + dbface.hash + '.json'))
        {
            console.log('Skipping.');
            return done();
        }

        var buff, im;

        skybio.facesRecognize(namespace, ['all'], [url], 10000, true,
        function(err,res)
        {
            if(err)
            {
                //return done();
                res= { photos: [{ tags: [] }] };
            }

            fs.writeFileSync(dir_out + 'responses/' + dbface.hash + '.json', JSON.stringify(res));

            var detections= res.photos[0].tags;
            console.log('detections.length=' + detections.length);

            async.eachLimit(detections, 1,
            function(detection, done)
            {
                detection.url= res.photos[0].url;

                async.waterfall([
                function(done)
                {
                    if(!buff)
                    {
                        request.get({ url: url, encoding: null },
                        function (err, res, body)
                        {
                            if (err) return done(err);

                            buff= body;
                            cv.readImage(buff,
                            function(err, _im)
                            {
                                if (err) return done(err);

                                im= _im;
                                done();
                            });
                        });
                    }
                    else done();
                },
                function(done)
                {
                    var detection_box=  { x: detection.center.x - detection.width/2,
                         y: detection.center.y - detection.height/2,
                         w: detection.width,
                         h: detection.height };
                    var cd= fre.cropDetection(im, detection_box, 0.6),
                        crop= cd.crop;
                    crop.buff= crop.toBuffer();

                    fs.writeFileSync(dir_out + detection.tid + '.jpg', crop.buff);
                    fs.writeFileSync(dir_out + detection.tid + '.json', JSON.stringify(detection));

                    var matches= detection.uids ? detection.uids : [];
                    console.log('matches.length=' + matches.length);

                    matches= _.map(matches,
                        function(_match)
                        {
                            var face_uid= _match.uid.substring(0, _match.uid.indexOf('@')),
                                match= { face_uid: face_uid, confidence: _match.confidence };
                            return match;
                        });

                    fs.writeFileSync(dir_out + detection.tid + '_matches.json', JSON.stringify(matches));
                    done();
                }], done);
            }, done);
        });
    }, done);
 },
 function(done)
 {
    console.log('Generating similarity matrix...');
    var detection_uids= fs.readdirSync(dir_out);
    detection_uids= _.filter(detection_uids, function(file_name) { return file_name.indexOf('.jpg') != -1; });

    var face_uids= [],
        face_attributes= [],
        uidToIndexMap= {};

    async.eachLimit(_.range(detection_uids.length), 1,
    function(i, done)
    {
        var detection_uid= detection_uids[i];
        detection_uid= detection_uid.substring(0, detection_uid.indexOf('.jpg'));

        var detection= JSON.parse(fs.readFileSync(dir_out + detection_uid + '.json').toString());

        if(detection.attributes.face.confidence > 50)
        {
            var buff= fs.readFileSync(dir_out + detection_uid + '.jpg');
            cv.readImage(buff,
            function(err, _im)
            {
                if (err) return done(err);

                var size= _im.size(), h= size[0], w= size[1];

                if(Math.min(w,h) > 125)
                {
                    face_uids.push(detection_uid);
                    face_attributes.push(detection.attributes);
                    uidToIndexMap[detection_uid]= face_uids.length - 1;
                }
                else
                {
                    fs.createReadStream(dir_out + detection_uid + '.jpg').pipe(fs.createWriteStream(dir_out + 'too_small/' + detection_uid + '.jpg'));
                }
                done();
            });
        }
        else
        {
            fs.createReadStream(dir_out + detection_uid + '.jpg').pipe(fs.createWriteStream(dir_out + 'low_confidence/' + detection_uid + '.jpg'));
            done();
        }
    },
    function(err)
    {
        if(err) return done(err);

        context.face_uids= face_uids;
        context.face_attributes= face_attributes;
        context.uidToIndexMap= uidToIndexMap;
        done();
    });
 },
 function(done)
 {
    var face_uids= context.face_uids,
        face_attributes= context.face_attributes,
        uidToIndexMap= context.uidToIndexMap;

    console.log('D');
    var npoints = face_uids.length,
        D = [];

    _.range(npoints-1).forEach(
        function(i)
        {
            D[i]= _.map(_.range(npoints-i-1), function(j) { return 0; });
        });

    console.log('D matches');
    async.eachLimit(_.range(face_uids.length), 1,
    function(i, done)
    {
        var face_uid= face_uids[i],
            index1= uidToIndexMap[face_uid];

        //var matches= JSON.parse(fs.readFileSync(dir_out + 'matches/' + face_uid + '.json').toString());

        var detection= JSON.parse(fs.readFileSync(dir_out + face_uid + '.json').toString()),
            matches= detection.uids ? detection.uids : [];

        matches= _.map(matches,
            function(_match)
            {
                var face_uid= _match.uid.substring(0, _match.uid.indexOf('@')),
                    match= { face_uid: face_uid, confidence: _match.confidence };
                return match;
            });

        matches.forEach(
        function(match)
        {
            var index2= uidToIndexMap[match.face_uid];

            if(index2 && index1 != index2)
            {
                var minIndex= Math.min(index1, index2),
                    maxIndex= Math.max(index1, index2);

                //if(D[minIndex][maxIndex - minIndex - 1] != 0)
                //    console.log(D[minIndex][maxIndex - minIndex - 1] + ' <= ' + match.confidence);

                D[minIndex][maxIndex - minIndex - 1]= match.confidence;
            }
        });

        done();
    },
    function(err)
    {
        if(err) return done(err);

        console.log('D done.');
        context.D= D;
        done();
    });
 },
 function(done)
 {
    var face_uid= '001d0021_0060d7e5a738e';

    fre.printMatchScores(context.D, context.face_uids, context.uidToIndexMap[face_uid]);
    done();
 },
 function(done)
 {
    var params = { minMergeSimilarity1: 60, minMergeSimilarity2: 100, N: 10 };
    params.face_uids= context.face_uids; // for debugging only
    params.uidToIndexMap= context.uidToIndexMap; // for debugging only

    var clustering_0_fileName= dir_out + 'clustering_' + 68 + '_' + 100 + '_sz/clustering.json';
    //if(fs.existsSync(clustering_0_fileName)) params.clustering_0 = JSON.parse(fs.readFileSync(clustering_0_fileName).toString());

    context.params= params;

    //context.D.forEach(function(row) { console.log(row.join(', ')); });
    console.time('Clustering.');
    var clustering = fre.clusterLibrary(context.D, params);
    console.timeEnd('Clustering.');

    var clustering_dir= dir_out + 'clustering_' + params.minMergeSimilarity1 + '_' + params.minMergeSimilarity2 + '_sz';
    if(!fs.existsSync(clustering_dir))
        fs.mkdirSync(clustering_dir);

    fs.writeFileSync(clustering_dir + '/clustering.json', JSON.stringify(clustering));
    done();
 },
 function(done)
 {
    var params = context.params,
        clustering_dir= dir_out + 'clustering_' + params.minMergeSimilarity1 + '_' + params.minMergeSimilarity2 + '_sz';

    var clustering= JSON.parse(fs.readFileSync(clustering_dir + '/clustering.json').toString());
    context.clustering_dir= clustering_dir;

    console.log('_.unique(clustering.clusters).length=' + _.unique(clustering.clusters).length);
    console.log('clustering.medoids.length=' + clustering.medoids.length);
    console.log(clustering.clusters);
    console.log(clustering.medoids);

    // get cluster sizes
    var clusterSizes= {};
    _.range(clustering.clusters.length).forEach(
    function(i)
    {
        var cluster_id= clustering.clusters[i];
        if(clusterSizes[cluster_id])
            clusterSizes[cluster_id]= clusterSizes[cluster_id] + 1;
        else
            clusterSizes[cluster_id]= 1;
    });

    console.log('copying files...');
    //_.range(clustering.clusters.length).forEach(
    //function(i)
    async.eachLimit(_.range(clustering.clusters.length), 10,
    function(i, done)
    {
        var face_uid= context.face_uids[i],
            cluster_id= clustering.clusters[i],
            cluster_dir= clustering_dir + '/cluster_' + cluster_id,
            unclustered_dir= clustering_dir + '/unclustered';

        if(clusterSizes[cluster_id] >= 3)
        {
            if(!fs.existsSync(cluster_dir))
                fs.mkdirSync(cluster_dir);

            //fs.createReadStream(dir_out + face_uid + '.jpg').pipe(fs.createWriteStream(cluster_dir + '/' + face_uid + '.jpg'));
            var buff= fs.readFileSync(dir_out + face_uid + '.jpg');
            fs.writeFileSync(cluster_dir + '/' + face_uid + '.jpg', buff);
        }
        else
        {
            if(!fs.existsSync(unclustered_dir))
                fs.mkdirSync(unclustered_dir);

            //fs.createReadStream(dir_out + face_uid + '.jpg').pipe(fs.createWriteStream(unclustered_dir + '/' + face_uid + '.jpg'));
            var buff= fs.readFileSync(dir_out + face_uid + '.jpg');
            fs.writeFileSync(unclustered_dir + '/' + face_uid + '.jpg', buff);
        }
        done();
    }, done);

    /*_.range(clustering.medoids.length).forEach(
    function(i)
    {
        var face_uid= context.face_uids[clustering.medoids[i]],
            cluster_id= i,
            cluster_dir= clustering_dir + '/cluster_' + cluster_id;

        if(!fs.existsSync(cluster_dir))
            fs.mkdirSync(cluster_dir);

        fs.createReadStream(dir_out + face_uid + '.jpg').pipe(fs.createWriteStream(cluster_dir + '/' + face_uid + '_medoid.jpg'));
    });*/
    //done();
 },
 function(done)
 {
    // compute age and gender for each cluster
    var cluster_dirs= fs.readdirSync(context.clustering_dir);
    cluster_dirs= _.filter(cluster_dirs, function(file_name) { return file_name.indexOf('cluster_') == 0; });

    async.eachLimit(_.range(cluster_dirs.length), 1,
    function(i, done)
    {
        var cluster_dir= context.clustering_dir + '/' + cluster_dirs[i],
            face_files= fs.readdirSync(cluster_dir);
        face_files= _.filter(face_files, function(file_name) { return file_name.indexOf('.jpg') != -1; });

        var ages= [],
            maleAcc= 0, femaleAcc= 0;

        face_files.forEach(
        function(file_name)
        {
            var face_uid= file_name.substring(0, file_name.length-4),
                detection= JSON.parse(fs.readFileSync(context.clustering_dir + '/../' + face_uid + '.json').toString());

            ages.push(+detection.attributes.age_est.value);
            if(detection.attributes.gender.value == 'male') maleAcc+=  detection.attributes.gender.confidence;
            else femaleAcc+=  detection.attributes.gender.confidence;
        });

        var clusterAttributes= {};
        clusterAttributes.age= _.reduce(ages, function(memo, num) { return memo + num; }, 0) / ages.length;
        clusterAttributes.male= maleAcc / ages.length;
        clusterAttributes.female= femaleAcc / ages.length;
        clusterAttributes.gender= maleAcc > femaleAcc ? 'male' : 'female';

        fs.writeFileSync(cluster_dir + '/clusterAttributes.json', JSON.stringify(clusterAttributes));
        done();
    }, done);
 }],
 function(err)
 {
    console.log(err);
 });
	var async= require('async'),
	cv= require('opencv'),
	daoUtil= require('../dao/util'),
	skybio= require('../lib/skybio'),
	kairos= require('../lib/kairos'),
	fre= require('../lib/fre'),
	_= require('underscore'),
	stats= require('stats-lite'),
	fs = require('fs'),
	request = require('request'),
	uuid= require('node-uuid').v4,
	done= console.log,
	util= require('../lib/util');

	var s3= require('../lib/s3'),
	db= require('../lib/dynamo').db,
	pkBucket= s3('photokharma-files'),
	bucket= s3('photokharma-users');


	// Experiment configuration: the user whose faces are processed, the directory
	// that receives all intermediate files, and the namespace handed to
	// skybio.facesRecognize.
	var user_id= 'af0a8a88-f560-45db-b8a2-a47c29b49624';
	// Alternate user: '4267ad25-48cc-4ad3-ad1f-2496171d4ed6'
	// Alternate output root: '/home/ec2-user/skybio_exp/' + user_id + '/'
	var dir_out= '/Users/agr/Documents/sm14/photokharma/tmp/skybio/' + user_id + '/';
	var namespace= 'clustering_5';

	// Scratch object the waterfall steps use to hand results to later steps.
	var context= {};

	async.waterfall([
	function(done)
	{
	if(!fs.existsSync(dir_out))
	fs.mkdirSync(dir_out);
	done();
	},
	function(done)
	{
	return done();
	db.face.queryIndex(['user_id','face_id'], user_id,
	function (err, faces)
	{
	if (err) return done(err);

	context.pkFaces= faces;
	console.log('faces.length=' + faces.length);

	done();
	}, ['photo_id','face_id','hash','drop_reason','facebook','facebook_tag_date','facebook_tag_id','facebook_tag_name']);
	},
	function(done)
	{
	return done();
	async.eachLimit(_.range(context.pkFaces.length), 1,
	function(idx, done)
	{
	var face= context.pkFaces[idx];
	if(!face.drop_reason)
	{
	pkBucket.getBuffer(['face',face.hash+'.jpg'],
	function(err, buff)
	{
	if(err) return done(err);

	fs.writeFileSync(dir_out + face.hash + '.jpg', buff);
	fs.writeFileSync(dir_out + face.hash + '.json', JSON.stringify(face));
	done();
	});
	}
	else done();
	}, done);
	},
	function(done)
	{
	var fileNames= fs.readdirSync(dir_out);
	fileNames= _.filter(fileNames, function(file_name) { return file_name.indexOf('.json') != -1; });
	context.fileNames= fileNames;
	dir_out = dir_out + 'skybio/';
	if(!fs.existsSync(dir_out))
	fs.mkdirSync(dir_out);
	if(!fs.existsSync(dir_out+'responses/'))
	fs.mkdirSync(dir_out+'responses/');
	if(!fs.existsSync(dir_out+'low_confidence/'))
	fs.mkdirSync(dir_out+'low_confidence/');
	if(!fs.existsSync(dir_out+'too_small/'))
	fs.mkdirSync(dir_out+'too_small/');
	done();
	},
	function(done)
	{
	return done();
	// detect and add faces
	async.eachLimit(_.range(context.fileNames.length), 1,
	function(idx, done)
	{
	var dbface= JSON.parse(fs.readFileSync(dir_out + '../' + context.fileNames[idx]).toString());
	url= util.faceUrl(dbface);
	console.log(url);

	if(fs.existsSync(dir_out + 'responses/' + dbface.hash + '.json'))
	{
	console.log('Skipping.');
	return done();
	}

	var buff, im;

	skybio.facesRecognize(namespace, ['all'], [url], 10000, true,
	function(err,res)
	{
	if(err)
	{
	//return done();
	res= { photos: [{ tags: [] }] };
	}

	fs.writeFileSync(dir_out + 'responses/' + dbface.hash + '.json', JSON.stringify(res));

	var detections= res.photos[0].tags;
	console.log('detections.length=' + detections.length);

	async.eachLimit(detections, 1,
	function(detection, done)
	{
	detection.url= res.photos[0].url;

	async.waterfall([
	function(done)
	{
	if(!buff)
	{
	request.get({ url: url, encoding: null },
	function (err, res, body)
	{
	if (err) return done(err);

	buff= body;
	cv.readImage(buff,
	function(err, _im)
	{
	if (err) return done(err);

	im= _im;
	done();
	});
	});
	}
	else done();
	},
	function(done)
	{
	var detection_box= { x: detection.center.x - detection.width/2,
	y: detection.center.y - detection.height/2,
	w: detection.width,
	h: detection.height };
	var cd= fre.cropDetection(im, detection_box, 0.6),
	crop= cd.crop;
	crop.buff= crop.toBuffer();

	fs.writeFileSync(dir_out + detection.tid + '.jpg', crop.buff);
	fs.writeFileSync(dir_out + detection.tid + '.json', JSON.stringify(detection));

	var matches= detection.uids ? detection.uids : [];
	console.log('matches.length=' + matches.length);

	matches= _.map(matches,
	function(_match)
	{
	var face_uid= _match.uid.substring(0, _match.uid.indexOf('@')),
	match= { face_uid: face_uid, confidence: _match.confidence };
	return match;
	});

	fs.writeFileSync(dir_out + detection.tid + '_matches.json', JSON.stringify(matches));
	done();
	}], done);
	}, done);
	});
	}, done);
	},
	function(done)
	{
	console.log('Generating similarity matrix...');
	var detection_uids= fs.readdirSync(dir_out);
	detection_uids= _.filter(detection_uids, function(file_name) { return file_name.indexOf('.jpg') != -1; });

	var face_uids= [],
	face_attributes= [],
	uidToIndexMap= {};

	async.eachLimit(_.range(detection_uids.length), 1,
	function(i, done)
	{
	var detection_uid= detection_uids[i];
	detection_uid= detection_uid.substring(0, detection_uid.indexOf('.jpg'));

	var detection= JSON.parse(fs.readFileSync(dir_out + detection_uid + '.json').toString());

	if(detection.attributes.face.confidence > 50)
	{
	var buff= fs.readFileSync(dir_out + detection_uid + '.jpg');
	cv.readImage(buff,
	function(err, _im)
	{
	if (err) return done(err);

	var size= _im.size(), h= size[0], w= size[1];

	if(Math.min(w,h) > 125)
	{
	face_uids.push(detection_uid);
	face_attributes.push(detection.attributes);
	uidToIndexMap[detection_uid]= face_uids.length - 1;
	}
	else
	{
	fs.createReadStream(dir_out + detection_uid + '.jpg').pipe(fs.createWriteStream(dir_out + 'too_small/' + detection_uid + '.jpg'));
	}
	done();
	});
	}
	else
	{
	fs.createReadStream(dir_out + detection_uid + '.jpg').pipe(fs.createWriteStream(dir_out + 'low_confidence/' + detection_uid + '.jpg'));
	done();
	}
	},
	function(err)
	{
	if(err) return done(err);

	context.face_uids= face_uids;
	context.face_attributes= face_attributes;
	context.uidToIndexMap= uidToIndexMap;
	done();
	});
	},
	function(done)
	{
	var face_uids= context.face_uids,
	face_attributes= context.face_attributes,
	uidToIndexMap= context.uidToIndexMap;

	console.log('D');
	var npoints = face_uids.length,
	D = [];

	_.range(npoints-1).forEach(
	function(i)
	{
	D[i]= _.map(_.range(npoints-i-1), function(j) { return 0; });
	});

	console.log('D matches');
	async.eachLimit(_.range(face_uids.length), 1,
	function(i, done)
	{
	var face_uid= face_uids[i],
	index1= uidToIndexMap[face_uid];

	//var matches= JSON.parse(fs.readFileSync(dir_out + 'matches/' + face_uid + '.json').toString());

	var detection= JSON.parse(fs.readFileSync(dir_out + face_uid + '.json').toString()),
	matches= detection.uids ? detection.uids : [];

	matches= _.map(matches,
	function(_match)
	{
	var face_uid= _match.uid.substring(0, _match.uid.indexOf('@')),
	match= { face_uid: face_uid, confidence: _match.confidence };
	return match;
	});

	matches.forEach(
	function(match)
	{
	var index2= uidToIndexMap[match.face_uid];

	if(index2 && index1 != index2)
	{
	var minIndex= Math.min(index1, index2),
	maxIndex= Math.max(index1, index2);

	//if(D[minIndex][maxIndex - minIndex - 1] != 0)
	// console.log(D[minIndex][maxIndex - minIndex - 1] + ' <= ' + match.confidence);

	D[minIndex][maxIndex - minIndex - 1]= match.confidence;
	}
	});

	done();
	},
	function(err)
	{
	if(err) return done(err);

	console.log('D done.');
	context.D= D;
	done();
	});
	},
	function(done)
	{
	var face_uid= '001d0021_0060d7e5a738e';

	fre.printMatchScores(context.D, context.face_uids, context.uidToIndexMap[face_uid]);
	done();
	},
	function(done)
	{
	var params = { minMergeSimilarity1: 60, minMergeSimilarity2: 100, N: 10 };
	params.face_uids= context.face_uids; // for debugging only
	params.uidToIndexMap= context.uidToIndexMap; // for debugging only

	var clustering_0_fileName= dir_out + 'clustering_' + 68 + '_' + 100 + '_sz/clustering.json';
	//if(fs.existsSync(clustering_0_fileName)) params.clustering_0 = JSON.parse(fs.readFileSync(clustering_0_fileName).toString());

	context.params= params;

	//context.D.forEach(function(row) { console.log(row.join(', ')); });
	console.time('Clustering.');
	var clustering = fre.clusterLibrary(context.D, params);
	console.timeEnd('Clustering.');

	var clustering_dir= dir_out + 'clustering_' + params.minMergeSimilarity1 + '_' + params.minMergeSimilarity2 + '_sz';
	if(!fs.existsSync(clustering_dir))
	fs.mkdirSync(clustering_dir);

	fs.writeFileSync(clustering_dir + '/clustering.json', JSON.stringify(clustering));
	done();
	},
	function(done)
	{
	var params = context.params,
	clustering_dir= dir_out + 'clustering_' + params.minMergeSimilarity1 + '_' + params.minMergeSimilarity2 + '_sz';

	var clustering= JSON.parse(fs.readFileSync(clustering_dir + '/clustering.json').toString());
	context.clustering_dir= clustering_dir;

	console.log('_.unique(clustering.clusters).length=' + _.unique(clustering.clusters).length);
	console.log('clustering.medoids.length=' + clustering.medoids.length);
	console.log(clustering.clusters);
	console.log(clustering.medoids);

	// get cluster sizes
	var clusterSizes= {};
	_.range(clustering.clusters.length).forEach(
	function(i)
	{
	var cluster_id= clustering.clusters[i];
	if(clusterSizes[cluster_id])
	clusterSizes[cluster_id]= clusterSizes[cluster_id] + 1;
	else
	clusterSizes[cluster_id]= 1;
	});

	console.log('copying files...');
	//_.range(clustering.clusters.length).forEach(
	//function(i)
	async.eachLimit(_.range(clustering.clusters.length), 10,
	function(i, done)
	{
	var face_uid= context.face_uids[i],
	cluster_id= clustering.clusters[i],
	cluster_dir= clustering_dir + '/cluster_' + cluster_id,
	unclustered_dir= clustering_dir + '/unclustered';

	if(clusterSizes[cluster_id] >= 3)
	{
	if(!fs.existsSync(cluster_dir))
	fs.mkdirSync(cluster_dir);

	//fs.createReadStream(dir_out + face_uid + '.jpg').pipe(fs.createWriteStream(cluster_dir + '/' + face_uid + '.jpg'));
	var buff= fs.readFileSync(dir_out + face_uid + '.jpg');
	fs.writeFileSync(cluster_dir + '/' + face_uid + '.jpg', buff);
	}
	else
	{
	if(!fs.existsSync(unclustered_dir))
	fs.mkdirSync(unclustered_dir);

	//fs.createReadStream(dir_out + face_uid + '.jpg').pipe(fs.createWriteStream(unclustered_dir + '/' + face_uid + '.jpg'));
	var buff= fs.readFileSync(dir_out + face_uid + '.jpg');
	fs.writeFileSync(unclustered_dir + '/' + face_uid + '.jpg', buff);
	}
	done();
	}, done);

	/*_.range(clustering.medoids.length).forEach(
	function(i)
	{
	var face_uid= context.face_uids[clustering.medoids[i]],
	cluster_id= i,
	cluster_dir= clustering_dir + '/cluster_' + cluster_id;

	if(!fs.existsSync(cluster_dir))
	fs.mkdirSync(cluster_dir);

	fs.createReadStream(dir_out + face_uid + '.jpg').pipe(fs.createWriteStream(cluster_dir + '/' + face_uid + '_medoid.jpg'));
	});*/
	//done();
	},
	function(done)
	{
	// compute age and gender for each cluster
	var cluster_dirs= fs.readdirSync(context.clustering_dir);
	cluster_dirs= _.filter(cluster_dirs, function(file_name) { return file_name.indexOf('cluster_') == 0; });

	async.eachLimit(_.range(cluster_dirs.length), 1,
	function(i, done)
	{
	var cluster_dir= context.clustering_dir + '/' + cluster_dirs[i],
	face_files= fs.readdirSync(cluster_dir);
	face_files= _.filter(face_files, function(file_name) { return file_name.indexOf('.jpg') != -1; });

	var ages= [],
	maleAcc= 0, femaleAcc= 0;

	face_files.forEach(
	function(file_name)
	{
	var face_uid= file_name.substring(0, file_name.length-4),
	detection= JSON.parse(fs.readFileSync(context.clustering_dir + '/../' + face_uid + '.json').toString());

	ages.push(+detection.attributes.age_est.value);
	if(detection.attributes.gender.value == 'male') maleAcc+= detection.attributes.gender.confidence;
	else femaleAcc+= detection.attributes.gender.confidence;
	});

	var clusterAttributes= {};
	clusterAttributes.age= _.reduce(ages, function(memo, num) { return memo + num; }, 0) / ages.length;
	clusterAttributes.male= maleAcc / ages.length;
	clusterAttributes.female= femaleAcc / ages.length;
	clusterAttributes.gender= maleAcc > femaleAcc ? 'male' : 'female';

	fs.writeFileSync(cluster_dir + '/clusterAttributes.json', JSON.stringify(clusterAttributes));
	done();
	}, done);
	}],
	function(err)
	{
	console.log(err);
	});