// new visualization
//"The New York Times")&api-key=f25c99da2f24daefca165f7a452d05ec:1:35029882
// Shared script-level state for the search -> vectorize -> train -> plot pipeline.
// Number of result pages requested per search; used as the loop bound in makeAPIcall.
var pagesOfStoriesToRequest = 9;
// Promises handed to $.when further down.
// NOTE(review): the code that pushes requests into this array is not visible in this view.
var requestsPromises = [];
// Raw (unsorted, possibly duplicated) keyword list; sorted in place and de-duplicated in processDocs.
var keywordsArray = [];
// Sorted, de-duplicated keyword list; assigned in processDocs.
var uniqueKeywordsArray;
// keyword -> index position within uniqueKeywordsArray; filled by createTemplateVectorMap.
var templateVectorMap = {};
// Iterated in initializeNeuralNetwork when running the trained network.
// Presumably holds one 0/1 vector per story — TODO confirm where it is filled.
var featureVectorsRaw = [];
var coordinates = []; //array of arrays for d3 to scatterplot...
// Read once, when this script is evaluated, from the #timesApiSearchInput element.
var rawUserInput = $('#timesApiSearchInput').val()
// The search text with every space character replaced by '+'.
var whiteSpaceToPlus = rawUserInput.replace(/ /g, '+');
// Builds one article-search request URL per page for the given search terms.
// searchKeys: string appended to the request URL as the query.
// NOTE(review): the lines that actually issue the request, and the closing
// braces of the loop and function, are not visible in this view.
function makeAPIcall(searchKeys){
// NOTE(review): `ii` is assigned without var/let here and no declaration is visible
// in this view, so it would be an implicit global — confirm.
for (ii=0; ii < pagesOfStoriesToRequest; ii++) {
// url: ""+searchKeys+"&fq=source:(%22The%20New%20York%20Times%22)&page="+ii+"&api-key=ebd81f171d792a60638e4dfa1eaec121:7:68519429"
// The query restricts results to begin_date 20030101 onward and to source "The New York Times",
// and selects page `ii`.
// NOTE(review): the API key is hard-coded in the URL string; consider moving it out of source.
url: ""+searchKeys+"&begin_date=20030101&fq=source:(%22The%20New%20York%20Times%22)&page="+ii+"&api-key=ebd81f171d792a60638e4dfa1eaec121:7:68519429"
// Runs the callback after every promise in requestsPromises has resolved.
$.when.apply($, requestsPromises).then(function() {
var arrayOfResponseObjects = []
// Walks the callback's arguments; the body that fills arrayOfResponseObjects is not visible here.
_.each(arguments, function(arg){
// Collapses the collected (nested) response arrays into a single flat array.
var nyt = _.flatten(arrayOfResponseObjects)
// Takes a single story document (doc).
// NOTE(review): the body is not visible in this view; judging by the name it presumably
// appends the story's keywords to keywordsArray — confirm against the full file.
function addToMasterKeywordsArray (doc) {
// Fills the global templateVectorMap so that each keyword in uniqueKeywordsArray
// maps to its index position in that array. Takes no arguments.
// Must run after uniqueKeywordsArray has been assigned (see processDocs).
function createTemplateVectorMap () {
_.each(uniqueKeywordsArray, function(keyword, indexposition){
// keyword -> position of that keyword's slot in a story vector
templateVectorMap[keyword] = indexposition;
// Turns one story into a keyword-presence vector: for each entry of doc.keywords,
// the slot at that keyword's templateVectorMap index is set to 1.
// doc: story object with a `keywords` array whose items expose a `value` property.
// NOTE(review): the return statement and closing brace are not visible in this view.
function vectorizeStory (doc) {
var vector = [] //we push arrays onto the trainingData array
// NOTE(review): the callback body is empty in this view; per the trailing comment it
// is meant to push a zero per key — confirm against the full file.
_.each(templateVectorMap, function(){
}) //push a zero onto vector for each key
_.each(doc.keywords, function(keywordObj){
// If keywordObj.value is not a key of templateVectorMap, indexPos is undefined and the
// assignment below sets a property named "undefined" instead of an array slot.
var indexPos = templateVectorMap[keywordObj.value]
vector[indexPos] = 1;
}) //get the position in templateVectorMap and set that position in the vector to 1
// Prepares training data from the search responses: publishes the responses on window,
// builds the sorted unique keyword list, and returns an array of {input, output} pairs
// in which input and output are the same vector.
// data: the response documents; stored as window.allResponses.
// Returns: finalTrainingData.
// NOTE(review): several calls (keyword collection, template-map creation, vectorizing)
// are not visible in this view; only their comments and log lines remain.
function processDocs (data) {
//let's see what we get back...
console.log('- - - - - - - - - - processing response data - - - - - - - - - - ')
// Exposed globally; visualization reads allResponses for dates, headlines and URLs.
window.allResponses = data;
//for each times story we get back... add each story's keywords to the master array
console.log('the raw master keyword list now has ' + keywordsArray.length + ' elements in it.')
//sort and unique, faster algo if sorted and we are sorting it, so pass true
//produce template vector
// Note: Array#sort sorts keywordsArray in place, so the global raw list is reordered too.
uniqueKeywordsArray = _.uniq(keywordsArray.sort(), true)
console.log('the uniqd master keyword list now has ' + uniqueKeywordsArray.length + ' elements in it.')
console.log('- - - - - - - - - - index position of keywords map - - - - - - - - - - ')
//turn keyword list into vector ['iran', 'israel'] => [0, 1] etc.
//transmute arrays into [{input: array, output: array}]
// NOTE(review): the mapping call and its first argument are missing on the next line
// in this view; presumably it maps over the story vectors — confirm.
var finalTrainingData =, function(vector){
return {input: vector, output: vector} // because it's an autoencoder, input and output are the same.
}) // we are instead interested in the hidden layer
return finalTrainingData;
// Builds training data via processDocs, creates a brain.NeuralNetwork with one hidden
// layer of two nodes, trains it on that data, then feeds every vector in
// featureVectorsRaw through the network and records neuralNetwork.outputs[1] per run.
// data: passed straight through to processDocs.
// Side effects: sets window.neuralNetwork and window.dataset; logs progress to the console.
// NOTE(review): several closing brackets and the call that starts the plot are not
// visible in this view.
function initializeNeuralNetwork (data) {
var nytimes = processDocs(data)
window.neuralNetwork = new brain.NeuralNetwork({
// One hidden layer with two nodes; the notes after this function treat those two
// values as the x/y position of a story.
hiddenLayers: [2]
console.log('- - - - - - - - - - neural network - - - - - - - - - -')
console.log('- - - - - - - - - - input === output autoencoder feature vectors - - - - - - - - - -')
console.log('- - - - - - - - - - commencing training - - - - - - - -')
neuralNetwork.train(nytimes, {
errorThresh: 0.004,
learningRate: 0.3,
iterations: 4001,
log: true,
logPeriod: 1000
console.log('- - - - - - - - - - training complete, running real data - - - - - - - - - - -')
var runDataSigmoid = []
// runDataLinear and window.dataset are the same array object, so pushes below are
// visible through window.dataset as well.
var runDataLinear = window.dataset = []
_.each(featureVectorsRaw, function(storyAsVector, i){
// NOTE(review): the right-hand side is missing in this view; `run` has no visible
// declaration, so it would be an implicit global.
run =
// outputs[1] is presumably the hidden layer's activations (index 0 being the input
// layer) — TODO confirm against brain.js. slice(0) stores a copy of that array.
runDataSigmoid.push(neuralNetwork.outputs[1].slice(0)) // this line... ask colin.
_.each(featureVectorsRaw, function(storyAsVector, i){
// NOTE(review): `runLinear` has no visible declaration either (implicit global), and its
// value is not read in the visible code; the call is made before outputs[1] is copied.
runLinear = neuralNetwork.runLinear(storyAsVector)
runDataLinear.push(neuralNetwork.outputs[1].slice(0)) // this line... ask colin.
console.log('The run was successful. Here are the values of the hidden layer for each run: ')
console.log('- - - - - - - - - - visualizing... - - - - - - - - - - -')
//DONE sort keywords array
//DONE uniq sorted array
//DONE de facto at which position each keyword belongs... index 50 is 'iran'... each time take in a story...
//DONE when want to make [1,0]... make an empty vector full of zeros... make a map out of it too...
//DONE make an object that would be a map... keywords[currentWord] = indexposition
//DONE index of 1s and 0s...
//DONE when training... pass in... array of vectors that am creating... training case is the classification value
// when classification ... ... vector is the target vector... array of those
// transform vectors into format that they want - just so happens that the input and output are the same
// after i've trained it, go through them one at a time and check the two nodes of the hidden layer.
// that's the x y position!
/**
 * Returns the median of a list of numbers.
 *
 * The input array is not modified: a copy is sorted numerically. For an odd
 * number of values the middle element is returned; for an even number, the
 * mean of the two middle elements.
 *
 * @param {number[]} values - Numbers to take the median of.
 * @returns {number} The median, or NaN when `values` is empty.
 */
function median(values) {
  // Sort a copy with a numeric comparator; the default sort compares as strings.
  var sorted = values.slice().sort(function (a, b) { return a - b; });

  // An empty list has no median. NaN is what the previous arithmetic produced
  // by accident (undefined + undefined); make that result explicit.
  if (sorted.length === 0) {
    return NaN;
  }

  var half = Math.floor(sorted.length / 2);
  if (sorted.length % 2 === 1) {
    return sorted[half];
  }
  return (sorted[half - 1] + sorted[half]) / 2;
}
// Draws the stories as a scatterplot: each dataset entry supplies x (d[0]) and y (d[1]),
// and each point is coloured by how far its publication time is from the median time.
// dataset: array of two-element arrays (read below as d[0] and d[1]).
// Reads the global allResponses (set in processDocs) by index, so dataset[i] and
// allResponses[i] are assumed to describe the same story — TODO confirm.
// NOTE(review): many lines of the d3 chain (domain call, select/append, attr openers,
// closing brackets) are not visible in this view.
function visualization (dataset){
//define width and height
var w = 600;
var h = 600;
// Publication time of every response, in milliseconds since the epoch.
var times = [];
_.each(allResponses, function(d, i) {
times[i] = (new Date(allResponses[i].pub_date)).getTime();
var medianTime = median(times);
// x: min..max of d[0] mapped onto the plot width, leaving a 40px margin on each side.
var xScale = d3.scale.linear()
.domain([d3.min(dataset, function(d){ return d[0];}), d3.max(dataset, function(d){ return d[0]; })])
.range([40, w-40]);
// y: min..max of d[1] mapped onto the plot height, with the same 40px margin.
var yScale = d3.scale.linear()
.domain([d3.min(dataset, function(d){ return d[1];}), d3.max(dataset, function(d){ return d[1];})])
.range([40, h-40]);
// var colorScale = d3.scale.sqrt()
// Power scale (exponent 0.3) with output range 0..255. The two lines below compute the
// min and max offset from the median time; presumably they are the arguments of a
// .domain([...]) call whose opener is missing here — confirm.
var colorScale = d3.scale.pow().exponent(.3)
d3.min(dataset, function(d, i) { return times[i] - medianTime; }),
d3.max(dataset, function(d, i) { return times[i] - medianTime; })
.range([0, 255]);
// Tooltip whose content is the headline of the response at the same index.
var tip = d3.tip()
.attr('class', 'd3-tip')
.html(function(d, i) { return allResponses[i].headline.main; })
//First, we need to create the SVG element in which to place all our shapes:
// NOTE(review): the selection call before "body" is missing on the next line in this view.
var svg ="body")
"width": w,
"height": h
// Per-point attributes: position comes from the two scales.
cx: function(d,i){ return xScale(d[0]) },
cy: function(d,i){ return yScale(d[1]) },
// fill: "black",
stroke: "#2980b9",
// Fill colour: red channel is floor(value) and blue channel is floor(255 - value),
// where value is the colour-scale output for this point's offset from the median time.
fill: function(d, i){
var dateInt = times[i] - medianTime;
var value = colorScale(dateInt);
var color ="rgb(" + Math.floor(value) + ", 0, " + Math.floor(255-value) + ")";
// Logs index, time offset and colour for every point drawn.
console.log(i, dateInt, color);
return color;
// fill: function(d,i) {
// value = "rgb(" + (i*2) + ", 0 , " + (250-(i*2)) + ")";
// return value;
// },
// A few steps are needed here. We can still use the index of d (i) in the lambda, which is good.
// 1. Sort the article indexes by date using their boolean.
// 2. Create a map of these new indexes, e.g. var dateIndexForColors = { 37: 0, 42: 1, 14: 2 }.
// 3. Check
/**
 * Sorts an array with a top-down merge sort, comparing elements with `<`.
 *
 * The input is not modified. Equal elements keep their original relative
 * order (the merge takes from the left half on ties).
 *
 * NOTE(review): in this view the function sits in the middle of the
 * visualization's attribute chain, next to planning notes; confirm its
 * intended placement in the full file.
 *
 * @param {Array} array - Values to sort.
 * @returns {Array} A new sorted array; when `array` has fewer than two
 *   elements, `array` itself is returned unchanged.
 */
function mergeSort(array) {
  // Recursion base case: zero or one element is already sorted.
  if (array.length < 2) {
    return array;
  }

  // Split into two halves and sort each recursively.
  var mid = Math.floor(array.length / 2);
  var leftSorted = mergeSort(array.slice(0, mid));
  var rightSorted = mergeSort(array.slice(mid));

  // Merge with read cursors instead of shift(), which is O(n) per call.
  var sortedResult = [];
  var leftIndex = 0;
  var rightIndex = 0;
  while (leftIndex < leftSorted.length && rightIndex < rightSorted.length) {
    if (rightSorted[rightIndex] < leftSorted[leftIndex]) {
      sortedResult.push(rightSorted[rightIndex]);
      rightIndex += 1;
    } else {
      sortedResult.push(leftSorted[leftIndex]);
      leftIndex += 1;
    }
  }

  // At most one of the halves still has elements; append whatever remains.
  while (leftIndex < leftSorted.length) {
    sortedResult.push(leftSorted[leftIndex]);
    leftIndex += 1;
  }
  while (rightIndex < rightSorted.length) {
    sortedResult.push(rightSorted[rightIndex]);
    rightIndex += 1;
  }

  return sortedResult;
}
// Tail of the per-point attribute/event chain.
// Fixed radius for every point.
r: 4,
// Hides the tooltip when the pointer leaves a point.
.on('mouseout', tip.hide)
// NOTE(review): the click handler body is truncated in this view; it reads web_url from
// something indexed by i — presumably it opens the story's URL — confirm.
.on('click', function(d,i){[i].web_url) })
