// Created
// December 5, 2013 22:50
// -
// -
// Save divmain/7815457 to your computer and use it in GitHub Desktop.
// new visualization
// This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
// Learn more about bidirectional Unicode characters
// Example request: http://api.nytimes.com/svc/search/v2/articlesearch.json?q=israel+iran&fq=source:("The New York Times")&api-key=f25c99da2f24daefca165f7a452d05ec:1:35029882
// Shared state for the fetch -> vectorize -> train -> plot pipeline.
// These are plain script globals; the functions in this file read and write
// them by name.

// Number of result pages requested from the article search API per search.
var pagesOfStoriesToRequest = 9;
// Request objects returned by $.ajax, one per requested page.
var requestsPromises = [];
// Every keyword value seen in the fetched stories (duplicates included).
var keywordsArray = [];
// Sorted, de-duplicated copy of keywordsArray; assigned in processDocs.
var uniqueKeywordsArray;
// keyword -> index position of that keyword inside a feature vector.
var templateVectorMap = {};
// One 0/1 feature vector per story, in story order.
var featureVectorsRaw = [];
// Array of arrays for d3 to scatterplot (not referenced elsewhere in the visible code).
var coordinates = [];
// Wire up the search form once the DOM is ready: on submit, read the query,
// clear the input box and kick off the API requests.
$(document).ready(function () {
  $('#timesApiSearch').submit(function (e) {
    // Use the handler's own event argument; the implicit global `event` is
    // not available in every browser and made this line throw there.
    e.preventDefault();
    console.log('ready');

    var searchInput = $('#timesApiSearchInput');
    var rawUserInput = searchInput.val();
    searchInput.val('');

    // Trim and collapse whitespace runs so "a   b" becomes "a+b", not "a+++b".
    var whiteSpaceToPlus = (rawUserInput || '').trim().replace(/\s+/g, '+');
    if (!whiteSpaceToPlus) {
      return; // nothing to search for
    }
    makeAPIcall(whiteSpaceToPlus);
  });
});
/**
 * Requests `pagesOfStoriesToRequest` pages of article-search results for a
 * query and, once every page has arrived, passes the combined list of docs
 * to initializeNeuralNetwork.
 *
 * @param {string} searchKeys Search terms joined with '+' (e.g. "israel+iran").
 */
function makeAPIcall(searchKeys) {
  // Encode each term separately so the '+' separators stay as they are while
  // characters such as '&' or '#' cannot break out of the q= parameter.
  var query = String(searchKeys).split('+').map(function (term) {
    return encodeURIComponent(term);
  }).join('+');

  // Collect this search's requests locally so earlier searches never leak
  // into the $.when() call below.
  var requests = [];
  for (var page = 0; page < pagesOfStoriesToRequest; page++) {
    requests.push(
      $.ajax({
        // NOTE(review): the API key is hard-coded in client-side code.
        url: "http://api.nytimes.com/svc/search/v2/articlesearch.json?q=" + query + "&begin_date=20030101&fq=source:(%22The%20New%20York%20Times%22)&page=" + page + "&api-key=ebd81f171d792a60638e4dfa1eaec121:7:68519429"
      })
    );
  }

  // Keep the shared global in sync, holding only the current search's requests.
  requestsPromises.length = 0;
  Array.prototype.push.apply(requestsPromises, requests);

  $.when.apply($, requests).then(function () {
    // With several requests each argument is a [data, textStatus, jqXHR]
    // triple; with exactly one request the triple is spread across the
    // arguments themselves, so wrap it to get a uniform shape.
    var responses = requests.length === 1
      ? [arguments]
      : Array.prototype.slice.call(arguments);

    var nyt = responses.reduce(function (docs, response) {
      return docs.concat(response[0].response.docs);
    }, []);
    initializeNeuralNetwork(nyt);
  }, function (jqXHR, textStatus, errorThrown) {
    console.error('article search request failed:', textStatus, errorThrown);
  });
}
/**
 * Appends the `value` of each of a story's keywords to the global
 * keywordsArray (duplicates are kept).
 *
 * @param {Object} doc A story object; `doc.keywords` is read as an array of
 *     objects with a `value` property. A story without `keywords` adds nothing.
 */
function addToMasterKeywordsArray(doc) {
  // Tolerate a missing keywords list, as vectorizeStory does.
  (doc.keywords || []).forEach(function (keyword) {
    keywordsArray.push(keyword.value);
  });
}
/**
 * Rebuilds the global templateVectorMap so that each keyword in
 * uniqueKeywordsArray maps to its index position in that array.
 */
function createTemplateVectorMap() {
  // Drop entries left over from an earlier run; otherwise stale keywords
  // keep their old indices and inflate the map.
  Object.keys(templateVectorMap).forEach(function (staleKeyword) {
    delete templateVectorMap[staleKeyword];
  });

  (uniqueKeywordsArray || []).forEach(function (keyword, indexPosition) {
    templateVectorMap[keyword] = indexPosition;
  });
}
/**
 * Turns one story into a 0/1 feature vector and appends it to the global
 * featureVectorsRaw.
 *
 * The vector has one slot per key in templateVectorMap; a slot is 1 when the
 * story carries the keyword mapped to that position, otherwise 0.
 *
 * @param {Object} doc A story object; `doc.keywords` is read as an array of
 *     objects with a `value` property.
 */
function vectorizeStory(doc) {
  // Count keys explicitly: iterating the map with a collection helper can
  // mistake it for an array if a keyword happens to be called "length".
  var vectorLength = Object.keys(templateVectorMap).length;
  var vector = [];
  for (var slot = 0; slot < vectorLength; slot++) {
    vector.push(0);
  }

  (doc.keywords || []).forEach(function (keywordObj) {
    // Skip keywords that are not in the map; writing vector[undefined] would
    // add a stray property to the array instead of setting a slot.
    if (Object.prototype.hasOwnProperty.call(templateVectorMap, keywordObj.value)) {
      vector[templateVectorMap[keywordObj.value]] = 1;
    }
  });

  featureVectorsRaw.push(vector);
}
/**
 * Converts the fetched stories into autoencoder training data.
 *
 * Side effects: stores `data` on window.allResponses and rebuilds the globals
 * keywordsArray, uniqueKeywordsArray, templateVectorMap and featureVectorsRaw.
 *
 * @param {Array<Object>} data Story objects from the search responses.
 * @returns {Array<{input: number[], output: number[]}>} One entry per story;
 *     input and output reference the same feature vector.
 */
function processDocs(data) {
  // let's see what we get back...
  console.log('- - - - - - - - - - processing response data - - - - - - - - - - ');
  console.dir(data);
  window.allResponses = data;

  // Start from a clean slate so a second search does not mix its keywords
  // and vectors with those of the previous one.
  keywordsArray.length = 0;
  featureVectorsRaw.length = 0;
  Object.keys(templateVectorMap).forEach(function (staleKeyword) {
    delete templateVectorMap[staleKeyword];
  });

  // for each times story we get back... add each story's keywords to the master array
  data.forEach(addToMasterKeywordsArray);
  console.log('the raw master keyword list now has ' + keywordsArray.length + ' elements in it.');

  // sort and unique; passing true tells _.uniq the input is already sorted
  uniqueKeywordsArray = _.uniq(keywordsArray.sort(), true);
  console.log('the uniqd master keyword list now has ' + uniqueKeywordsArray.length + ' elements in it.');

  // produce template vector
  createTemplateVectorMap();
  console.log('- - - - - - - - - - index position of keywords map - - - - - - - - - - ');
  console.dir(templateVectorMap);

  // turn keyword list into vector ['iran', 'israel'] => [0, 1] etc.
  data.forEach(vectorizeStory);

  // transmute arrays into [{input: array, output: array}];
  // because it's an autoencoder, input and output are the same --
  // we are instead interested in the hidden layer
  var finalTrainingData = featureVectorsRaw.map(function (vector) {
    return { input: vector, output: vector };
  });
  return finalTrainingData;
}
/**
 * Trains an autoencoder (a brain.NeuralNetwork with one hidden layer of two
 * nodes) on the stories' keyword vectors, then reads the hidden-layer values
 * for every story and passes them to visualization() as coordinates.
 *
 * Side effects: sets window.neuralNetwork and window.dataset.
 *
 * @param {Array<Object>} data Story objects from the search responses.
 */
function initializeNeuralNetwork(data) {
  var nytimes = processDocs(data);
  if (nytimes.length === 0) {
    console.warn('no stories to train on; skipping training and visualization');
    return;
  }

  // Keep a local reference rather than relying on window.neuralNetwork being
  // resolvable as a bare identifier.
  var network = new brain.NeuralNetwork({
    hiddenLayers: [2]
  });
  window.neuralNetwork = network;

  console.log('- - - - - - - - - - neural network - - - - - - - - - -');
  console.dir(network);
  console.log('- - - - - - - - - - input === output autoencoder feature vectors - - - - - - - - - -');
  console.dir(nytimes);
  console.log('- - - - - - - - - - commencing training - - - - - - - -');
  network.train(nytimes, {
    errorThresh: 0.004,
    learningRate: 0.3,
    iterations: 4001,
    log: true,
    logPeriod: 1000
  });
  console.log('- - - - - - - - - - training complete, running real data - - - - - - - - - - -');

  // After each run, outputs[1] is read as the hidden layer's activations;
  // copy it, since the network presumably reuses the array on the next run.
  var runDataSigmoid = featureVectorsRaw.map(function (storyAsVector) {
    network.run(storyAsVector);
    return network.outputs[1].slice(0);
  });

  // NOTE(review): runLinear is called exactly as in the original; confirm the
  // bundled brain build actually provides it.
  var runDataLinear = window.dataset = featureVectorsRaw.map(function (storyAsVector) {
    network.runLinear(storyAsVector);
    return network.outputs[1].slice(0);
  });

  console.log('The run was successful. Here are the values of the hidden layer for each run: ');
  console.dir(runDataSigmoid);
  console.dir(runDataLinear);
  console.log('- - - - - - - - - - visualizing... - - - - - - - - - - -');
  visualization(runDataLinear);
}
//DONE sort keywords array
//DONE uniq sorted array
//DONE de facto at which position each keyword belongs... index 50 is 'iran'... each time take in a story...
//DONE when want to make [1,0]... make an empty vector full of zeros... make a map out of it too...
//DONE make an object that would be a map... keywords[currentWord] = indexposition
//DONE index of 1s and 0s...
//DONE when training... pass in... array of vectors that am creating... training case is the classification value
// when classification ... ... vector is the target vector... array of those
// transform vectors into format that they want - just so happens that the input and output are the same
// after i've trained it, go through them one at a time and check the two nodes of the hidden layer.
// that's the x y position!
/**
 * Returns the median of an array of numbers without modifying the input.
 *
 * @param {number[]} values Numbers in any order.
 * @returns {number} The middle value for an odd count, the mean of the two
 *     middle values for an even count, and NaN for an empty array.
 */
function median(values) {
  // Sort a copy so the caller's array keeps its order.
  var sorted = values.slice();
  sorted.sort(function (a, b) { return a - b; });

  var half = Math.floor(sorted.length / 2);
  if (sorted.length % 2) {
    return sorted[half];
  }
  return (sorted[half - 1] + sorted[half]) / 2.0;
}
/**
 * Draws the stories as a scatterplot in a new 600x600 SVG appended to <body>.
 *
 * Each circle is positioned by the story's two values in `dataset` and
 * coloured by how far its publication date lies from the median publication
 * date. Hovering shows the headline in a tooltip; clicking opens the story's
 * web_url.
 *
 * Reads the global `allResponses`; story i there is assumed to correspond to
 * dataset[i].
 *
 * @param {Array<number[]>} dataset One [x, y] pair per story.
 */
function visualization(dataset) {
  // define width and height
  var w = 600;
  var h = 600;

  // Publication time of every story, in epoch milliseconds.
  var times = [];
  _.each(allResponses, function (story, i) {
    times[i] = (new Date(story.pub_date)).getTime();
  });
  var medianTime = median(times);

  // Offset of story i's publication time from the median.
  function timeOffset(d, i) {
    return times[i] - medianTime;
  }

  var xScale = d3.scale.linear()
    .domain([d3.min(dataset, function (d) { return d[0]; }), d3.max(dataset, function (d) { return d[0]; })])
    .range([40, w - 40]);

  var yScale = d3.scale.linear()
    .domain([d3.min(dataset, function (d) { return d[1]; }), d3.max(dataset, function (d) { return d[1]; })])
    .range([40, h - 40]);

  // Power scale with a small exponent instead of a linear one.
  var colorScale = d3.scale.pow().exponent(.3)
    .domain([d3.min(dataset, timeOffset), d3.max(dataset, timeOffset)])
    .range([0, 255]);

  // NOTE(review): the headline is inserted through .html(); confirm the API
  // text is safe to treat as markup.
  var tip = d3.tip()
    .attr('class', 'd3-tip')
    .html(function (d, i) { return allResponses[i].headline.main; });

  // First, we need to create the SVG element in which to place all our shapes:
  var svg = d3.select("body")
    .append("svg")
    .attr({
      "width": w,
      "height": h
    })
    .call(tip);

  svg.selectAll("circle")
    .data(dataset)
    .enter()
    .append("circle")
    .attr({
      cx: function (d, i) { return xScale(d[0]); },
      cy: function (d, i) { return yScale(d[1]); },
      stroke: "#2980b9",
      // Red component follows the colour scale, blue is its complement.
      fill: function (d, i) {
        var dateInt = times[i] - medianTime;
        var value = colorScale(dateInt);
        var color = "rgb(" + Math.floor(value) + ", 0, " + Math.floor(255 - value) + ")";
        console.log(i, dateInt, color);
        return color;
      },
      r: 4
    })
    .on('mouseover', tip.show)
    .on('mouseout', tip.hide)
    .on('click', function (d, i) { window.open(allResponses[i].web_url); });
}
// Sign up for free
// to join this conversation on GitHub.
// Already have an account?
// Sign in to comment