sAbakumoff · September 27, 2016 07:46
diff --git a/datalab.sql b/datalab.sql
 %%bigquery udf --module extract_tokens

 /**
 * @param {{tweet_object: string, polarity: float, magnitude: float, syntax:string }} r
 * @param function({{type: string, content:string}}) emitFn
 */
 function(r, emitFn) {
  try{
    var tweet = JSON.parse(r.tweet_object);
    var tokens = JSON.parse(r.syntax);
    
    tokens.forEach(function(token){
      if(token.content !== "&amp" && token.content!== "#")
        emitFn({type : token.partOfSpeech, content : token.content.toLowerCase()})
    })
  }
  catch(e){
  }  
 }

 %%sql --module words

 part_of_speech="NOUN"
 total = 10;

 SELECT
   content,
   COUNT(*) AS count
 -- The extract_tokens UDF expands each input row into (type, content) rows.
 -- Rows whose tweet_object matches GoogleJsonResponseException are filtered
 -- out before the UDF runs.
 FROM extract_tokens(
   SELECT *
   FROM [in-full-gear:Dataset1.debates_tweets]
   WHERE NOT REGEXP_MATCH(tweet_object, "GoogleJsonResponseException")
 )
 WHERE type = $part_of_speech
 GROUP BY content
 ORDER BY count DESC
 LIMIT $total

 import datalab.bigquery as bq
 # Build the "words" query with the extract_tokens UDF attached; the keyword
 # arguments supply the query variables (part of speech and row limit).
 nouns = bq.Query(
     words,
     udf=extract_tokens,
     part_of_speech="NOUN",
     total=12,
 )

 %%chart bars --data nouns
 title: Most common nouns used in tweets about the debate
 height: 600
 width: 900
	%%bigquery udf --module extract_tokens

	/**
	* @param {{tweet_object: string, polarity: float, magnitude: float, syntax:string }} r
	* @param function({{type: string, content:string}}) emitFn
	*/
	function(r, emitFn) {
	try{
	var tweet = JSON.parse(r.tweet_object);
	var tokens = JSON.parse(r.syntax);

	tokens.forEach(function(token){
	if(token.content !== "&amp" && token.content!== "#")
	emitFn({type : token.partOfSpeech, content : token.content.toLowerCase()})
	})
	}
	catch(e){
	}
	}

	%%sql --module words

	part_of_speech="NOUN"
	total = 10;

	SELECT
		content,
		COUNT(*) AS count
	-- The extract_tokens UDF expands each input row into (type, content) rows.
	-- Rows whose tweet_object matches GoogleJsonResponseException are filtered
	-- out before the UDF runs.
	FROM extract_tokens(
		SELECT *
		FROM [in-full-gear:Dataset1.debates_tweets]
		WHERE NOT REGEXP_MATCH(tweet_object, "GoogleJsonResponseException")
	)
	WHERE type = $part_of_speech
	GROUP BY content
	ORDER BY count DESC
	LIMIT $total

	import datalab.bigquery as bq
	# Build the "words" query with the extract_tokens UDF attached; the keyword
	# arguments supply the query variables (part of speech and row limit).
	nouns = bq.Query(
	    words,
	    udf=extract_tokens,
	    part_of_speech="NOUN",
	    total=12,
	)

	%%chart bars --data nouns
	title: Most common nouns used in tweets about the debate
	height: 600
	width: 900