Skip to content

Instantly share code, notes, and snippets.

View shreyaskarnik's full-sized avatar
🤖

Shreyas Karnik shreyaskarnik

🤖
View GitHub Profile
@shreyaskarnik
shreyaskarnik / example-highlight-extract.py
Created March 5, 2012 23:51
Python Code to Extract Highlighted Text from DOCX (Word 2007 and Up format)
#!/usr/bin/python
# -*- coding: utf-8 -*-
from docx import *
document = opendocx(r'test.docx')
words = document.xpath('//w:r', namespaces=document.nsmap)
WPML_URI = "{http://schemas.openxmlformats.org/wordprocessingml/2006/main}"
tag_rPr = WPML_URI + 'rPr'
tag_highlight = WPML_URI + 'highlight'
tag_val = WPML_URI + 'val'
tag_t = WPML_URI + 't'
@shreyaskarnik
shreyaskarnik / llda-infer.scala
Created August 3, 2011 14:38
Infer from Labeled LDA Model
// http://nlp.stanford.edu/software/tmt/0.3/
// tells Scala where to find the TMT classes
import scalanlp.io._;
import scalanlp.stage._;
import scalanlp.stage.text._;
import scalanlp.text.tokenize._;
import scalanlp.pipes.Pipes.global._;
import edu.stanford.nlp.tmt.stage._;
@shreyaskarnik
shreyaskarnik / llda-learn.scala
Created August 3, 2011 14:36
Learning Labeled LDA Model using Stanford Topic Modeling Toolbox
// Stanford TMT Example 6 - Training a LabeledLDA model
// http://nlp.stanford.edu/software/tmt/0.3/
// tells Scala where to find the TMT classes
import scalanlp.io._;
import scalanlp.stage._;
import scalanlp.stage.text._;
import scalanlp.text.tokenize._;
import scalanlp.pipes.Pipes.global._;
#This is my own interpretation of the USA.gov PubSub feed, with some tips and code from HarlanH on Twitter.
#I am interested in finding out which agencies' links are shared from which parts of the US.
library(stringr)
library(plyr)
library(ggplot2)
library(scrapeR)
library(RJSONIO)
library(colorspace)
library(RColorBrewer)
library(maps)