Skip to content

Instantly share code, notes, and snippets.

# Load required packages
library(dplyr)
library(reshape2)
# Switch to the project directory so the relative path below resolves there.
setwd("~/AeroFS/Googlestuff/chris/")
# Restore the objects saved in longform.rdata; the next line expects one of
# them to be named `data` (presumably a data frame -- TODO confirm).
load(file = "longform.rdata")
# Convert the month column to Date class.
data$month <- as.Date(x = data$month)
# Get a random sample of word-country combinations to check against google trends
import sys
import re
import os

# Read the whole patent-titles CSV into memory.
# open() does not expand "~" (only the shell does), so the home-relative path
# is resolved explicitly; the `with` block closes the handle after reading.
with open(os.path.expanduser(
        '~/Documents/spillovers/tech_proximity2/orbis_patents_titles.csv')) as handle:
    f = handle.read()

# Drop the first line (presumably a header row -- TODO confirm) and split the
# remaining rows naively on commas.
rows = f.split('\n')[1:]
# First comma-separated field of every row.
numbers = [j.split(',')[0] for j in rows]
# Every remaining field of the row, kept as a list of fragments.
titles = [j.split(',')[1:] for j in rows]
# Map first field -> list of remaining fields; for duplicate keys the last row
# wins, exactly as with element-by-element assignment.
patents = dict(zip(numbers, titles))
for i in len(titles)
import sys
# Rewrite the patent-titles CSV so that every output row has exactly two
# fields: the text before the first comma, then the rest of the line with all
# of its commas removed.
# Not used in the visible code; kept because later code may rely on it.
patents = {}
# Both files are managed by `with` so the output is flushed and closed even if
# an error occurs part-way through.
with open('/Users/cigrainger/Documents/spillovers/tech_proximity2/orbis_patents_titles.csv', 'r') as f, \
        open('/Users/cigrainger/Documents/spillovers/tech_proximity2/patenttitles.csv', 'w') as f2:
    for line in f:
        y = line.split(',', 1)
        # Lines without any comma cannot be split into two fields; skip them.
        if len(y) == 2:
            # Strip the original line ending together with the embedded commas
            # so that exactly one newline is written per row.
            c = y[1].replace(',', '').rstrip('\r\n')
            f2.write(y[0] + ',' + c + '\n')
import sys
# Rewrite the patent-titles CSV so that every output row has exactly two
# fields: the text before the first comma, then the rest of the line with all
# of its commas removed.
# Mode 'w' already empties the output file, so no explicit truncate() is
# needed; `with` closes (and therefore flushes) both files.
with open('/Users/cigrainger/Documents/spillovers/tech_proximity2/orbis_patents_titles.csv', 'r') as f, \
        open('/Users/cigrainger/Documents/spillovers/tech_proximity2/patenttitles.csv', 'w') as f2:
    for line in f:
        y = line.split(',', 1)
        # Lines without any comma cannot be split into two fields; skip them.
        if len(y) == 2:
            # `c` still carries the line ending read from the input, so no
            # newline is appended when writing.
            c = y[1].replace(',', '')
            f2.write(y[0] + ',' + c)
import sys
# Copy the patent-titles CSV to patenttitles.csv, keeping the text before the
# first comma as field one and the remainder, stripped of commas, as field two.
# Opening with mode 'w' already truncates the target, and the `with` statement
# guarantees both handles are closed so the written rows reach the disk.
with open('/Users/cigrainger/Documents/spillovers/tech_proximity2/orbis_patents_titles.csv', 'r') as f, \
        open('/Users/cigrainger/Documents/spillovers/tech_proximity2/patenttitles.csv', 'w') as f2:
    for line in f:
        y = line.split(',', 1)
        # A line with no comma yields a single piece and is skipped.
        if len(y) == 2:
            # The input line ending stays attached to `c`, so the row is
            # written without adding another newline.
            c = y[1].replace(',', '')
            f2.write(y[0] + ',' + c)
import textmining
import re
f = open('patentstitles.csv','r')
def cleantext(x):
y = []
for line in x:
y.append(line.split(',',1)[0])
z = []
library(tm)
library(textcat)
library(dplyr)
library(topicmodels)
# Restore the saved objects; the code below expects one named `patents_titles`
# with an `appln_title` column.
load("patents_titles.rdata")
# write.table(patents_titles,file='patentstitles.csv')
# patents_titles <- patents_titles[sample(1:nrow(patents_titles),100000,replace=FALSE),]
# Normalise titles in one step: lower-case first, then drop every character
# that is neither alphanumeric nor a space.
patents_titles$appln_title <- gsub(
  "[^[:alnum:] ]", "",
  tolower(patents_titles$appln_title)
)
# Tag each cleaned title with the language guessed by textcat().
patents_titles$language <- textcat(patents_titles$appln_title)
# Load the exported titles file and pull two quoted fields out of every row.
# The path is a raw string: in a normal literal "\U" starts a unicode escape,
# which is a SyntaxError on Python 3. The `with` block closes the handle.
with open(r'C:\Users\graingec\spillovers\patentstitles.csv', 'r') as handle:
    f = handle.read()
rows = f.split('\n')
titles = []
id = []
for i in rows:
    # Split once on the double-quote character and reuse the pieces.
    parts = i.split('"')
    # Only rows that break into exactly nine pieces are used; anything else
    # (header, blank or irregular lines) is ignored.
    if len(parts) == 9:
        titles.append(parts[7])
        id.append(parts[4])
# Remove backslashes from the extracted ids.
id = [value.replace("\\", "") for value in id]
import sys
import nltk
f = open('students.csv').read()
while f.split('\n')[0] != 'name,email':
morecsv = raw_input("This does not look like the correct file. Would you like to see more? y/n?")
# Re-prompt until the answer is exactly 'y' or 'n'.
# The membership test is required: `morecsv != 'y' and 'n'` parses as
# `(morecsv != 'y') and 'n'`, and the non-empty string 'n' is always true,
# so an answer of 'n' would never be accepted.
while morecsv not in ('y', 'n'):
    morecsv = raw_input("The prompt was y/n only. Would you like to see more? y/n?")
if morecsv == 'y':
print f.split('\n')[:10]
newcsv = raw_input("Would you like to overwrite this file to create the student list? y/n?")
import sys
# Read the exported titles file and collect two quoted fields from each row.
# A raw string keeps the Windows path literal: without it "\U" is parsed as a
# unicode escape and fails to compile on Python 3. The handle is closed as
# soon as the contents have been read.
with open(r'C:\Users\graingec\spillovers\patentstitles.csv', 'r') as handle:
    f = handle.read()
rows = f.split('\n')
titles = []
id = []
for i in rows:
    # Split on the double-quote character a single time per row.
    parts = i.split('"')
    # Rows that do not produce exactly nine pieces are skipped.
    if len(parts) == 9:
        titles.append(parts[7])
        id.append(parts[4])
for i in range(0,len(id)):