Skip to content

Instantly share code, notes, and snippets.

# Bass diffusion model fit (sketch).
# NOTE(review): `sales` must be supplied before this runs; the original left
# a dangling `sales <- # ...` which made R parse the NEXT line as the RHS.
sales <- NULL # <- put the per-period sales data here
cumsales <- cumsum(sales)
time <- seq_along(sales)
tdelt <- (1:100) / length(time)
# Bass model: S(t) = m * ((p+q)^2/p) * exp(-(p+q)t) / (1 + (q/p)exp(-(p+q)t))^2
# where m = market potential, p = innovation coeff, q = imitation coeff.
# (original formula had an unbalanced trailing ")" -- removed)
Bass <- sales ~ m * (((p + q)^2 / p) * exp(-(p + q) * time)) /
  (1 + (q / p) * exp(-(p + q) * time))^2
# original start list referenced undefined `S`; the data vector is `sales`
Bass.nls <- nls(formula = Bass, start = c(p = 0.03, q = 0.04, m = max(sales)))
Bcoef <- coef(Bass.nls)
# coef() follows the start order (p, q, m): extract by NAME -- the original
# took Bcoef[1] as m and Bcoef[2] as p, which are the wrong slots.
m <- Bcoef[["m"]]
p <- Bcoef[["p"]]
library(ggplot2)
library(reshape2)
library(RColorBrewer)
# Observed per-period sales for 30 periods.
sales <- c(100,84,34,27,43,17,278,92,41,88,52,30,108,201,195,117,38,26,272,110,412,191,264,71,430,219,288,191,325,81)
# Cumulative sales, derived directly from `sales`.
# NOTE(review): the original re-typed the whole sales vector with its first
# value replaced by an unexplained 1643 and cumsum'd that copy; using
# cumsum(sales) here -- confirm 1643 was not an intentional installed base.
cumsales <- cumsum(sales)
TSEP11 <- 1:30  # time index, one per observation
Tdelt <- 1:180  # finer time grid (used downstream, e.g. for plotting)
# Bass diffusion fit: M = market potential, P = innovation, Q = imitation.
# M starts at the observed cumulative total, the natural lower bound.
Bass.nls <- nls(
  sales ~ M * (((P + Q)^2 / P) * exp(-(P + Q) * TSEP11)) /
    (1 + (Q / P) * exp(-(P + Q) * TSEP11))^2,
  start = list(M = max(cumsales), P = 0.01, Q = 0.3)
)
import logging

# Configure root logging for the gensim pipeline below.
# NOTE: the format key must be lowercase "levelname" -- the original
# "%(Levelname)s" raises "Formatting field not found" on the first log call.
logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s',
                    level=logging.INFO)
from gensim import corpora, models, similarities

# Patent-domain terms plus standard English stopwords to drop from the corpus.
# NOTE(review): the original list contained the typo "sysetem"; corrected to
# "system" so the intended word is actually filtered out.
stoplist = set('method,device,system,apparatus,a,able,about,across,after,all,almost,also,am,among,an,and,any,are,as,at,be,because,been,but,by,can,cannot,could,dear,did,do,does,either,else,ever,every,for,from,get,got,had,has,have,he,her,hers,him,his,how,however,i,if,in,into,is,it,its,just,least,let,like,likely,may,me,might,most,must,my,neither,no,nor,not,of,off,often,on,only,or,other,our,own,rather,said,say,says,she,should,since,so,some,than,that,the,their,them,then,there,these,they,this,tis,to,too,twas,us,wants,was,we,were,what,when,where,which,while,who,whom,why,will,with,would,yet,you,your'.split(','))
# One document per line of abstracts.txt, lower-cased and whitespace-tokenised.
dictionary = corpora.Dictionary(line.lower().split() for line in open('abstracts.txt'))
# Ids of stopwords that actually occur in the corpus.
stop_ids = [dictionary.token2id[stopword] for stopword in stoplist if stopword in dictionary.token2id]
# NOTE(review): the original line was truncated at "dictionary.dfs.it";
# completed per the standard gensim corpus tutorial -- drop hapaxes
# (tokens whose document frequency is 1).
once_ids = [tokenid for tokenid, docfreq in dictionary.dfs.items() if docfreq == 1]
import re, string

# Collapse every run of non-word characters (plus "_") into nothing.
# NOTE(review): this also deletes spaces, gluing adjacent words together --
# later passes in this file switch to r'[^a-zA-Z ]' which keeps spaces;
# behaviour preserved here, confirm which was intended.
pattern = re.compile(r'[\W_]+')

# Read "id,abstract" rows and keep the cleaned, lower-cased abstract text.
# NOTE: the original used a non-raw Windows path ("C:\Users...") -- "\U"
# is a SyntaxError in Python 3 -- and opened in binary mode while splitting
# on a str separator; fixed to a raw path, text mode, and a context manager.
abstracts = []
with open(r"C:\Users\graingec\spillovers\abstracts\abstracts.csv") as f:
    for line in f:
        y = line.split(',', 1)
        if len(y) == 2:
            c = pattern.sub('', y[1].lower())
            abstracts.append(c)
import re, string

# Keep only ASCII letters and spaces (compiled here for later cleaning
# passes; not applied in this loop).
pattern = re.compile(r'[^a-zA-Z ]')

# Collect the raw abstract text: everything after the first comma of each row.
# NOTE: the original used a non-raw Windows path ("\U" is a SyntaxError in
# Python 3) and opened the file in binary mode while splitting on a str
# separator; fixed to a raw path, text mode, and a context manager.
abstracts = []
with open(r"C:\Users\graingec\spillovers\abstracts\abstracts.csv") as f:
    for line in f:
        y = line.split(',', 1)
        if len(y) == 2:
            abstracts.append(y[1])
abstracts1 = []
import re, string

# Keep only ASCII letters and spaces (for later cleaning passes).
pattern = re.compile(r'[^a-zA-Z ]')
abstracts = []
# Read "id,abstract" rows; keep the raw abstract text after the first comma.
# NOTE: the original path was a non-raw string ("\U" is a SyntaxError in
# Python 3) and the file was opened "rb" while split used a str separator.
with open(r"C:\Users\graingec\spillovers\abstracts\abstracts.csv") as f:
    for line in f:
        y = line.split(',', 1)
        if len(y) == 2:
            abstracts.append(y[1])
# (re-initialised here as in the original paste; the L49 init is redundant)
abstracts1 = []
for i in abstracts:
# Rewrite the Orbis abstracts CSV so each output row is exactly
# "id,abstract" with all commas stripped from the abstract body.
# NOTE: the original paths were non-raw strings -- "\U" is a SyntaxError and
# "\o" an invalid escape in Python 3 -- and it mixed a binary read ("rb")
# with a text write; fixed to raw paths and text mode on both sides.
with open(r"C:\Users\graingec\spillovers\abstracts\orbis_patents_abstracts.csv") as f:
    with open(r"C:\Users\graingec\spillovers\abstracts\abstracts.csv", "w") as f2:
        # mode "w" already truncates; explicit call kept from the original
        f2.truncate()
        for line in f:
            y = line.split(',', 1)
            if len(y) == 2:
                c = y[1].replace(',', '')
                f2.write(y[0] + ',' + c)
# Lemmatise each abstract line with the module-level `lmtzr` and write the
# result to lemma.txt, one lemmatised abstract per line.
# NOTE: the original opened the input in binary mode but ran str replaces,
# and -- presumably a truncated paste -- never wrote the joined result to
# f2 despite opening and truncating it; the final write restores the
# evident intent (confirm against the original script).
with open('/Users/cigrainger/AeroFS/techproximity/shortabstracts.txt') as f:
    with open('/Users/cigrainger/AeroFS/techproximity/lemma.txt', 'w') as f2:
        # mode "w" already truncates; explicit call kept from the original
        f2.truncate()
        for line in f:
            a = line.replace('\r\n', '')
            c = a.split(' ')
            b = []
            for i in c:
                b.append(lmtzr.lemmatize(i))
            a = ' '.join(b)
            f2.write(a + '\n')
# Point the library search path at the user's Windows R library.
.libPaths("C:/Users/graingec/R/win-library")
library(dplyr)
library(tm)
library(lda)
library(wordcloud)
# Loads `patents` (with an APPLN_ABSTRACT_LG column) into the workspace.
load('./data/abstractslang.rdata')
# Strip every character that is not alphanumeric or a space.
# NOTE(review): this gsub targets APPLN_ABSTRACT_LG, which the next line
# treats as a language code ('EN') -- the gsub was presumably meant for the
# abstract-text column instead; confirm the intended column name.
patents$APPLN_ABSTRACT_LG <- gsub("[^[:alnum:] ]", "",patents$APPLN_ABSTRACT_LG)
# Keep only English-language patent records.
patents <- filter(patents,APPLN_ABSTRACT_LG == 'EN')
# WordNet-based lemmatiser used by the abstract-cleaning code in this file.
from nltk.stem.wordnet import WordNetLemmatizer
lmtzr = WordNetLemmatizer()
import re, string
# Matches anything that is not an ASCII letter or a space.
pattern=re.compile(r'[^a-zA-Z ]')
def clean(x):
x = x.replace('<image>','')
x = pattern.sub('',x.lower())
x = x.replace('\r','')
x = x.replace('\n','')