cigrainger · April 28, 2014 14:41
diff --git a/gistfile1.py b/gistfile1.py
 import re
 import string
 import sys

 from nltk.stem.wordnet import WordNetLemmatizer

 # One shared lemmatizer instance, reused for every token.
 lmtzr = WordNetLemmatizer()
 # Matches any single character that is not an ASCII letter or a space.
 pattern = re.compile(r'[^a-zA-Z ]')

 def clean(x):
     """Normalise one abstract into a space-separated string of lemmas.

     The literal ``<image>`` placeholder is removed, the text is lowercased,
     every character matched by the module-level ``pattern`` is deleted, the
     result is split on single spaces and each token is passed through
     ``lmtzr.lemmatize``.

     Args:
         x: Raw abstract text.

     Returns:
         The lemmatized tokens joined with single spaces. Runs of spaces in
         the input produce empty tokens, which are also handed to the
         lemmatizer.
     """
     # Drop the placeholder before filtering; otherwise "<" and ">" would be
     # stripped and the tag would survive as the word "image".
     x = x.replace('<image>', '')
     # ``pattern`` deletes everything except ASCII letters and spaces, which
     # already covers '\r' and '\n', so no separate newline removal is needed.
     x = pattern.sub('', x.lower())
     return ' '.join(lmtzr.lemmatize(token) for token in x.split(' '))

 # Split every CSV row into a patent id (text before the first comma) and an
 # abstract (the rest of the line), then write the cleaned abstracts and the
 # ids to two text files whose lines correspond one-to-one.
 #
 # Raw strings keep every backslash of the Windows paths literal. The plain
 # literals only worked because "\U", "\g", "\s", "\d" and "\p" are not escape
 # sequences in a Python 2 str ("\U" is a syntax error on Python 3).
 # NOTE(review): the input is opened in binary mode and split with a str
 # separator, which relies on Python 2 str semantics.
 with open(r"C:\Users\graingec\spillovers\data\patents_abstracts.csv", "rb") as f, \
         open(r"C:\Users\graingec\spillovers\abstracts\abstracts.txt", "w") as f2, \
         open(r"C:\Users\graingec\spillovers\abstracts\patentids.txt", "w") as f3:
     # No explicit truncate(): mode "w" already empties the file on open.
     for line in f:
         fields = line.split(',', 1)
         # Rows without a comma have no abstract field and are skipped.
         if len(fields) != 2:
             continue
         patent_id, abstract = fields
         # Commas inside the abstract are dropped, not replaced by spaces.
         f2.write(clean(abstract.replace(',', '')) + '\n')
         f3.write(patent_id + '\n')
	import re
	import string
	import sys

	from nltk.stem.wordnet import WordNetLemmatizer

	# One shared lemmatizer instance, reused for every token.
	lmtzr = WordNetLemmatizer()
	# Matches any single character that is not an ASCII letter or a space.
	pattern = re.compile(r'[^a-zA-Z ]')

	def clean(x):
	    """Normalise one abstract into a space-separated string of lemmas.

	    The literal ``<image>`` placeholder is removed, the text is lowercased,
	    every character matched by the module-level ``pattern`` is deleted, the
	    result is split on single spaces and each token is passed through
	    ``lmtzr.lemmatize``.

	    Args:
	        x: Raw abstract text.

	    Returns:
	        The lemmatized tokens joined with single spaces. Runs of spaces in
	        the input produce empty tokens, which are also handed to the
	        lemmatizer.
	    """
	    # Drop the placeholder before filtering; otherwise "<" and ">" would be
	    # stripped and the tag would survive as the word "image".
	    x = x.replace('<image>', '')
	    # ``pattern`` deletes everything except ASCII letters and spaces, which
	    # already covers '\r' and '\n', so no separate newline removal is needed.
	    x = pattern.sub('', x.lower())
	    return ' '.join(lmtzr.lemmatize(token) for token in x.split(' '))

	# Split every CSV row into a patent id (text before the first comma) and an
	# abstract (the rest of the line), then write the cleaned abstracts and the
	# ids to two text files whose lines correspond one-to-one.
	#
	# Raw strings keep every backslash of the Windows paths literal. The plain
	# literals only worked because "\U", "\g", "\s", "\d" and "\p" are not escape
	# sequences in a Python 2 str ("\U" is a syntax error on Python 3).
	# NOTE(review): the input is opened in binary mode and split with a str
	# separator, which relies on Python 2 str semantics.
	with open(r"C:\Users\graingec\spillovers\data\patents_abstracts.csv", "rb") as f, \
	        open(r"C:\Users\graingec\spillovers\abstracts\abstracts.txt", "w") as f2, \
	        open(r"C:\Users\graingec\spillovers\abstracts\patentids.txt", "w") as f3:
	    # No explicit truncate(): mode "w" already empties the file on open.
	    for line in f:
	        fields = line.split(',', 1)
	        # Rows without a comma have no abstract field and are skipped.
	        if len(fields) != 2:
	            continue
	        patent_id, abstract = fields
	        # Commas inside the abstract are dropped, not replaced by spaces.
	        f2.write(clean(abstract.replace(',', '')) + '\n')
	        f3.write(patent_id + '\n')
No results found