triplingual · January 22, 2019 21:35
diff --git a/jstor-dfr-wordcount-xform.py b/jstor-dfr-wordcount-xform.py
 # columns to create and populate:
 # 	WORDCOUNTS, WEIGHT

 import unicodecsv as csv # need to install unicodecsv, tho
 import re
 import os
 import sys
 import glob

 # Pick the directory to scan: the first command-line argument when one is
 # given, otherwise the current working directory.
 # print(...) with a single string argument behaves identically on Python 2
 # and Python 3, so the script no longer depends on the print statement.
 if len(sys.argv) < 2:
 	print('Looking for wordcount files in current directory . . .')
 	print('')
 	pathname = os.getcwd()
 else:
 	print('Using ' + sys.argv[1] + ' as working path for wordcount files.')
 	pathname = sys.argv[1].rstrip('/')

 # Get filenames from system
 metadatafilenames = glob.glob(pathname + '/*.txt')

 # Convert each tab-delimited .txt file into a two-column CSV written to a
 # sibling "wordcounts" directory (created on demand).
 for ngrampath in metadatafilenames:
 	print(ngrampath)
 	head, tail = os.path.split(ngrampath)

 	# Build the output directory path once instead of re-concatenating it.
 	outputdir = os.path.join(head, '..', 'wordcounts')

 	# Derive the output name from the input name.
 	outputfilename = tail.replace('journal-article-', 'wordcounts_')
 	outputfilename = outputfilename.replace('-ngram1', '')
 	# Raw string + end anchor: only the trailing extension is rewritten,
 	# not every ".txt" that happens to occur inside the name.
 	outputfilename = re.sub(r'\.txt$', '.CSV', outputfilename)

 	# Files stay in binary mode because unicodecsv does its own
 	# encoding/decoding.
 	with open(ngrampath, 'rb') as csvinput:
 		thereader = csv.reader(csvinput, delimiter='\t')
 		if not os.path.isdir(outputdir):
 			# 0o755 is the octal spelling accepted by Python 2.6+ and 3.
 			os.mkdir(outputdir, 0o755)
 		with open(os.path.join(outputdir, outputfilename), 'wb') as csvoutput:
 			thewriter = csv.writer(csvoutput)
 			thewriter.writerow(['WORDCOUNTS', 'WEIGHT'])
 			for row in thereader:
 				# Blank or one-column lines would raise IndexError on
 				# row[1] and abort the whole run; skip them instead.
 				if len(row) < 2:
 					continue
 				thewriter.writerow([row[0], row[1]])
	# columns to create and populate:
	# WORDCOUNTS, WEIGHT

	import unicodecsv as csv # need to install unicodecsv, tho
	import re
	import os
	import sys
	import glob

	# Pick the directory to scan: the first command-line argument when one is
	# given, otherwise the current working directory.
	# print(...) with a single string argument behaves identically on Python 2
	# and Python 3, so the script no longer depends on the print statement.
	if len(sys.argv) < 2:
		print('Looking for wordcount files in current directory . . .')
		print('')
		pathname = os.getcwd()
	else:
		print('Using ' + sys.argv[1] + ' as working path for wordcount files.')
		pathname = sys.argv[1].rstrip('/')

	# Get filenames from system
	metadatafilenames = glob.glob(pathname + '/*.txt')

	# Convert each tab-delimited .txt file into a two-column CSV written to a
	# sibling "wordcounts" directory (created on demand).
	for ngrampath in metadatafilenames:
		print(ngrampath)
		head, tail = os.path.split(ngrampath)

		# Build the output directory path once instead of re-concatenating it.
		outputdir = os.path.join(head, '..', 'wordcounts')

		# Derive the output name from the input name.
		outputfilename = tail.replace('journal-article-', 'wordcounts_')
		outputfilename = outputfilename.replace('-ngram1', '')
		# Raw string + end anchor: only the trailing extension is rewritten,
		# not every ".txt" that happens to occur inside the name.
		outputfilename = re.sub(r'\.txt$', '.CSV', outputfilename)

		# Files stay in binary mode because unicodecsv does its own
		# encoding/decoding.
		with open(ngrampath, 'rb') as csvinput:
			thereader = csv.reader(csvinput, delimiter='\t')
			if not os.path.isdir(outputdir):
				# 0o755 is the octal spelling accepted by Python 2.6+ and 3.
				os.mkdir(outputdir, 0o755)
			with open(os.path.join(outputdir, outputfilename), 'wb') as csvoutput:
				thewriter = csv.writer(csvoutput)
				thewriter.writerow(['WORDCOUNTS', 'WEIGHT'])
				for row in thereader:
					# Blank or one-column lines would raise IndexError on
					# row[1] and abort the whole run; skip them instead.
					if len(row) < 2:
						continue
					thewriter.writerow([row[0], row[1]])
No results found