September 27, 2013 11:09 · October 22, 2013 10:10 · November 2, 2013 23:48 · November 10, 2013 15:25 · November 11, 2013 14:18 · May 28, 2019 07:29
 """ Generate image with plot & rating for each movie in a directory. """
 from __future__ import print_function
 import os
 import re
 import sys
 import glob
 import json
 import time
 import urllib
 import textwrap
 """ Run a set of XPath queries on a corpus of parse trees and compute precision
 and recall with respect to a set of hand-picked sentences. """

 from __future__ import print_function
 import io
 import os
 import glob
 import nltk
 import alpinocorpus
 --- tiger_release_aug07.corrected.16012013.xml  2013-01-16 16:35:23.000000000 +0100
 +++ tiger_2.2a.xml      2013-11-03 00:02:12.890306125 +0100
 @@ -3097934,7 +3097934,6 @@
       <nt id="s46234_505" cat="PP">
         <edge label="AC" idref="s46234_24" />
         <edge label="NK" idref="s46234_25" />
 -        <edge label="CJ" idref="s46234_135" />
       </nt>
       <nt id="s46234_506" cat="PP">
         <edge label="AC" idref="s46234_30" />
 """ Classify rows from CSV files with SVM with leave-one-out cross-validation;
 labels taken from first column, of the form 'label_description'. """
 import sys
 import pandas
 from sklearn import svm, cross_validation, preprocessing
 data = pandas.read_csv(sys.argv[1])
 xdata = data.as_matrix(data.columns[1:])
 #xdata = preprocessing.scale(xdata)  # normalize data => mean of 0, stddev of 1
 ylabels = [a.split('_')[0] for a in data.icol(0)]
 ytarget = preprocessing.LabelEncoder().fit(ylabels).transform(ylabels)
 """ A simple multiprocessing example with process pools, shared data and
 per-process initialization. """
 import multiprocessing

 # global read-only data can be shared by each process
 DATA = 11

 def initworker(a):
 	""" Initialize data specific to each process. """
 	global MOREDATA
 """Apply PCA to a CSV file and plot its datapoints (one per line).

 The first column should be a category (determines the color of each datapoint),
 the second a label (shown alongside each datapoint)."""
 import sys
 import pandas
 import pylab as pl
 from sklearn import preprocessing
 from sklearn.decomposition import PCA
 """Extract metadata from Project Gutenberg RDF catalog into a Python dict.

 Based on https://bitbucket.org/c-w/gutenberg/

 >>> md = readmetadata()
 >>> md[123]
 {'LCC': {'PS'},
 'author': u'Burroughs, Edgar Rice',
 'authoryearofbirth': 1875,
 'authoryearofdeath': 1950,
 // ==UserScript==
 // @name        Gmane vertical frames
 // @namespace   [email protected]
 // @include     http://news.gmane.org/*
 // @include     http://thread.gmane.org/*
 // @version     1
 // @grant       none
 // ==/UserScript==

 // The default GMane 'news' view has horizontal panes, which waste lots of screen space;
	""" Generate image with plot & rating for each movie in a directory. """
	from __future__ import print_function
	import os
	import re
	import sys
	import glob
	import json
	import time
	import urllib
	import textwrap
	""" Run a set of XPath queries on a corpus of parse trees and compute precision
	and recall with respect to a set of hand-picked sentences. """

	from __future__ import print_function
	import io
	import os
	import glob
	import nltk
	import alpinocorpus
	--- tiger_release_aug07.corrected.16012013.xml 2013-01-16 16:35:23.000000000 +0100
	+++ tiger_2.2a.xml 2013-11-03 00:02:12.890306125 +0100
	@@ -3097934,7 +3097934,6 @@
	<nt id="s46234_505" cat="PP">
	<edge label="AC" idref="s46234_24" />
	<edge label="NK" idref="s46234_25" />
	- <edge label="CJ" idref="s46234_135" />
	</nt>
	<nt id="s46234_506" cat="PP">
	<edge label="AC" idref="s46234_30" />
	""" Classify rows from CSV files with SVM with leave-one-out cross-validation;
	labels taken from first column, of the form 'label_description'. """
	import sys
	import pandas
	from sklearn import svm, cross_validation, preprocessing
	data = pandas.read_csv(sys.argv[1])
	xdata = data.as_matrix(data.columns[1:])
	#xdata = preprocessing.scale(xdata) # normalize data => mean of 0, stddev of 1
	ylabels = [a.split('_')[0] for a in data.icol(0)]
	ytarget = preprocessing.LabelEncoder().fit(ylabels).transform(ylabels)
	""" A simple multiprocessing example with process pools, shared data and
	per-process initialization. """
	import multiprocessing

	# global read-only data can be shared by each process
	DATA = 11

	def initworker(a):
	""" Initialize data specific to each process. """
	global MOREDATA
	"""Apply PCA to a CSV file and plot its datapoints (one per line).

	The first column should be a category (determines the color of each datapoint),
	the second a label (shown alongside each datapoint)."""
	import sys
	import pandas
	import pylab as pl
	from sklearn import preprocessing
	from sklearn.decomposition import PCA
	"""Extract metadata from Project Gutenberg RDF catalog into a Python dict.

	Based on https://bitbucket.org/c-w/gutenberg/

	>>> md = readmetadata()
	>>> md[123]
	{'LCC': {'PS'},
	'author': u'Burroughs, Edgar Rice',
	'authoryearofbirth': 1875,
	'authoryearofdeath': 1950,
	// ==UserScript==
	// @name Gmane vertical frames
	// @namespace [email protected]
	// @include http://news.gmane.org/*
	// @include http://thread.gmane.org/*
	// @version 1
	// @grant none
	// ==/UserScript==

	// The default GMane 'news' view has horizontal panes, which waste lots of screen space;