Skip to content

Instantly share code, notes, and snippets.

@reyoung
Created July 7, 2014 11:26
Show Gist options
  • Save reyoung/84174d5c96bbcb1d08c2 to your computer and use it in GitHub Desktop.
Save reyoung/84174d5c96bbcb1d08c2 to your computer and use it in GitHub Desktop.
A PCA trainer and transformer: trains a PCA model from CSV files and transforms CSV files with it.
#!/bin/python
"""CCIPCA Trainer and transformer.

Usage:
  ipca.py train <model_file_name> <csvfile>... [--ncomp=<nc>] [--ntrainiter=<nti>] [--itersave]
  ipca.py trans <model_file_name> <csvfile>... [--new_prefix=<prefix>]
  ipca.py (-h|--help)

Options:
  --ncomp=<nc>           Number of Component [default: 30].
  -h --help              Show this screen.
  --new_prefix=<prefix>  Prefix of new filename [default: trans_].
  --ntrainiter=<nti>     Number of Train Iterator [default: 1].
  --itersave             Save model when iterator complete.
"""
# NOTE: docopt parses the docstring above at runtime. The pattern lines and
# the option lines must stay indented under their section headers, and each
# option must be separated from its description by at least two spaces,
# otherwise the command-line patterns and the [default: ...] values are lost.
import csv
import os
import sys

try:
    import cPickle as pickle
except ImportError:
    import pickle

import numpy as np
from docopt import docopt
from pyIPCA import CCIPCA
def load_pca(args):
    """Load the pickled model named by ``args['<model_file_name>']``.

    If the file cannot be opened or its contents cannot be unpickled, a new
    ``CCIPCA`` is created with ``n_components=int(args['--ncomp'])`` and
    returned instead.

    SECURITY: ``pickle.load`` can execute arbitrary code contained in the
    file. Only point this at model files you created yourself or otherwise
    trust.

    :param args: docopt-style mapping holding ``'<model_file_name>'`` and
        ``'--ncomp'``.
    :return: the unpickled object, or a freshly constructed ``CCIPCA``.
    """
    try:
        with open(args['<model_file_name>'], 'rb') as f:
            return pickle.load(f)
    # The exception types must be a parenthesised tuple: the old
    # ``except IOError, pickle.PickleError`` form caught only IOError and
    # bound the exception to ``pickle.PickleError`` (and is a syntax error on
    # Python 3). EOFError covers an empty model file.
    except (IOError, pickle.PickleError, EOFError):
        return CCIPCA(n_components=int(args['--ncomp']))
def train_pca(pca, fn):
    """Fit ``pca`` on the contents of one comma-separated file.

    The file is parsed with ``np.genfromtxt`` and the resulting array is
    handed to ``pca.fit``. Nothing is returned; whatever ``fit`` does to
    ``pca`` is the only effect.

    :param pca: object exposing a ``fit(data)`` method.
    :param fn: path of the CSV file to read.
    """
    pca.fit(np.genfromtxt(fn, delimiter=','))
def trans_pca(pca, fn, pre):
    """Transform one CSV file with ``pca`` and write the result as CSV.

    The input is parsed with ``np.genfromtxt``, passed through
    ``pca.transform``, and each row of the result is written as one CSV row.
    The output file sits next to the input and is named ``pre`` + the input's
    base name (e.g. ``data/a.csv`` -> ``data/trans_a.csv``).

    :param pca: object exposing a ``transform(data)`` method that returns a
        2-D array-like.
    :param fn: path of the CSV file to transform.
    :param pre: prefix prepended to the output file name.
    """
    data = np.genfromtxt(fn, delimiter=',')
    result = np.asarray(pca.transform(data))

    # Apply the prefix to the file name only. Prepending it to the whole path
    # would turn "data/a.csv" into "trans_data/a.csv", i.e. point into a
    # directory that normally does not exist.
    directory, basename = os.path.split(fn)
    out_path = os.path.join(directory, pre + basename)

    with open(out_path, 'w') as f:
        csv.writer(f).writerows(result)
def save_pca(pca, args):
    """Pickle ``pca`` into the file named by ``args['<model_file_name>']``.

    The file is opened in binary write mode, so any existing content is
    replaced.

    :param pca: the object to serialise.
    :param args: mapping holding the ``'<model_file_name>'`` key.
    """
    model_path = args['<model_file_name>']
    with open(model_path, 'wb') as model_file:
        pickle.dump(pca, model_file)
def main():
    """Command-line entry point.

    ``train``: load the model (or create a new one, see ``load_pca``), call
    ``train_pca`` on every CSV file for ``--ntrainiter`` passes, and save the
    model at the end. With ``--itersave`` the model is also saved after each
    pass.

    ``trans``: load the model and call ``trans_pca`` on every CSV file, using
    ``--new_prefix`` as the output file name prefix.
    """
    args = docopt(__doc__, version='CCIPCA Trainer V0.1.0')
    if args['train']:
        # Load PCA model
        pca = load_pca(args)
        # Train PCA on the CSV files
        n_passes = int(args['--ntrainiter'])
        for _ in range(n_passes):
            for fn in args['<csvfile>']:
                try:
                    train_pca(pca, fn)
                except Exception as err:
                    # Best effort: one bad file must not abort the run, but
                    # the failure is reported instead of being silently
                    # dropped. Catching Exception (not a bare except) lets
                    # Ctrl-C and SystemExit through.
                    sys.stderr.write(
                        'Skipping %s: %s: %s\n'
                        % (fn, type(err).__name__, err))
            if args['--itersave']:
                save_pca(pca, args)
        save_pca(pca, args)
    elif args['trans']:
        pca = load_pca(args)
        for fn in args['<csvfile>']:
            trans_pca(pca, fn, args['--new_prefix'])


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment