Last active
August 29, 2015 14:01
-
-
Save c58/56926743567675c05167 to your computer and use it in GitHub Desktop.
Fast (very fast) Probabilistic Neural Network implementation for CPython (based on the Encog project), using parallel per-class computation. Learning will be added later.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# encoding: utf-8 | |
# cython: profile=False | |
# cython: wraparound=False | |
# cython: boundscheck=False | |
# filename: fastpnn.pyx | |
from libc.stdlib cimport malloc, realloc, free, rand | |
cimport cython | |
from cython.parallel import parallel, prange | |
from cython.view cimport array as cvarray | |
cimport libc.math as cmath | |
import math | |
import numpy as np | |
cimport numpy as np | |
from fastgen cimport BasicGeneticFunction, Population, Chromosome, GeneticAlgorithm, _new_population, _new_chromosome | |
from scipy.spatial import distance | |
###################################### | |
# | |
# NETWORK | |
# | |
cdef class ProbabilityNetwork(object):
    """Probabilistic neural network that stores its training samples in C arrays.

    Samples are grouped per class index: ``_samples[i][j]`` points to the
    j-th stored sample of class index ``i`` (``input_count`` feature values
    followed by one slot holding the class index).  ``_samples_count[i]`` is
    the number of stored samples for that class index and
    ``_samples_alloc[i]`` the number of pointer slots currently reserved.

    Class labels seen in the data are mapped to consecutive indices through
    ``_cls_to_idx`` / ``_idx_to_cls``.
    """
    cdef:
        public int input_count
        public int output_count
        double _sigma
        dict _cls_to_idx
        dict _idx_to_cls
        double*** _samples
        int* _samples_count
        int* _samples_alloc

    property sigma:
        def __get__(self):
            # Exposed as a 0-d numpy array, as callers of this property expect.
            return np.array(<double>self._sigma)

        def __set__(self, new_sigma):
            self._sigma = new_sigma

    def __init__(self, int inputs_count, int output_count, double sigma=0.001):
        """Allocate empty per-class sample storage.

        :param inputs_count: number of feature values per sample.
        :param output_count: number of classes the network can hold.
        :param sigma: kernel width used by ``classify``.
        :raises ValueError: if ``output_count`` is negative.
        :raises MemoryError: if any of the C arrays cannot be allocated.
        """
        cdef:
            int i
            size_t slots
            double*** samples
            int* counts
            int* allocs

        if output_count < 0:
            raise ValueError("output_count must be non-negative")

        # Init basic variables.  output_count stays 0 until the per-class
        # arrays exist, so __dealloc__ never walks unallocated memory if an
        # allocation below fails.
        self.input_count = inputs_count
        self.output_count = 0
        self._cls_to_idx = {}
        self._idx_to_cls = {}
        self._sigma = sigma

        # Always request at least one slot: malloc(0) may legitimately
        # return NULL, which must not be mistaken for a failure.
        slots = output_count if output_count > 0 else 1
        samples = <double ***> malloc(sizeof(double**) * slots)
        counts = <int *> malloc(sizeof(int) * slots)
        allocs = <int *> malloc(sizeof(int) * slots)
        if samples == NULL or counts == NULL or allocs == NULL:
            free(samples)
            free(counts)
            free(allocs)
            raise MemoryError()

        # Put every class into a state that is safe to free before
        # publishing the arrays on the instance.
        for i in range(output_count):
            samples[i] = NULL
            counts[i] = 0
            allocs[i] = 0
        self._samples = samples
        self._samples_count = counts
        self._samples_alloc = allocs
        self.output_count = output_count

        # Reserve the initial pointer table for each class.
        for i in range(output_count):
            self._samples[i] = <double **> malloc(sizeof(double*) * 1000)
            if self._samples[i] == NULL:
                raise MemoryError()
            self._samples_alloc[i] = 1000

    cdef double* compute(self, double[:] inp, double sigma):
        """Return a freshly malloc'ed array of ``output_count`` normalised scores.

        For every class index the Gaussian-like kernel
        ``exp(-sum(((inp[k] - sample[k]) / sigma) ** 2))`` is summed over the
        stored samples; the per-class sums are then divided by their total.
        The caller owns the returned buffer and must ``free`` it.
        """
        # Init of computation
        # NOTE(review): the malloc result is not checked here; this method
        # returns a raw pointer and has no way to signal an error.
        cdef:
            double* out = <double *> malloc(sizeof(double) * self.output_count)
            double diff, dist, psum
            int i, j, k

        # Pattern layer: one parallel iteration per class index.
        for i in prange(self.output_count, num_threads=self.output_count, nogil=True):
            out[i] = 0
            for j in range(self._samples_count[i]):
                dist = 0.0
                for k in range(self.input_count):
                    dist += (((inp[k] - self._samples[i][j][k])) / sigma) ** 2
                dist = cmath.exp(-dist)
                # Clamp tiny kernel values to a fixed floor.
                if dist < 1.e-40:
                    dist = 1.e-40
                out[i] += dist

        # Summation layer
        psum = 0.0
        for i in range(self.output_count):
            psum += out[i]
        # Same floor for the total, so the division below never sees zero.
        if psum < 1.e-40:
            psum = 1.e-40

        # Output layer
        for i in range(self.output_count):
            out[i] /= psum
        return out

    cpdef int classify(self, double[:] inp, threshold=-1):
        """Classify ``inp`` with the network's own sigma.

        :param inp: feature vector (at least ``input_count`` values are read).
        :param threshold: minimum normalised score a class must reach.
        :return: the class label mapped from the best-scoring class index.
        """
        # Read the C field directly instead of round-tripping through the
        # numpy array returned by the ``sigma`` property.
        return self._classify(inp, threshold, self._sigma)

    cdef int _classify(self, double[:] inp, threshold=-1, double sigma=0.1):
        """Classify ``inp`` using an explicit ``sigma``.

        Picks the class index with the highest score that is also
        ``>= threshold`` and maps it back to its class label.
        """
        cdef:
            double* res = self.compute(inp, sigma)
            int idx = -1, i
            double val = -1

        for i in range(self.output_count):
            if res[i] > val and res[i] >= threshold:
                val = res[i]
                idx = i
        free(res)
        # NOTE(review): this function is declared to return a C int, so the
        # None branch cannot be returned as-is -- confirm the intended
        # "no class matched" sentinel before changing it.
        return self.idx_to_class(idx) if idx >= 0 else None

    cpdef add_sample(self, double[:] s):
        """Copy one training sample into the storage of its class.

        :param s: ``input_count`` feature values followed by the class label
            at position ``input_count``.
        :raises ValueError: if ``s`` is too short to contain the class label.
        :raises Exception: if the sample introduces more classes than
            ``output_count``.
        :raises MemoryError: if storage cannot be grown or allocated.
        """
        cdef:
            double** extended_samples
            double* sample
            int j, curr, cls, idx, new_alloc

        # Bounds checking is disabled for this module, so validate the
        # length explicitly before reading the label slot.
        if s.shape[0] <= self.input_count:
            raise ValueError("Sample must contain input_count features followed by the class label.")

        # Get class and target index.  Reject a surplus class *before*
        # class_to_idx registers it, so the class maps stay consistent.
        cls = <int>s[self.input_count]
        if cls not in self._cls_to_idx and len(self._cls_to_idx) >= self.output_count:
            raise Exception("Training data contains more classes than neural network has output neurons to hold.")
        idx = self.class_to_idx(cls)
        # Defensive re-check: idx indexes raw C arrays below.
        if idx < 0 or idx >= self.output_count:
            raise Exception("Training data contains more classes than neural network has output neurons to hold.")

        # Realloc samples array if out of allocated slots.  The bookkeeping
        # is only updated once realloc has succeeded.
        curr = self._samples_count[idx]
        if curr + 1 >= self._samples_alloc[idx]:
            new_alloc = self._samples_alloc[idx] + 1000
            extended_samples = <double **> realloc(self._samples[idx], sizeof(double*) * new_alloc)
            if extended_samples == NULL:
                raise MemoryError()
            self._samples[idx] = extended_samples
            self._samples_alloc[idx] = new_alloc

        # Create new sample: features first, class index in the last slot.
        sample = <double *> malloc(sizeof(double) * (self.input_count + 1))
        if sample == NULL:
            raise MemoryError()
        for j in range(self.input_count):
            sample[j] = s[j]
        sample[self.input_count] = idx
        self._samples[idx][curr] = sample
        self._samples_count[idx] = curr + 1

    cpdef set_samples(self, double[:,:] samples):
        """Replace all stored samples and class mappings with ``samples``.

        :param samples: 2-D array, one sample per row in the layout expected
            by ``add_sample``.
        """
        cdef Py_ssize_t i

        # Create empty values
        self._cls_to_idx = {}
        self._idx_to_cls = {}

        # Drop the old samples, then add all new ones.
        self._destroy_samples()
        for i in range(samples.shape[0]):
            self.add_sample(samples[i])

    cpdef int class_to_idx(self, int cls):
        """Return the index for class label ``cls``, registering it if new."""
        if cls not in self._cls_to_idx:
            self._cls_to_idx[cls] = len(self._cls_to_idx)
            self._idx_to_cls[self._cls_to_idx[cls]] = cls
        return self._cls_to_idx[cls]

    cpdef int idx_to_class(self, int idx):
        """Return the class label registered for class index ``idx``."""
        return self._idx_to_cls[idx]

    cdef _destroy_samples(self, deep=False):
        """Free stored samples.

        With ``deep`` false only the individual samples are freed and the
        per-class counters are reset; with ``deep`` true the per-class
        pointer tables and the top-level arrays are freed as well.
        """
        cdef int i, j

        for i in range(self.output_count):
            for j in range(self._samples_count[i]):
                free(self._samples[i][j])
            if deep:
                free(self._samples[i])
            else:
                # The pointer table stays allocated; add_sample grows it
                # again through realloc on the next insert.
                self._samples_count[i] = 0
                self._samples_alloc[i] = 0
        if deep:
            free(self._samples)
            free(self._samples_count)
            free(self._samples_alloc)

    def __dealloc__(ProbabilityNetwork self):
        self._destroy_samples(deep=True)
###################################### | |
# | |
# LEARNING | |
# | |
cdef class GeneticLearning(object):
    """Tune a ProbabilityNetwork's sigma with a genetic algorithm.

    Wraps a ``GeneticAlgorithm`` driven by a ``GeneticLearningFunction``
    built for the given network.
    """
    cdef:
        public GeneticAlgorithm ga
        ProbabilityNetwork network

    def __init__(self, ProbabilityNetwork network, *args, **kwargs):
        """Create the learner.

        :param network: network whose sigma will be trained.
        :param args: forwarded to ``GeneticLearningFunction`` after ``network``.
        :param kwargs: forwarded to ``GeneticLearningFunction``.
        """
        self.network = network
        self.ga = GeneticAlgorithm(GeneticLearningFunction(network, *args, **kwargs))

    cpdef train(self):
        """Run the genetic algorithm and store the resulting sigma on the network."""
        self.ga.run()
        # Only the last value returned by stat() is used here.
        avg_age, avg_score, max_score, sigma = self.ga.stat()
        self.network.sigma = sigma
        # Parenthesised form compiles under both language levels and prints
        # exactly what the former print statement did.
        print(str(sigma))
cdef class GeneticLearningFunction(BasicGeneticFunction):
    """Genetic-algorithm callbacks that search for a good sigma.

    Each chromosome carries a single gene (``genotype[0]``) which is used as
    the sigma passed to ``ProbabilityNetwork._classify``.  Fitness is
    measured on the test set; rows and predictions with class ``0`` are
    treated as "inactive" by the scoring code below.
    """
    cdef:
        ProbabilityNetwork network
        double[:,:] test_set
        double[:,:] training_set
        int activ_train
        int activ_test

    def __init__(self, ProbabilityNetwork network, double[:,:] training_set, double[:,:] test_set):
        """Load the training samples into the network and count active rows.

        :param network: network to evaluate candidate sigmas on.
        :param training_set: rows of ``input_count`` features plus a class
            label in column ``input_count``; stored in the network.
        :param test_set: rows in the same layout, used for fitness scoring.
        """
        cdef Py_ssize_t i
        cdef int label_col = network.input_count

        BasicGeneticFunction.__init__(self, pop_size=20, chromo_size=1, eq_distance=4,
                                      banking_age=25, banking_score=0.2)
        self.network = network
        self.test_set = test_set
        self.training_set = training_set
        self.network.set_samples(training_set)

        # Count the rows whose class label is non-zero in each set.
        self.activ_train = 0
        self.activ_test = 0
        for i in range(training_set.shape[0]):
            if training_set[i, label_col] != 0.0:
                self.activ_train += 1
        for i in range(test_set.shape[0]):
            # Index the test set here; using the training set would yield a
            # wrong count and read past its end when the test set is longer.
            if test_set[i, label_col] != 0.0:
                self.activ_test += 1

    cdef bint maybe_stop(self, Population* pop):
        # Stop once more than 20 iterations have been counted.
        return self.iter_count > 20

    cdef Population* initial(self):
        return self._initial_random()

    # Basic genetic functions
    cdef Population* withhelds(self, Population* pop):
        return self._withhelds_best(pop, 3)

    cdef Population* mutations(self, Population* pop):
        return self._mutations_random(pop, 5)

    cdef Population* parents(self, Population* pop):
        return self._parents_random(pop, 10)

    cdef Chromosome* crossover(self, Chromosome* parent1, Chromosome* parent2):
        """Build a child whose single gene is a random linear mix of both parents."""
        cdef Chromosome* child = _new_chromosome(parent1.size)
        # Each parent's gene is scaled by its own factor drawn from [-1.5, 1.5).
        child.genotype[0] = (parent1.genotype[0] * np.random.uniform(-1.5, 1.5)) + (parent2.genotype[0] * np.random.uniform(-1.5, 1.5))
        return child

    cdef Chromosome* mutate(self, Chromosome* ch):
        return self._mutate_by_element(ch)

    cdef double fitness(self, Chromosome* chromo):
        """Score a chromosome on the test set only."""
        cdef double score = 0
        score += self._fitness_on_set(chromo, self.test_set, self.activ_test)
        return score

    # Implementation details
    cdef double _fitness_on_set(self, Chromosome* chromo, double[:,:] data_set, int active_set):
        """Score ``chromo.genotype[0]`` as sigma on ``data_set``.

        ``active`` counts predictions other than class 0 and ``err`` counts
        those of them that differ from the row's label.  The score is the
        accuracy among active predictions, scaled down by how far the number
        of active predictions is from ``active_set``.  Returns 0 when there
        are no active predictions or ``active_set`` is 0.
        """
        cdef double err = 0.0, active = 0.0
        cdef Py_ssize_t i
        cdef int res
        cdef int label_col = self.network.input_count

        for i in range(data_set.shape[0]):
            res = self.network._classify(data_set[i], -1, chromo.genotype[0])
            if res != 0:
                active += 1
                if res != data_set[i, label_col]:
                    err += 1

        # Guard both divisions below: no active predictions, or no active
        # rows expected in the set at all.
        if active == 0 or active_set == 0:
            return 0.0
        return (1.0 - (err / active)) * (1.0 - abs(active_set - active) / active_set)

    cdef double _mutate_by_element_func(self, double value, Chromosome* orig_ch):
        # The mutation range shrinks rapidly as the chromosome's score grows.
        cdef double val = 1.0/((orig_ch.score + 1.0) ** 12)
        return value + np.random.uniform(-val, val)

    cdef double random_chromo_value(self):
        # Uniform draw from [0, u) where u is itself drawn from [0, 10).
        return np.random.uniform(0, np.random.uniform(0, 10))

    cdef double _distance(self, Chromosome* first, Chromosome* second):
        """Euclidean distance between the genotypes of two chromosomes."""
        cdef double sumSq = 0.0
        cdef int i
        for i in range(first.size):
            sumSq += (first.genotype[i] - second.genotype[i]) ** 2
        return sumSq ** 0.5
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Added learning. Please replace _fitness_on_set with a function suited to your task. The current function excludes class 0 from the classification output (for my own reasons).