Skip to content

Instantly share code, notes, and snippets.

@c58
Last active August 29, 2015 14:01
Show Gist options
  • Save c58/56926743567675c05167 to your computer and use it in GitHub Desktop.
Save c58/56926743567675c05167 to your computer and use it in GitHub Desktop.
A fast (very fast) Probabilistic Neural Network implementation in Cython (based on the Encog project). Per-class computation runs in parallel. Learning will be added later.
# encoding: utf-8
# cython: profile=False
# cython: wraparound=False
# cython: boundscheck=False
# filename: fastpnn.pyx
from libc.stdlib cimport malloc, realloc, free, rand
cimport cython
from cython.parallel import parallel, prange
from cython.view cimport array as cvarray
cimport libc.math as cmath
import math
import numpy as np
cimport numpy as np
from fastgen cimport BasicGeneticFunction, Population, Chromosome, GeneticAlgorithm, _new_population, _new_chromosome
from scipy.spatial import distance
######################################
#
# NETWORK
#
cdef class ProbabilityNetwork(object):
    """Probabilistic neural network classifier backed by raw C buffers.

    Training samples are stored per class ("bucket").  Classifying an input
    sums a Gaussian-style kernel ``exp(-sum(((x - s) / sigma) ** 2))`` over
    every stored sample of each class, normalises the per-class sums and
    picks the class with the largest share.

    A sample row holds ``input_count`` feature values followed by one class
    label, which is truncated to an int.  Labels are mapped to dense indices
    ``0 .. output_count - 1`` in first-seen order.

    NOTE(review): ``input_count`` and ``output_count`` are public and
    writable, but the C buffers are sized from the values given to
    ``__init__``.  Changing them afterwards is not supported.
    """
    cdef:
        public int input_count
        public int output_count
        double _sigma
        dict _cls_to_idx
        dict _idx_to_cls
        # _samples[class_idx][sample_idx] -> input_count + 1 doubles
        double*** _samples
        # Number of samples stored in each bucket.
        int* _samples_count
        # Capacity (in pointers) of each bucket.
        int* _samples_alloc

    property sigma:
        def __get__(self):
            # Returned as a 0-d numpy array, as callers of the original
            # property received.
            return np.array(<double>self._sigma)

        def __set__(self, new_sigma):
            self._sigma = new_sigma

    def __init__(self, int inputs_count, int output_count, double sigma=0.001):
        """Allocate empty sample buckets for ``output_count`` classes.

        Raises:
            ValueError: if either count is negative.
            MemoryError: if a buffer cannot be allocated.
        """
        cdef int i, n_slots
        if inputs_count < 0 or output_count < 0:
            raise ValueError("inputs_count and output_count must be non-negative.")

        # Release buffers from an earlier __init__ call so that
        # re-initialising an instance does not leak them.
        self._destroy_samples(deep=True)

        # Init basic variables
        self.input_count = inputs_count
        self._cls_to_idx = {}
        self._idx_to_cls = {}
        self._sigma = sigma

        # malloc(0) may legitimately return NULL, so always request at least
        # one slot; NULL then unambiguously means "out of memory".
        n_slots = output_count if output_count > 0 else 1
        self._samples = <double ***> malloc(sizeof(double**) * n_slots)
        self._samples_count = <int *> malloc(sizeof(int) * n_slots)
        self._samples_alloc = <int *> malloc(sizeof(int) * n_slots)
        if self._samples == NULL or self._samples_count == NULL or self._samples_alloc == NULL:
            free(self._samples)
            free(self._samples_count)
            free(self._samples_alloc)
            self._samples = NULL
            self._samples_count = NULL
            self._samples_alloc = NULL
            raise MemoryError()

        # Put every bucket into a well-defined empty state before publishing
        # output_count, so cleanup is safe even if an allocation below fails.
        for i in range(output_count):
            self._samples[i] = NULL
            self._samples_count[i] = 0
            self._samples_alloc[i] = 0
        self.output_count = output_count

        # Set samples array for each class
        for i in range(output_count):
            self._samples[i] = <double **> malloc(sizeof(double*) * 1000)
            if self._samples[i] == NULL:
                raise MemoryError()
            self._samples_alloc[i] = 1000

    cdef double* compute(self, double[:] inp, double sigma):
        """Return the normalised per-class activations for ``inp``.

        The result is a freshly malloc'ed array with one double per class;
        the caller owns it and must ``free`` it.  Returns NULL if that array
        cannot be allocated.  ``inp`` must hold at least ``input_count``
        values; this is not checked here.
        """
        cdef:
            int n_out = self.output_count
            double* out = <double *> malloc(sizeof(double) * (n_out if n_out > 0 else 1))
            double dist, psum
            int i, j, k
        if out == NULL:
            return NULL
        if n_out <= 0:
            # Nothing to compute; also avoids asking for zero threads below.
            return out

        # Pattern layer: classes are processed in parallel.
        # The statement order inside the loop is deliberate: `dist` is
        # updated in place and then plainly re-assigned, which keeps it a
        # per-thread variable instead of a prange reduction variable.
        for i in prange(n_out, num_threads=n_out, nogil=True):
            out[i] = 0
            for j in range(self._samples_count[i]):
                dist = 0.0
                for k in range(self.input_count):
                    dist += (((inp[k] - self._samples[i][j][k])) / sigma) ** 2
                dist = cmath.exp(-dist)
                # Floor each kernel value so that it never underflows to 0.
                if dist < 1.e-40:
                    dist = 1.e-40
                out[i] += dist

        # Summation layer
        psum = 0.0
        for i in range(n_out):
            psum += out[i]
        # Floor the total so that the division below is always defined.
        if psum < 1.e-40:
            psum = 1.e-40

        # Output layer
        for i in range(n_out):
            out[i] /= psum
        return out

    cpdef int classify(self, double[:] inp, threshold=-1):
        """Classify ``inp`` using the network's current sigma.

        Returns the winning class label, or -1 when no class reaches
        ``threshold`` (see ``_classify``).
        """
        return self._classify(inp, threshold, self._sigma)

    cdef int _classify(self, double[:] inp, threshold=-1, double sigma=0.1):
        """Classify ``inp`` with an explicit ``sigma``.

        The winner is the class with the largest activation among those whose
        activation is ``>= threshold``.  Returns its class label, or -1 when
        no class qualifies.  The return type is a C int, so ``None`` cannot be
        returned; a real class label of -1 is indistinguishable from the
        sentinel.

        Raises ValueError for an input shorter than ``input_count`` and
        MemoryError if the activation buffer cannot be allocated.  Whether
        these propagate to the caller depends on the Cython version's default
        exception handling for cdef functions with a C return type.
        """
        cdef:
            double* res
            int idx = -1, i
            double val = -1
        if inp.shape[0] < self.input_count:
            # Bounds checking is disabled for this module, so a short input
            # would otherwise be read past its end.
            raise ValueError("Input vector is shorter than the network's input_count.")
        res = self.compute(inp, sigma)
        if res == NULL:
            raise MemoryError()
        try:
            for i in range(self.output_count):
                if res[i] > val and res[i] >= threshold:
                    val = res[i]
                    idx = i
        finally:
            # `threshold` is an arbitrary Python object, so the comparison
            # above may raise; release the buffer either way.
            free(res)
        return self.idx_to_class(idx) if idx >= 0 else -1

    cpdef add_sample(self, double[:] s):
        """Store one training sample.

        ``s`` holds ``input_count`` feature values followed by the class
        label at index ``input_count``.

        Raises:
            ValueError: if ``s`` is too short to contain a label.
            Exception: if the label would be a class beyond ``output_count``.
            MemoryError: if storage cannot be allocated.
        """
        cdef:
            double** extended_samples
            double* sample
            int j, curr, cls, idx, new_alloc
        too_many = "Training data contains more classes than neural network has output neurons to hold."

        if s.shape[0] <= self.input_count:
            # Bounds checking is disabled for this module, so reading the
            # label from a short sample would be an out-of-bounds access.
            raise ValueError("Sample must contain input_count values followed by a class label.")

        # Get class and target index
        cls = <int>s[self.input_count]
        # Reject an unknown class up front so that it is not registered in
        # the mapping dictionaries when there is no room for it.
        if cls not in self._cls_to_idx and len(self._cls_to_idx) >= self.output_count:
            raise Exception(too_many)
        idx = self.class_to_idx(cls)
        if idx >= self.output_count:
            raise Exception(too_many)

        # Allocate the sample first: if this fails nothing has been modified.
        sample = <double *> malloc(sizeof(double) * (self.input_count + 1))
        if sample == NULL:
            raise MemoryError()

        # Grow the bucket if it is (almost) full.  The new capacity is only
        # recorded once realloc has succeeded.
        if self._samples_count[idx] + 1 >= self._samples_alloc[idx]:
            new_alloc = self._samples_alloc[idx] + 1000
            extended_samples = <double **> realloc(self._samples[idx], sizeof(double*) * new_alloc)
            if extended_samples == NULL:
                free(sample)
                raise MemoryError()
            self._samples[idx] = extended_samples
            self._samples_alloc[idx] = new_alloc

        # Fill and publish the new sample; the trailing slot stores the
        # class index rather than the original label.
        for j in range(self.input_count):
            sample[j] = s[j]
        sample[self.input_count] = idx
        curr = self._samples_count[idx]
        self._samples[idx][curr] = sample
        self._samples_count[idx] = curr + 1

    cpdef set_samples(self, double[:,:] samples):
        """Replace all stored samples and class mappings with ``samples``.

        Each row is passed to ``add_sample``.

        Raises:
            ValueError: if the rows are too narrow to contain a class label;
                in that case the existing samples are left untouched.
        """
        cdef int i
        if samples.shape[1] <= self.input_count:
            raise ValueError("Each row must contain input_count values followed by a class label.")

        # Create empty values
        self._cls_to_idx = {}
        self._idx_to_cls = {}

        # Add all samples
        self._destroy_samples()
        for i in range(samples.shape[0]):
            self.add_sample(samples[i])

    cpdef int class_to_idx(self, int cls):
        """Return the index for class label ``cls``, registering it if new.

        New labels receive the next free index, i.e. the number of labels
        registered so far.
        """
        if cls not in self._cls_to_idx:
            self._cls_to_idx[cls] = len(self._cls_to_idx)
            self._idx_to_cls[self._cls_to_idx[cls]] = cls
        return self._cls_to_idx[cls]

    cpdef int idx_to_class(self, int idx):
        """Return the class label registered for index ``idx``."""
        return self._idx_to_cls[idx]

    cdef _destroy_samples(self, deep=False):
        """Free stored samples.

        With ``deep`` false, only the sample vectors are freed: the buckets
        are emptied but keep their allocated capacity for reuse.  With
        ``deep`` true, every buffer owned by the network is released and the
        pointers are reset, so a repeated call is harmless.
        """
        cdef int i, j
        if self._samples != NULL and self._samples_count != NULL:
            for i in range(self.output_count):
                if self._samples[i] != NULL:
                    for j in range(self._samples_count[i]):
                        free(self._samples[i][j])
                self._samples_count[i] = 0
                if deep:
                    free(self._samples[i])
                    self._samples[i] = NULL
        if deep:
            free(self._samples)
            free(self._samples_count)
            free(self._samples_alloc)
            self._samples = NULL
            self._samples_count = NULL
            self._samples_alloc = NULL
            # No buckets remain, so no index is valid any more.
            self.output_count = 0

    def __dealloc__(self):
        self._destroy_samples(deep=True)
######################################
#
# LEARNING
#
cdef class GeneticLearning(object):
    """Tunes a ProbabilityNetwork's sigma with a genetic algorithm.

    Any extra positional/keyword arguments are forwarded unchanged to
    ``GeneticLearningFunction`` together with the network.
    """
    cdef:
        public GeneticAlgorithm ga
        ProbabilityNetwork network

    def __init__(self, ProbabilityNetwork network, *args, **kwargs):
        self.network = network
        self.ga = GeneticAlgorithm(GeneticLearningFunction(network, *args, **kwargs))

    cpdef train(self):
        """Run the genetic algorithm and store the resulting sigma.

        The fourth value returned by ``ga.stat()`` is assigned to the
        network's ``sigma`` and printed.
        NOTE(review): the exact meaning of the values returned by ``stat()``
        is defined in ``fastgen``; the names below are the ones used here.
        """
        self.ga.run()
        avg_age, avg_score, max_score, sigma = self.ga.stat()
        self.network.sigma = sigma
        # Function-call form: identical output under Python 2 semantics and
        # also valid when compiled with language_level=3.
        print(str(sigma))
cdef class GeneticLearningFunction(BasicGeneticFunction):
    """Genetic-algorithm callbacks that search for a good network sigma.

    Each chromosome carries a single gene (``chromo_size=1``), which is
    passed to the network as the sigma to classify with.  Fitness is measured
    on the test set.  A class label of 0 is treated as "inactive": such
    predictions and rows are excluded from the active counts.
    """
    cdef:
        ProbabilityNetwork network
        double[:,:] test_set
        double[:,:] training_set
        # Number of rows with a non-zero label in each set.
        int activ_train
        int activ_test

    def __init__(self, ProbabilityNetwork network, double[:,:] training_set, double[:,:] test_set):
        """Load ``training_set`` into the network and count active rows.

        Both sets hold ``network.input_count`` feature columns followed by a
        label column.

        Raises:
            ValueError: if either set is too narrow to contain the label
                column.
        """
        cdef int i
        cdef int label_col = network.input_count
        # Bounds checking is disabled for this module, so validate the width
        # before any row is indexed at the label column.
        if training_set.shape[1] <= label_col or test_set.shape[1] <= label_col:
            raise ValueError("Data sets must contain input_count columns followed by a class label column.")

        BasicGeneticFunction.__init__(self, pop_size=20, chromo_size=1, eq_distance=4,
                                      banking_age=25, banking_score=0.2)
        self.network = network
        self.test_set = test_set
        self.training_set = training_set
        self.network.set_samples(training_set)

        self.activ_train = 0
        self.activ_test = 0
        for i in range(training_set.shape[0]):
            if training_set[i, label_col] != 0.0:
                self.activ_train += 1
        # Count over the test set itself (not the training set).
        for i in range(test_set.shape[0]):
            if test_set[i, label_col] != 0.0:
                self.activ_test += 1

    cdef bint maybe_stop(self, Population* pop):
        # Stop once the inherited iteration counter exceeds 20.
        return self.iter_count > 20

    cdef Population* initial(self):
        return self._initial_random()

    # Basic genetic functions
    # Each of the following delegates to an inherited helper with a fixed
    # numeric argument; see fastgen for what the helper does with it.
    cdef Population* withhelds(self, Population* pop):
        return self._withhelds_best(pop, 3)

    cdef Population* mutations(self, Population* pop):
        return self._mutations_random(pop, 5)

    cdef Population* parents(self, Population* pop):
        return self._parents_random(pop, 10)

    cdef Chromosome* crossover(self, Chromosome* parent1, Chromosome* parent2):
        """Create a child whose gene is a random linear mix of both parents.

        Each parent's gene is scaled by an independent factor drawn uniformly
        from [-1.5, 1.5) and the two products are added.  Only gene 0 is set.
        """
        cdef Chromosome* child = _new_chromosome(parent1.size)
        cdef double part1 = parent1.genotype[0] * np.random.uniform(-1.5, 1.5)
        cdef double part2 = parent2.genotype[0] * np.random.uniform(-1.5, 1.5)
        child.genotype[0] = part1 + part2
        return child

    cdef Chromosome* mutate(self, Chromosome* ch):
        return self._mutate_by_element(ch)

    cdef double fitness(self, Chromosome* chromo):
        """Score ``chromo`` on the test set only."""
        cdef double score = 0
        score += self._fitness_on_set(chromo, self.test_set, self.activ_test)
        return score

    # Implementation details
    cdef double _fitness_on_set(self, Chromosome* chromo, double[:,:] data_set, int active_set):
        """Score the sigma in ``chromo`` against ``data_set``.

        Every row is classified with ``chromo.genotype[0]`` as sigma.  Rows
        predicted as a non-zero class count as "active"; an active prediction
        that differs from the row's label counts as an error.  The score is

            (1 - err / active) * (1 - |active_set - active| / active_set)

        where ``active_set`` is the expected number of active rows.  Returns
        0 when nothing was predicted active or when ``active_set`` is 0.
        """
        cdef double err = 0.0, active = 0.0
        cdef int i, res
        cdef int label_col = self.network.input_count
        for i in range(data_set.shape[0]):
            res = self.network._classify(data_set[i], -1, chromo.genotype[0])
            if res != 0:
                active += 1
                if res != data_set[i, label_col]:
                    err += 1
        # With no expected active rows every active prediction is an error,
        # so the score is 0; this also avoids dividing by zero below.
        if active == 0 or active_set == 0:
            return 0
        return (1.0 - (err / active)) * (1.0 - abs(active_set - active) / active_set)

    cdef double _mutate_by_element_func(self, double value, Chromosome* orig_ch):
        """Perturb ``value`` by a uniform offset that shrinks as score grows.

        The offset is drawn from [-w, w) with ``w = 1 / (score + 1) ** 12``.
        NOTE(review): undefined for a score of exactly -1; confirm the score
        range against fastgen.
        """
        cdef double val = 1.0/((orig_ch.score + 1.0) ** 12)
        return value + np.random.uniform(-val, val)

    cdef double random_chromo_value(self):
        # Uniform in [0, u) where u is itself uniform in [0, 10).
        return np.random.uniform(0, np.random.uniform(0, 10))

    cdef double _distance(self, Chromosome* first, Chromosome* second):
        """Euclidean distance between two genotypes (over ``first.size`` genes)."""
        cdef double sumSq = 0.0
        cdef int i
        for i in range(first.size):
            sumSq += (first.genotype[i] - second.genotype[i]) ** 2
        return sumSq ** 0.5
@c58
Copy link
Author

c58 commented May 15, 2014

Added learning. Please replace _fitness_on_set with a function suited to your task. The current function excludes class 0 from the classification output (for my own reasons).

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment