Skip to content

Instantly share code, notes, and snippets.

@ollewelin
Last active November 17, 2017 20:42
Show Gist options
  • Save ollewelin/4d8ebaf1e92c1617a8305a1baef030ce to your computer and use it in GitHub Desktop.
Save ollewelin/4d8ebaf1e92c1617a8305a1baef030ce to your computer and use it in GitHub Desktop.
Reinforcement Learning pinball game with user configurable parameters
/// Changed some settings 2017-10-25
/// The only dependency is the OpenCV C++ library, which needs to be installed
/// Example of Reinforcement Machine Learning attached to a simple Pinball game
/// The environment (environment = data feedback) for the Agent (Agent = machine learning system)
/// is the raw pixels, 50x50 pixels (2500 input nodes), and 200 hidden nodes on each of 100 frames
/// So the input-to-hidden weights are 50x50x100x200 x 4 bytes (float) = 200 Mbytes, which is huge, but it works anyway!!
///Enhancements to do in the future.
///TODO: Adding some convolution layers (with unsupervised learning for learning feature patches) will probably enhance performance.
///TODO: Adding bias weights may be a good idea to enhance performance or stability during training.
#include <opencv2/highgui/highgui.hpp> // OpenCV window I/O
#include <opencv2/imgproc/imgproc.hpp> //
#include <stdio.h>
///#include <raspicam/raspicam_cv.h>
#include <opencv2/opencv.hpp>
#include <opencv2/core/core.hpp> // Basic OpenCV structures (cv::Mat, Scalar)
#include <cstdlib>
#include <ctime>
#include <math.h> // exp
#include <stdlib.h>// exit(0);
#include <iostream>
using namespace std;
using namespace cv;
#include "pinball_game.hpp"
///Slope applied to negative inputs by relu(). A small positive value keeps ReLU neurons from dying;
///0.0f would be a pure rectifier and 1.0f a fully linear pass-through.
const float Relu_neg_gain = 0.01f;

///Leaky ReLU activation.
///@param input  pre-activation value
///@return input unchanged when it is not negative, otherwise input scaled by Relu_neg_gain
float relu(float input)
{
    return (input < 0.0) ? (input * Relu_neg_gain) : input;
}
///Absolute value of a signed integer.
///@param signed_value  any int
///@return signed_value with its sign removed
int int_abs_value(int signed_value)
{
    return (signed_value < 0) ? -signed_value : signed_value;
}
///Inverse of the sigmoid (logit) function.
///Sigmoid:  y = 1/(1+exp(-x))
///Inverse:  x = -log((1-y)/y)   (log = natural logarithm)
///@param sigm  sigmoid output; it is clamped to [0.001, 0.999] so the result stays finite
///@return the x value whose sigmoid equals the clamped input
float revers_sigmoid(float sigm)
{
    const float margin = 0.001f;///Distance kept from 0 and 1 to prevent infinity
    const float upper_limit = 1.0f - margin;
    float clamped = sigm;
    if(clamped > upper_limit)
    {
        clamped = upper_limit;
    }
    else if(clamped < margin)
    {
        clamped = margin;
    }
    float logit = -log((1.0f - clamped) / clamped);
    return logit;
}
///Fill the dropout table for one training turn.
///Every entry is first reset to 0 (node active). When verification == 0, exactly
///HiddenNodes * drop_out_percent / 100 randomly chosen entries are then set to 1 (node dropped).
///@param zero_ptr_dropoutHidden  pointer to the first of HiddenNodes table entries
///@param HiddenNodes             number of entries in the table
///@param verification            0 = select dropout nodes, any other value = leave all nodes active
///@param drop_out_percent        share of nodes to drop, clamped to 0..100
void randomize_dropoutHid(int *zero_ptr_dropoutHidden, int HiddenNodes, int verification, int drop_out_percent)
{
    if(zero_ptr_dropoutHidden == 0 || HiddenNodes <= 0)
    {
        return;///Nothing to fill
    }
    for(int i=0; i<HiddenNodes; i++)
    {
        zero_ptr_dropoutHidden[i] = 0;///reset
    }
    if(verification != 0)
    {
        return;///No dropout this turn
    }
    if(drop_out_percent < 0)
    {
        drop_out_percent = 0;
    }
    if(drop_out_percent > 100)
    {
        drop_out_percent = 100;
    }
    int still_to_drop = HiddenNodes * drop_out_percent/100;
    ///Selection sampling: visit each node once and drop it with probability
    ///(still_to_drop / nodes_left). Every node, including the last one, can be selected,
    ///and the requested count is always reached because the test is certain to pass
    ///once nodes_left == still_to_drop.
    for(int i=0; i<HiddenNodes && still_to_drop>0; i++)
    {
        int nodes_left = HiddenNodes - i;
        if((rand() % nodes_left) < still_to_drop)
        {
            zero_ptr_dropoutHidden[i] = 1;
            still_to_drop--;
        }
    }
}
///Program entry point: trains a policy network on the raw pixels of the pinball game.
///
///Flow:
/// 1. Create the game object and allocate the network buffers (one set of hidden nodes
///    and one output node per game frame).
/// 2. Fill the weights with random noise, then optionally let the user change settings
///    and load previously saved weights from pix2hid_weight.dat / hid2out_weight.dat.
/// 3. Loop forever: play one episode (forward pass + action for every frame), periodically
///    save and visualize the weights, compute the reward, optionally replay the best action
///    series, then back-propagate through all frames with the reward as gain.
///
///The weight files are raw float dumps, so they are opened in binary mode.
///The training loop never terminates on its own; the trailing return is kept for form only.
int main()
{
    char filename[100];
    char answer_character;
    printf("Reinforcment Learning test of pixels data input from a simple ping/pong game\n");
    FILE *fp2;
    int nr_of_episodes=0;
    int auto_save_w_counter =100;
    const int auto_save_after = 200;///Auto save weights after this number of episodes
    int show_w_counter =19;///Counter for the weight visualization (not shown every episode to save CPU time)
    const int show_w_after = 20;///Show the weights graphically after this number of episodes
    int pixel_height = 50;///The input data pixel height
    int pixel_width = 50;///The input data pixel width
    Mat resized_grapics, test, pix2hid_weight, hid2out_weight;
    Size size(pixel_width,pixel_height);///The destination image size of the resize
    pinball_game gameObj1;///Instantiate the pinball game
    gameObj1.init_game();///Initialize the pinball game
    gameObj1.slow_motion=0;///0 = full speed game. 1 = slow down
    gameObj1.replay_times = 0;///0 = no replay. >0 = number of replays with different actions before any weight update
    float pix2hid_learning_rate = 0.4f;
    float hid2out_learning_rate = 0.1f;
    ///(Disabled experiment: scale both learning rates by replay_times when replay_times > 0)

    ///========== Setup weights and nodes for the Reinforcement learning network ==========
    ///OpenCV Mat is used for the weights that are visualized; plain float arrays hold the full data.
    int Nr_of_hidden_nodes = 200;///Number of hidden nodes for one frame
    ///Record of all resized frames of the episode, read again when the weights are updated
    float *input_node_stored = new float[pixel_width * pixel_height * gameObj1.nr_of_frames];
    int visual_nr_of_frames = 20;///Only a few frames are visualized, otherwise the image would be too large
    int visual_nr_of_hid_node = 20;///Only a few hidden nodes are visualized, otherwise the image would be too large
    pix2hid_weight.create(pixel_height * visual_nr_of_hid_node, pixel_width * visual_nr_of_frames, CV_32FC1);
    ///All pixel-to-hidden weights; this is the data that is read from / written to file
    float *pix2hid_weightB = new float[pixel_height * pixel_width * Nr_of_hidden_nodes * gameObj1.nr_of_frames];
    float *hidden_node = new float[gameObj1.nr_of_frames * Nr_of_hidden_nodes];
    float *hidden_delta = new float[gameObj1.nr_of_frames * Nr_of_hidden_nodes];
    hid2out_weight.create(gameObj1.nr_of_frames, Nr_of_hidden_nodes, CV_32FC1);///Visualization of the hidden-to-output weights
    float *hid2out_weightB = new float[gameObj1.nr_of_frames * Nr_of_hidden_nodes];
    float *output_node = new float[gameObj1.nr_of_frames];
    float *output_delta = new float[gameObj1.nr_of_frames];
    float *action = new float[gameObj1.nr_of_frames];
    ///Best-reward action series, replayed before the weight update when replay_times > 0
    float *best_actions = new float[gameObj1.nr_of_frames];
    ///Dropout table: 0 = normal hidden node, 1 = drop this hidden node this training turn
    int *dropoutHidden = new int[gameObj1.nr_of_frames * Nr_of_hidden_nodes];
    ///Some reports to the user
    printf("Number of hidden nodes to one frames = %d\n", Nr_of_hidden_nodes);
    printf("Total number of hidden nodes fo all frames together = %d\n", gameObj1.nr_of_frames * Nr_of_hidden_nodes);
    printf("Number of output nodes alway equal to the number of frames on one episode = %d\n", gameObj1.nr_of_frames);

    int ix=0;///Index into the weight arrays

    ///============ Prepare pointers for direct access to the Mat data ==================
    ///NOTE(review): the float pointers below assume the game graphics are single-channel
    ///32-bit float images and that later resize()/assignments reuse the same buffers - confirm.
    test = gameObj1.gameGrapics.clone();
    resize(test, resized_grapics, size);
    Mat diff_grap, pre_grap;///Only used when use_image_diff == 1
    diff_grap = resized_grapics.clone();
    pre_grap = resized_grapics.clone();
    float *zero_ptr_diff_grap = diff_grap.ptr<float>(0);///zero_... always points at the first pixel
    float *index_ptr_diff_grap = diff_grap.ptr<float>(0);///index_... is adjusted to an arbitrary pixel
    float *zero_ptr_res_grap = resized_grapics.ptr<float>(0);
    float *index_ptr_res_grap = resized_grapics.ptr<float>(0);
    float *zero_ptr_pix2hid_w = pix2hid_weight.ptr<float>(0);///Only used for visualization of weights
    float *index_ptr_pix2hid_w = pix2hid_weight.ptr<float>(0);
    float *zero_ptr_hid2out_w = hid2out_weight.ptr<float>(0);///Only used for visualization of weights
    float *index_ptr_hid2out_w = hid2out_weight.ptr<float>(0);

    ///================ Initialize weights with random noise ============================
    printf("Insert noise to weights. Please wait...\n");
    srand (static_cast <unsigned> (time(0)));///Seed the randomizer (only needed once)
    float start_weight_noise_range = 0.05f;
    float Rando=0.0f;
    for(int i=0; i<(pixel_height * pixel_width * Nr_of_hidden_nodes * gameObj1.nr_of_frames); i++)
    {
        Rando = (float) (rand() % 65535) / 65536;///0..1.0 range
        Rando -= 0.5f;
        Rando *= start_weight_noise_range;
        pix2hid_weightB[i] = Rando;
    }
    printf("Noise to the weight pixel to hidden is inserted\n");
    for(int i=0; i<(gameObj1.nr_of_frames * Nr_of_hidden_nodes); i++)
    {
        Rando = (float) (rand() % 65535) / 65536;///0..1.0 range
        Rando -= 0.5f;
        Rando *= start_weight_noise_range;
        hid2out_weightB[i] = Rando;
    }
    printf("Noise to the weight hidden to output node is inserted\n");

    ///============ Default settings ====================================================
    gameObj1.use_image_diff=0;
    gameObj1.high_precition_mode = 1;///Makes the reward adjustable, highest at the center of the pad
    gameObj1.use_dice_action=1;
    gameObj1.drop_out_percent=0;
    gameObj1.Not_dropout=1;
    gameObj1.flip_reward_sign =0;
    gameObj1.print_out_nodes = 0;
    float gamma = 0.97f;///Reward decay applied per frame while walking backwards through the episode
    gameObj1.enable_ball_swan =1;
    gameObj1.use_character =1;
    gameObj1.max_rewards = 5.0f;
    gameObj1.enabel_3_state = 0;
    gameObj1.pix2hid_learning_rate = pix2hid_learning_rate;
    gameObj1.hid2out_learning_rate = hid2out_learning_rate;
    int best_rewards_serie=0;
    float highest_rewards=-1000.0f;
    int use_noise_image=0;///1 = add noise to the input graphics to test how the system handles noise
    float noise_thres = 0.10;///Share of pixels replaced by noise when use_noise_image == 1
    printf("Would want to use default settings <Y>/<N> \n");
    answer_character = getchar();
    if(answer_character == 'Y' || answer_character == 'y')
    {
        printf("********** Default settings **********\n");
        getchar();
    }
    else
    {
        printf("********** Set user settings **********\n");
        gameObj1.set_user_settings();
        pix2hid_learning_rate = gameObj1.pix2hid_learning_rate;
        hid2out_learning_rate = gameObj1.hid2out_learning_rate;
    }
    printf("pix2hid_learning_rate = %f\n", pix2hid_learning_rate);
    printf("hid2out_learning_rate = %f\n", hid2out_learning_rate);

    ///============ Optionally load weights from file ===================================
    printf("Would you like to load stored weights, pix2hid_weight.dat and hid2out_weight.dat <Y>/<N> \n");
    printf("Example use (if you don't already trainied some good files) <N>\n");
    answer_character = getchar();
    if(answer_character == 'Y' || answer_character == 'y')
    {
        sprintf(filename, "pix2hid_weight.dat");
        fp2 = fopen(filename, "rb");///Binary mode: the file is a raw dump of floats
        if (fp2 == NULL)
        {
            printf("Error while opening file pix2hid_weight.dat");
            exit(EXIT_FAILURE);
        }
        printf("Start so load pix2hid_weight.dat Please wait... The file size is = %lu bytes\n", (unsigned long) ((sizeof pix2hid_weightB[0]) * (pixel_height * pixel_width * Nr_of_hidden_nodes * gameObj1.nr_of_frames)));
        size_t pix2hid_expected = (size_t) (pixel_height * pixel_width * Nr_of_hidden_nodes * gameObj1.nr_of_frames);
        if(fread(pix2hid_weightB, sizeof pix2hid_weightB[0], pix2hid_expected, fp2) != pix2hid_expected)
        {
            printf("Warning: pix2hid_weight.dat holds fewer weights than expected, the rest keep their random start values\n");
        }
        fclose(fp2);
        printf("weights are loaded from pix2hid_weight.dat file\n");
        sprintf(filename, "hid2out_weight.dat");
        fp2 = fopen(filename, "rb");
        if (fp2 == NULL)
        {
            printf("Error while opening file hid2out_weight.dat");
            exit(EXIT_FAILURE);
        }
        printf("Start so load hid2out_weight.dat Please wait... The file size is = %lu bytes\n", (unsigned long) ((sizeof hid2out_weightB[0]) * (gameObj1.nr_of_frames * Nr_of_hidden_nodes)));
        size_t hid2out_expected = (size_t) (gameObj1.nr_of_frames * Nr_of_hidden_nodes);
        if(fread(hid2out_weightB, sizeof hid2out_weightB[0], hid2out_expected, fp2) != hid2out_expected)
        {
            printf("Warning: hid2out_weight.dat holds fewer weights than expected, the rest keep their random start values\n");
        }
        fclose(fp2);
        printf("weights are loaded from hid2out_weight.dat file\n");
    }

    ///Thresholds used by the 3 state action mode (test and debug)
    float threshold_1;
    float threshold_2;
    threshold_1 = (1.0f/3.0f);
    threshold_2 = (2.0f/3.0f);
    printf("threshold_1 %f\n", threshold_1);
    printf("threshold_2 %f\n", threshold_2);
    float rev_out_node;
    float rev_rand_dice;
    float action_dice;

    while(1)
    {
        gameObj1.replay_episode = 0;
        float dot_product = 0.0f;
        gameObj1.start_episode();///Start a new game turn
        ///Select the dropout nodes among the hidden nodes
        randomize_dropoutHid(&dropoutHidden[0], (gameObj1.nr_of_frames * Nr_of_hidden_nodes), gameObj1.Not_dropout, gameObj1.drop_out_percent);
        for(int frame_g=0; frame_g<gameObj1.nr_of_frames; frame_g++) ///Loop through each frame of the episode
        {
            output_node[frame_g] = 0.0f;///Start with clearing this node
            gameObj1.frame = frame_g;
            gameObj1.run_episode();
            test = gameObj1.gameGrapics.clone();
            resize(test, resized_grapics, size);
            if(gameObj1.use_image_diff==1)
            {
                ///NOTE(review): on frame 0 diff_grap is left untouched and pre_grap is zeroed
                ///rather than set to the current frame - confirm this is intended.
                if(frame_g==0)
                {
                    pre_grap = Scalar(0.0f);
                }
                else
                {
                    diff_grap = pre_grap - resized_grapics;
                    pre_grap = resized_grapics.clone();///Used to calculate diff_grap for the next frame
                }
            }
            ///=============== Forward data for this frame ==================
            ///Dot product into this frame's hidden nodes and output node
            for(int i=0; i<Nr_of_hidden_nodes; i++)
            {
                hidden_node[frame_g * Nr_of_hidden_nodes + i] = 0.0f;
                dot_product = 0.0f;///Clear before summing up the dot product
                for(int j=0; j<(pixel_height * pixel_width); j++)
                {
                    ///Index of the weight for (frame_g, hidden node i, pixel j) in pix2hid_weightB[]
                    ix = ((pixel_width * Nr_of_hidden_nodes) * (pixel_height * frame_g + j/pixel_width) + (pixel_width * i) + j%pixel_width);
                    if(gameObj1.use_image_diff==1)
                    {
                        index_ptr_diff_grap = zero_ptr_diff_grap + j;///Point at pixel j of the difference image
                        dot_product += pix2hid_weightB[ix] * (*index_ptr_diff_grap);
                        input_node_stored[pixel_width * pixel_height * frame_g + j] = (*index_ptr_diff_grap);///Stored for the weight update
                    }
                    else
                    {
                        index_ptr_res_grap = zero_ptr_res_grap + j;///Point at pixel j of the resized image
                        dot_product += pix2hid_weightB[ix] * (*index_ptr_res_grap);
                        if(use_noise_image==1 && i==0)
                        {
                            float noise;
                            noise = ((float) (rand() % 65535) / 65536) ;
                            if(noise < noise_thres)
                            {
                                (*index_ptr_res_grap) = ((float) (rand() % 65535) / 65536);
                            }
                        }
                        input_node_stored[pixel_width * pixel_height * frame_g + j] = (*index_ptr_res_grap);///Stored for the weight update
                    }
                }
                ///ReLU on the dot product, or zero when this node is dropped out
                if(dropoutHidden[(frame_g * Nr_of_hidden_nodes + i)] == 0)
                {
                    dot_product = relu(dot_product);
                }
                else
                {
                    dot_product = 0.0f;
                }
                hidden_node[frame_g * Nr_of_hidden_nodes + i] = dot_product;
                ix = (frame_g * Nr_of_hidden_nodes + i);
                output_node[frame_g] += hid2out_weightB[ix] * dot_product;
            }
            output_node[frame_g] = 1.0/(1.0 + exp(-(output_node[frame_g])));///Sigmoid function
            ///=============== End forward data for this frame ==================
            imshow("resized_grapics", resized_grapics);
            if(gameObj1.use_image_diff==1)
            {
                imshow("diff", diff_grap);
            }
            waitKey(1);
            ///=============== Make the action =======
            ///A dice roll against the policy network output probability decides the action
            if(gameObj1.use_dice_action == 1)
            {
                action_dice = (float) (rand() % 65535) / 65536;
            }
            else
            {
                action_dice = 0.5f;///Old test mode
            }
            if(gameObj1.enabel_3_state ==1)
            {
                ///3 state mode UP, DOWN, STOP (experimental):
                ///the dice and the output are added in the inverse-sigmoid domain and squashed again
                rev_out_node = revers_sigmoid(output_node[frame_g]);
                rev_rand_dice = revers_sigmoid(action_dice);
                action_dice = 1.0/(1.0 + exp(-(rev_rand_dice+rev_out_node)));///Sigmoid function
                if(action_dice > threshold_2)
                {
                    action[frame_g] = 1.0f;
                    gameObj1.move_up = 1;
                }
                else if(action_dice < threshold_1)
                {
                    action[frame_g] = 0.0f;
                    gameObj1.move_up = 0;
                }
                else
                {
                    action[frame_g] = 0.5;///STOP target
                    gameObj1.move_up = 2;///STOP
                }
                if(frame_g >95)
                {
                    printf("frameg =%d\n", frame_g);
                    printf("action_dice %f\n", action_dice);
                    printf("output_node[frame_g] %f\n", output_node[frame_g]);
                    ///fabs() guarantees a floating point absolute value
                    printf("abs(output_node[frame_g] - 0.5f) = %f\n", fabs(output_node[frame_g] - 0.5f));
                    printf("abs(action_dice - 0.5f) =%f\n", fabs(action_dice - 0.5f));
                    printf("action[frame_g] =%f\n", action[frame_g]);
                }
            }
            else
            {
                ///2 state mode UP, DOWN, using the probability from the policy network
                if(output_node[frame_g] > action_dice)
                {
                    action[frame_g] = 1.0f;
                    gameObj1.move_up = 1;
                }
                else
                {
                    action[frame_g] = 0.0f;
                    gameObj1.move_up = 0;
                }
            }
            ///================= End action ==================
        }
        if(gameObj1.print_out_nodes==1)
        {
            for(int i=0; i<gameObj1.nr_of_frames; i++)
            {
                printf("output_node[frame nr %d] = %f\n", gameObj1.nr_of_frames-1-i, output_node[gameObj1.nr_of_frames-1-i]);
            }
        }
        ///========== Auto save weights to files ====================
        if(auto_save_w_counter>auto_save_after)
        {
            auto_save_w_counter=0;
            sprintf(filename, "pix2hid_weight.dat");
            fp2 = fopen(filename, "wb");///Binary mode: the file is a raw dump of floats
            if (fp2 == NULL)
            {
                printf("Error while opening file pix2hid_weight.dat");
                exit(EXIT_FAILURE);
            }
            printf("Start so save pix2hid_weight.dat Please wait... The file size is = %lu bytes\n", (unsigned long) ((sizeof pix2hid_weightB[0]) * (pixel_height * pixel_width * Nr_of_hidden_nodes * gameObj1.nr_of_frames)));
            size_t pix2hid_to_write = (size_t) (pixel_height * pixel_width * Nr_of_hidden_nodes * gameObj1.nr_of_frames);
            if(fwrite(pix2hid_weightB, sizeof pix2hid_weightB[0], pix2hid_to_write, fp2) != pix2hid_to_write)
            {
                printf("Warning: could not write all weights to pix2hid_weight.dat\n");
            }
            fclose(fp2);
            printf("weights are saved at pix2hid_weight.dat file\n");
            sprintf(filename, "hid2out_weight.dat");
            fp2 = fopen(filename, "wb");
            if (fp2 == NULL)
            {
                printf("Error while opening file hid2out_weight.dat");
                exit(EXIT_FAILURE);
            }
            printf("Start so save hid2out_weight.dat Please wait... The file size is = %lu bytes\n", (unsigned long) ((sizeof hid2out_weightB[0]) * (gameObj1.nr_of_frames * Nr_of_hidden_nodes)));
            size_t hid2out_to_write = (size_t) (gameObj1.nr_of_frames * Nr_of_hidden_nodes);
            if(fwrite(hid2out_weightB, sizeof hid2out_weightB[0], hid2out_to_write, fp2) != hid2out_to_write)
            {
                printf("Warning: could not write all weights to hid2out_weight.dat\n");
            }
            fclose(fp2);
            printf("weights are saved at hid2out_weight.dat file\n");
        }
        else
        {
            auto_save_w_counter++;
        }
        ///========== End auto save weights to files ====================
        ///========== Show visualization of the weights (not necessary for training) ==========
        if(show_w_counter>show_w_after)
        {
            show_w_counter=0;
            for(int i=0; i<(gameObj1.nr_of_frames * Nr_of_hidden_nodes); i++)
            {
                ///All hid2out weights; +0.5 makes a grayscale with gray center at 0.5
                index_ptr_hid2out_w = zero_ptr_hid2out_w + i;
                (*index_ptr_hid2out_w) = hid2out_weightB[i] + 0.5f;
            }
            int show_ever5frame_start =0;
            for(int si=0; si<(pixel_height * visual_nr_of_hid_node * pixel_width * visual_nr_of_frames); si++) ///si = visualization Mat iterator
            {
                ///Visualize the pix2hid weights of only a few frames and a few hidden nodes.
                ///pix2hid_weightB[ix] is organized like this:
                ///ix = ((pixel_width * Nr_of_hidden_nodes) * (pixel_height * frame_g + j/pixel_width) + (pixel_width * i) + j%pixel_width);
                ///where j iterates over the pixel area, i over the hidden nodes and frame_g is the frame number
                int vis_colum = 0;
                vis_colum = si%(pixel_width * visual_nr_of_hid_node);
                int vis_row = 0;
                vis_row = si/(pixel_width * visual_nr_of_hid_node);
                index_ptr_pix2hid_w = zero_ptr_pix2hid_w + si;
                ///Each block of patch rows jumps ahead a fixed number of game frames
                show_ever5frame_start = (si/(pixel_height * visual_nr_of_hid_node * pixel_width)) * ((gameObj1.nr_of_frames-1) / visual_nr_of_frames);
                ///Map from the large weight vector to the visualization Mat
                (*index_ptr_pix2hid_w) = pix2hid_weightB[(vis_row + show_ever5frame_start*pixel_height) * (pixel_width * Nr_of_hidden_nodes) + vis_colum%(pixel_width * Nr_of_hidden_nodes)];
                (*index_ptr_pix2hid_w) += 0.5f;///Gray scale center at 0.5f
            }
            imshow("pix2hid_weight", pix2hid_weight);///Only a few weights are shown
            imshow("hid2out_weight", hid2out_weight);
        }
        else
        {
            show_w_counter++;
        }
        ///========== End show visualization of the weights =================
        printf("nr_of_episodes =%d\n", nr_of_episodes);
        gameObj1.episode = nr_of_episodes;
        nr_of_episodes++;
        float rewards =0.0f;
        int ball_pad_diff = 0;///Only used in high_precition_mode (proportional reward mode)
        ball_pad_diff = gameObj1.pad_ball_diff;
        ball_pad_diff = int_abs_value(ball_pad_diff);///Remove the sign
        if(gameObj1.win_this_game == 1)
        {
            if(gameObj1.high_precition_mode==0)
            {
                rewards = +4.0f;///The Agent won this episode
            }
            else
            {
                ///Proportional reward mode
                printf("ball_pad_diff abs = %d\n", ball_pad_diff);
                if(ball_pad_diff == 0)
                {
                    rewards = +20.0f;///Perfect catch
                }
                else
                {
                    rewards = 20.0f / (float) ball_pad_diff;
                }
                if(rewards > gameObj1.max_rewards)
                {
                    rewards = gameObj1.max_rewards;
                }
            }
        }
        else
        {
            if(gameObj1.high_precition_mode==0)
            {
                rewards = -1.0f;///The Agent lost this episode
            }
            else
            {
                ///Proportional reward mode
                printf("ball_pad_diff abs = %d\n", ball_pad_diff);
                if(ball_pad_diff == 0)
                {
                    ///A perfect catch is not expected when losing; rewards stays 0
                }
                else
                {
                    rewards = - (((float) ball_pad_diff) / 60.0f ) ;
                }
                if(rewards < -3.0f)
                {
                    rewards = -3.0f;///Limit the negative rewards
                }
            }
        }
        if(gameObj1.flip_reward_sign == 1)
        {
            printf("Flip sign of the rewards \n");
            rewards *= -1.0;///Testing what happens with a flipped reward
        }
        printf("rewards = %f\n", rewards);
        if(rewards > highest_rewards)
        {
            highest_rewards = rewards;
            best_rewards_serie = gameObj1.replay_count;
            ///Save this action series for a later replay
            for(int i=0;i<gameObj1.nr_of_frames;i++)
            {
                best_actions[i] = action[i];
            }
            printf("best_rewards_serie = %d\n", best_rewards_serie);
        }
        if(gameObj1.replay_count == gameObj1.replay_times)
        {
            if(gameObj1.replay_times > 0)
            {
                gameObj1.replay_episode = 1;///Replay mode
                ///---------------- Replay the series with the best actions ----------------
                gameObj1.start_episode();///Start a new game turn
                randomize_dropoutHid(&dropoutHidden[0], (gameObj1.nr_of_frames * Nr_of_hidden_nodes), gameObj1.Not_dropout, gameObj1.drop_out_percent);
                for(int frame_g=0; frame_g<gameObj1.nr_of_frames; frame_g++) ///Loop through each frame of the episode
                {
                    output_node[frame_g] = 0.0f;///Start with clearing this node
                    gameObj1.frame = frame_g;
                    gameObj1.run_episode();
                    test = gameObj1.gameGrapics.clone();
                    resize(test, resized_grapics, size);
                    if(gameObj1.use_image_diff==1)
                    {
                        if(frame_g==0)
                        {
                            pre_grap = Scalar(0.0f);
                        }
                        else
                        {
                            diff_grap = pre_grap - resized_grapics;
                            pre_grap = resized_grapics.clone();///Used to calculate diff_grap for the next frame
                        }
                    }
                    ///=============== Forward data for this frame ==================
                    for(int i=0; i<Nr_of_hidden_nodes; i++)
                    {
                        hidden_node[frame_g * Nr_of_hidden_nodes + i] = 0.0f;
                        dot_product = 0.0f;///Clear before summing up the dot product
                        for(int j=0; j<(pixel_height * pixel_width); j++)
                        {
                            ix = ((pixel_width * Nr_of_hidden_nodes) * (pixel_height * frame_g + j/pixel_width) + (pixel_width * i) + j%pixel_width);
                            if(gameObj1.use_image_diff==1)
                            {
                                index_ptr_diff_grap = zero_ptr_diff_grap + j;
                                dot_product += pix2hid_weightB[ix] * (*index_ptr_diff_grap);
                                input_node_stored[pixel_width * pixel_height * frame_g + j] = (*index_ptr_diff_grap);
                            }
                            else
                            {
                                index_ptr_res_grap = zero_ptr_res_grap + j;
                                dot_product += pix2hid_weightB[ix] * (*index_ptr_res_grap);
                                if(use_noise_image==1 && i==0)
                                {
                                    float noise;
                                    noise = ((float) (rand() % 65535) / 65536) ;
                                    if(noise < noise_thres)
                                    {
                                        (*index_ptr_res_grap) = ((float) (rand() % 65535) / 65536);
                                    }
                                }
                                input_node_stored[pixel_width * pixel_height * frame_g + j] = (*index_ptr_res_grap);
                            }
                        }
                        ///ReLU on the dot product, or zero when this node is dropped out
                        if(dropoutHidden[(frame_g * Nr_of_hidden_nodes + i)] == 0)
                        {
                            dot_product = relu(dot_product);
                        }
                        else
                        {
                            dot_product = 0.0f;
                        }
                        hidden_node[frame_g * Nr_of_hidden_nodes + i] = dot_product;
                        ix = (frame_g * Nr_of_hidden_nodes + i);
                        output_node[frame_g] += hid2out_weightB[ix] * dot_product;
                    }
                    output_node[frame_g] = 1.0/(1.0 + exp(-(output_node[frame_g])));///Sigmoid function
                    ///=============== End forward data for this frame ==================
                    imshow("resized_grapics", resized_grapics);
                    if(gameObj1.use_image_diff==1)
                    {
                        imshow("diff", diff_grap);
                    }
                    waitKey(1);
                    ///=============== Take the action from the stored best action series =======
                    if(best_actions[frame_g] == 0.0f)
                    {
                        action[frame_g] = 0.0f;
                        gameObj1.move_up = 0;
                    }
                    if(best_actions[frame_g] == 1.0f)
                    {
                        action[frame_g] = 1.0f;
                        gameObj1.move_up = 1;
                    }
                    if(best_actions[frame_g] == 0.5f)
                    {
                        action[frame_g] = 0.5f;
                        gameObj1.move_up = 2;
                    }
                    ///================= End action ==================
                }
                ///---------------- End replay of the best series ----------------
                printf("Best series was replayed\n");
                gameObj1.replay_episode = 0;
                rewards = highest_rewards;
                printf("replayed rewards = %f\n", rewards);
            }
            highest_rewards = -1000.0f;///Clear for the next series of runs
        }
        if(gameObj1.replay_count == gameObj1.replay_times)
        {
            ///================== Back-propagate now that the whole episode is done ==============
            ///Walks from the last frame to the first; no game is played here.
            for(int frame_g = gameObj1.nr_of_frames - 1; frame_g >= 0; frame_g--)
            {
                ///The taken action is used as a pseudo target, so a gradient can be produced for
                ///every frame even though it is not known whether the action was right. The sign
                ///and size of the reward, applied at the weight update, correct the polarity.
                output_delta[frame_g] = (action[frame_g] - output_node[frame_g]) * output_node[frame_g] * (1.0f - output_node[frame_g]);
                for(int i=0; i<Nr_of_hidden_nodes; i++)
                {
                    ///**** Backprop hidden delta ****
                    ix = (frame_g * Nr_of_hidden_nodes + i);
                    if(dropoutHidden[frame_g * Nr_of_hidden_nodes + i] == 0)
                    {
                        ///NOTE(review): the leaky ReLU slope for negative hidden values is not applied here - confirm intended
                        hidden_delta[frame_g * Nr_of_hidden_nodes + i] = output_delta[frame_g] * hid2out_weightB[ix];
                    }
                    else
                    {
                        hidden_delta[frame_g * Nr_of_hidden_nodes + i] = 0.0f;///Dropped node: no weight change from this backprop
                    }
                    ///===== Update weights from the stored delta and the stored input pixels ========
                    for(int j=0; j<(pixel_height * pixel_width); j++)
                    {
                        ix = ((pixel_width * Nr_of_hidden_nodes) * (pixel_height * frame_g + j/pixel_width) + (pixel_width * i) + j%pixel_width);
                        pix2hid_weightB[ix] += hidden_delta[frame_g * Nr_of_hidden_nodes + i] * input_node_stored[pixel_width * pixel_height * frame_g + j] * pix2hid_learning_rate * rewards;
                    }
                    ix = (frame_g * Nr_of_hidden_nodes + i);
                    hid2out_weightB[ix] += output_delta[frame_g] * hidden_node[frame_g * Nr_of_hidden_nodes + i] * hid2out_learning_rate * rewards;
                    ///============= End update weights for this position ==============
                }
                rewards *= gamma;///Earlier frames get a decayed reward
            }
            ///=================== End backprop ====================================================
            waitKey(1);
        }
    }
    return 0;
}
#ifndef PINBALL_GAME_H
#define PINBALL_GAME_H
///Increased the size of the angle character and the sign of the angle so the Agent can see this resized char and sign +/-; it then seems to learn from that.
#include <opencv2/highgui/highgui.hpp> // OpenCV window I/O
#include <opencv2/imgproc/imgproc.hpp> // Gaussian Blur
#include <stdio.h>
///#include <raspicam/raspicam_cv.h>
#include <opencv2/opencv.hpp>
#include <opencv2/core/core.hpp> // Basic OpenCV structures (cv::Mat, Scalar)
#include <cstdlib>
#include <ctime>
#include <math.h> // exp
#include <stdlib.h>// exit(0);
#include <iostream>
using namespace std;
using namespace cv;
class pinball_game
{
public:
pinball_game()///Constructor
{
printf("Construct a arcade game object\n");
}
virtual ~pinball_game()///Destructor
{
printf("Destruct game object\n");
}
///== User settings used outside this object but Set by this object =============
int use_character;///1=enable charackters
int enable_ball_swan;///1= enbale swan after the ball
int use_image_diff;
int high_precition_mode; ///This will make adjustable rewards highest at center of the pad.
int use_dice_action;
int drop_out_percent;
int Not_dropout;
int flip_reward_sign;
int print_out_nodes;
int replay_times;///If =0 no replay. >0 this is the nuber of replay with serveral diffrent actions so the ageint take the best rewards before make any weights update
int replay_count;//
int replay_episode;
float pix2hid_learning_rate;
float hid2out_learning_rate;
float max_rewards;
int enabel_3_state;///1= then enable 3 actions UP, DOWN, STOP
///=== End user settings ========
int game_Width;///Pixel width of game grapics. A constant value set in init_game
int game_Height;///Pixel height of game grapics. A constant value set in init_game
int move_up;///Input Action from Agent. 1= Move up pad. 0= Move down pad. 2= STOP used only when enabel_3_state = 1
int win_this_game;///1= Catched the ball this episode. 0= miss. This will be used as the reward feedback to the Reinforcment Learning
int pad_ball_diff;
int nr_of_frames;///The number of frames on one episode. A constant value set in init_game
int slow_motion;///1= slow down speed of game 0= full speed
Mat gameGrapics;///This is the grapics of the game how is the enviroment
int episode;///Episode is only use to ensure no pattern in randomizer
void init_game(void);
void start_episode(void);
void run_episode(void);
void set_user_settings(void);
int frame;
protected:
private:
int pad_position;
int ball_pos_x;
int ball_pos_y;
float ball_angle_derivate;///Example 0 mean strigh forward. +1.0 mean ball go up 1 pixel on one frame 45 deg.
float save_replay_start_ball_ang;
int frame_steps;
int ball_offset_y;///
CvPoint P1;///The ball point OpenCV
CvPoint P2;///The pad point uppe corner OpenCV
CvPoint P3;///The pad point lower corner OpenCV
};
void pinball_game::set_user_settings(void)
{
char answer_character;
getchar();
printf("Do you want to use the differance between last pre and now image as input <Y>/<N> \n");
printf("Example use <N>\n");
answer_character = getchar();
if(answer_character == 'Y' || answer_character == 'y')
{
use_image_diff=1;
}
else
{
use_image_diff=0;
}
getchar();
printf("Do you print out output node values <Y>/<N> \n");
printf("Example use <N>\n");
answer_character = getchar();
if(answer_character == 'Y' || answer_character == 'y')
{
print_out_nodes=1;
}
else
{
print_out_nodes=0;
}
getchar();
printf("Do you want LOW precition mode (only -1 ot +4 arwards) <Y>/<N> \n");
printf("Example use <N>\n");
answer_character = getchar();
if(answer_character == 'Y' || answer_character == 'y')
{
high_precition_mode=0;
}
else
{
high_precition_mode=1;///Now the rewards be depend on how good centerd the ball was catched.
}
getchar();
printf("Do you want to use Action made from dice AND policy network <Y>/<N> \n");
printf("Example use <Y>\n");
answer_character = getchar();
if(answer_character == 'Y' || answer_character == 'y')
{
use_dice_action=1;
}
else
{
use_dice_action=0;
}
getchar();
printf("Do you want to use dropout at hidden node <Y>/<N> \n");
printf("Example use <N>\n");
answer_character = getchar();
if(answer_character == 'Y' || answer_character == 'y')
{
Not_dropout=0;
printf("Enter (int) drop_out_percent (drop_out_percent =%d)\n", drop_out_percent);
scanf("%d", &drop_out_percent);
}
else
{
Not_dropout=1;
drop_out_percent=0;
}
getchar();
printf("Do you want flip sign of Awarwd <Y>/<N> \n");
printf("Example use <N>\n");
answer_character = getchar();
if(answer_character == 'Y' || answer_character == 'y')
{
flip_reward_sign =1;
}
else
{
flip_reward_sign =0;
}
getchar();
printf("Do you want to enable swan after the ball <Y>/<N> \n");
printf("Example use <Y>\n");
answer_character = getchar();
if(answer_character == 'Y' || answer_character == 'y')
{
enable_ball_swan =1;
}
else
{
enable_ball_swan =0;
}
getchar();
printf("Do you want to enable extra characters on the graphics <Y>/<N> \n");
printf("Example use <Y>\n");
answer_character = getchar();
if(answer_character == 'Y' || answer_character == 'y')
{
use_character =1;
}
else
{
use_character =0;
}
getchar();
printf("Do you want to use default learining rate and max rewards parameters <Y>/<N> \n");
printf("Example use <Y>\n");
answer_character = getchar();
if(answer_character == 'Y' || answer_character == 'y')
{
///Default Learning rate
printf("Default learning rate and default max rewards used\n");
}
else
{
printf("Enter (float) pix2hid_learning_rate (default was =%f)\n", pix2hid_learning_rate);
scanf("%f", &pix2hid_learning_rate);
printf("Enter (float) hid2out_learning_rate (default was =%f)\n", hid2out_learning_rate);
scanf("%f", &hid2out_learning_rate);
printf("Enter (float) max_rewards (default was =%f)\n", max_rewards);
scanf("%f", &max_rewards);
}
getchar();
printf("Do you want to use replay same ball direction several times and only use bets rewards <Y>/<N> \n");
printf("Example use <N>\n");
answer_character = getchar();
if(answer_character == 'Y' || answer_character == 'y')
{
printf("Enter (int) replay_times (default was =%d)\n", replay_times);
scanf("%d", &replay_times);
}
else
{
replay_times = 0;
printf("No replay. replay_times =%d\n", replay_times);
}
getchar();
printf("Do you want to enable 3 state action UP, DOWN and STOP <Y>/<N> \n");
printf("Example use <N>\n");
answer_character = getchar();
if(answer_character == 'Y' || answer_character == 'y')
{
enabel_3_state =1;
}
else
{
enabel_3_state =0;
}
getchar();
printf("********* You have enter this settings ***********\n");
printf("pix2hid_learning_rate =%f \n", pix2hid_learning_rate);
printf("hid2out_learning_rate =%f \n", hid2out_learning_rate);
printf("max_rewards =%f \n", max_rewards);
printf("Not_dropout =%d \n", Not_dropout);
printf("drop_out_percent =%d \n", drop_out_percent);
printf("use_character =%d \n", use_character);
printf("enable_ball_swan =%d \n", enable_ball_swan);
printf("use_image_diff =%d\n", use_image_diff);
printf("high_precition_mode =%d\n", high_precition_mode);
printf("use_dice_action =%d\n", use_dice_action);
printf("flip_reward_sign =%d\n", flip_reward_sign);
printf("print_out_nodes =%d\n", print_out_nodes);
printf("enabel_3_state =%d\n", enabel_3_state);
printf("replay_times =%d\n", replay_times);
printf("********* Good luck with your settings :) ***********\n");
}
void pinball_game::init_game(void)
{
replay_count=0;//Fix this missing bugg 2017-11-15
use_character=0;
enable_ball_swan=0;///default no swan
slow_motion=0;
move_up=0;///Init down if there was no Agent action done.
win_this_game=0;///Init
frame=0;///Init with frame 0 for the episode
pad_position = game_Height/2;///Start the game at center
game_Width = 220;///
game_Height = 200;///
nr_of_frames = 100;///
gameGrapics.create(game_Height, game_Width, CV_32FC1);
gameGrapics = Scalar(0.0f);///Init with Black
srand (static_cast <unsigned> (time(0)));///Seed the randomizer
ball_angle_derivate = (float) (rand() % 65535) / 65536;///Set ball (here only first time) shoot angle. Random value 0..1.0 range
}
/// Prepare the ball and the pad for a new episode.
/// A fresh random start value is drawn only when replay_episode is 0 and the
/// replay counter has reached replay_times; in every other case the saved
/// start value is used again.
void pinball_game::start_episode(void)
{
    const bool replay_round_finished = !(replay_count < replay_times);
    if((replay_episode == 0) && replay_round_finished)
    {
        ///New ball direction. Random value 0..1.0 range, remembered for later replays
        replay_count = 0;
        save_replay_start_ball_ang = (float) (rand() % 65535) / 65536;
        ball_angle_derivate = save_replay_start_ball_ang;
    }
    else
    {
        ///Same ball direction once more. The counter only runs when replay_episode is 0
        if(replay_episode == 0)
        {
            replay_count++;
        }
        ball_angle_derivate = save_replay_start_ball_ang;
    }
    ///Stretch the 0..1.0 start value to about -3.0..+3.0 vertical pixels per frame step
    ball_angle_derivate = ball_angle_derivate * 6.0;
    ball_angle_derivate = ball_angle_derivate - 3.0;
    ///Ball and pad both start at the vertical centre of the picture
    pad_position = game_Height/2;
    ball_offset_y = game_Height/2;
    frame_steps = 0;
}
/// Run one frame of the game: move the ball, apply the Agent action (move_up)
/// to the pad, draw everything into gameGrapics and show it in the "Game" window.
/// The frame loop is outside this class so the Agent can do an action each frame
/// step; the member frame is only read here, never advanced.
/// On the last frame (frame > nr_of_frames-2) the pad is not moved any more and
/// the result is written to win_this_game and pad_ball_diff.
void pinball_game::run_episode(void)
{
    const int circle_zize = 5;
    const int ball_start_x = 10;///Start 10 pixel inside game plan
    const int y_bounce_constraints = 20;///The ball bounces this many pixels from the upper/lower edge
    const int y_pad_constraints = 28;///The pad centre is kept about this far from the upper/lower edge
    const int pad_width = 3;
    const int pad_height = 40;
    const int pad_speed = 4;///Pad movement in pixels per frame
    frame_steps++;///This is needed to handle bounce. It is reset at every bounce
    ///=========== Draw the ball =================
    ///Bounce handle
    ball_pos_y = ((int) ((float)frame_steps * ball_angle_derivate)) + ball_offset_y;///
    if(ball_pos_y > (game_Height-y_bounce_constraints) || ball_pos_y < y_bounce_constraints)
    {
        ///Mirror the direction and restart the straight line from the bounce point
        frame_steps=0;
        ball_angle_derivate = -ball_angle_derivate;
        float bounce_extra;
        if(ball_angle_derivate<0.0f)
        {
            bounce_extra = -1.0f;///This ensures a bounce even if ball_angle_derivate is less than +/-1.0
        }
        else
        {
            bounce_extra = 1.0f;///This ensures a bounce even if ball_angle_derivate is less than +/-1.0
        }
        ball_offset_y = ball_pos_y + (ball_angle_derivate+bounce_extra);
    }
    ball_pos_x = (frame * 2) + ball_start_x;///Take 2 pixel step forward
    P1.x = ball_pos_x;///Set to control graphic OpenCV circle() below
    P1.y = ball_pos_y;///Set to control graphic OpenCV circle() below
    if(enable_ball_swan==1)
    {
        ///Fade the whole old picture to 85% so the ball leaves a tail behind it
        float *index_ptr_gameGapics = gameGrapics.ptr<float>(0);
        for(int i=0; i<gameGrapics.rows * gameGrapics.cols; i++)
        {
            *index_ptr_gameGapics = *index_ptr_gameGapics * 0.85f;
            index_ptr_gameGapics++;
        }
        P2.y = 30;
        P3.y = 50;
        P2.x = 0;
        P3.x = 20;
        rectangle(gameGrapics, P2, P3, Scalar(0.0), 15);/// Erase characters
        P2.y = 0;
        P3.y = 20;
        P2.x = 0;
        P3.x = 20;
        rectangle(gameGrapics, P2, P3, Scalar(0.0), 15);/// Erase characters
        P2.y = 0;
        P3.y = game_Height;
        P2.x = game_Width-10;
        P3.x = game_Width-10;
        rectangle(gameGrapics, P2, P3, Scalar(0.0), 2);/// Erase old pad
    }
    else
    {
        gameGrapics = Scalar(0.05f);///Begin with a uniform 0.05 (almost black) picture, then draw ball and pad
    }
    ///NOTE(review): by the OpenCV circle() signature the two last arguments are thickness=7 and lineType=-1.
    ///Possibly a filled circle (thickness=-1) was intended. Left unchanged, confirm before touching.
    circle(gameGrapics, P1, circle_zize, Scalar(0.9), 7, -1);///Ball size 7
    ///===============================
    ///========= Draw the Pad ==============
    if(!(frame > nr_of_frames-2))
    {
        if(pad_position > (game_Height-y_pad_constraints))
        {
            ///Pad is at the lower limit. Allow only move up
            if(move_up==1)
            {
                pad_position = pad_position - pad_speed;
            }
            else if(move_up==0)
            {
                pad_position = (game_Height-y_pad_constraints) + pad_speed;
            }
            else
            {
                ///STOP move only used when enable_3_state = 1
            }
        }
        else if(pad_position < y_pad_constraints)
        {
            ///Pad is at the upper limit. Allow only move down
            if(move_up==0)
            {
                pad_position = pad_position + pad_speed;
            }
            else if(move_up==1)
            {
                ///Bug fix: this branch tested move_up==0 a second time and could never run.
                ///It now mirrors the clamp used at the lower limit above.
                pad_position = (y_pad_constraints) - pad_speed;
            }
            else
            {
                ///STOP move only used when enable_3_state = 1
            }
        }
        else
        {
            ///Move up or down
            if(move_up==1)
            {
                pad_position = pad_position - pad_speed;
            }
            else if(move_up==0)
            {
                pad_position = pad_position + pad_speed;
            }
            else
            {
                ///STOP move only used when enable_3_state = 1
            }
        }
    }
    P2.y = pad_position - (pad_height/2);
    P3.y = pad_position + (pad_height/2);
    P2.x = - (pad_width/2) + game_Width-10;
    P3.x = (pad_width/2) + game_Width-10;
    rectangle(gameGrapics, P2, P3, Scalar(0.8), 2);/// C++: void rectangle(Mat& img, Point pt1, Point pt2, const Scalar& color, int thickness=1, int lineType=8, int shift=0)
    if(frame > nr_of_frames-2)
    {
        ///This episode is over
        ///Did the pad catch the ball? The ball must be inside the vertical extent of the pad
        if(((pad_position + (pad_height/2)) < ball_pos_y) || ((pad_position - (pad_height/2)) > ball_pos_y))
        {
            ///Lose
            win_this_game = 0;
            printf("Lose \n");
        }
        else
        {
            ///Win, caught
            win_this_game = 1;
            printf("Win \n");
        }
        pad_ball_diff = pad_position - ball_pos_y;
    }
    if(use_character==1)
    {
        char episode_char = ((char) episode);
        char ball_ang_char;
        ///Two character text: the sign of the ball angle followed by one character coding its size
        string angle = "xx";
        std::string::iterator It = angle.begin();
        if(ball_angle_derivate < 0.0)
        {
            *It = '-';
            ball_ang_char = (char) (-ball_angle_derivate*4.2);
        }
        else
        {
            *It = '+';
            ball_ang_char = (char) (ball_angle_derivate*4.2);
        }
        It++;
        ///ball_ang_char *= 10;
        *It = ball_ang_char+48;///48 is ASCII '0'
        /// cv::putText(gameGrapics, angle, cvPoint((5+episode_char/20),(175+(char) (rand() % 16))), CV_FONT_HERSHEY_PLAIN, 2, cvScalar(55),2);
        cv::putText(gameGrapics, angle, cvPoint((5),(175)), CV_FONT_HERSHEY_PLAIN, 4, cvScalar(0.5),3);
        /// char rand_char = rand() % 255;
        string random = "x";
        std::string::iterator It2 = random.begin();
        if(episode_char < 40)
        {
            episode_char = episode_char + 40;
        }
        *It2 = episode_char ;///Episode is only used to ensure no pattern in the randomizer
        cv::putText(gameGrapics, random, cvPoint((3+episode_char/20),(35+((char) (rand() % 16)))), CV_FONT_HERSHEY_PLAIN, 2, cvScalar(0.5),2);
    }
    imshow("Game", gameGrapics);
    if(slow_motion==1)
    {
        waitKey(20);///Wait 20msec
    }
    else
    {
        waitKey(1);///Wait 1msec, only to let OpenCV update the graphics
    }
}
#endif // PINBALL_GAME_H
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment