feat: added mini-batch to AI
All checks were successful
continuous-integration/drone/push Build is passing
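This change switches training from per-sample updates to mini-batch gradient descent: each epoch shuffles the dataset, splits it into batches of batch_size = data_len * batch_pourcent samples, accumulates the gradients of a whole batch in a scratch "memory" network, and applies the averaged update once per batch. A minimal sketch of the batch arithmetic used in the diff below, with made-up numbers:

    size_t data_len = 1000;                                  /* hypothetical dataset size */
    double batch_pourcent = 0.3;                             /* hypothetical CLI argument */
    size_t batch_size = (size_t)(data_len * batch_pourcent); /* 300 */
    size_t it = data_len / batch_size;                       /* 3 full batches... */
    if (data_len % batch_size > 0)
        it++;                                                /* ...plus a final batch of 100 */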
@@ -3,9 +3,10 @@
 #include <err.h>
 #include <string.h>
 #include "../Application/ApplicationUtils.h"
+#include "../Image/ImageUtils.h"
 #include "neural_utils.h"
 
-void network_train(neural_network* network, char* training_data_dir, char* save_path, size_t iteration, size_t warmup, size_t warmup_iteration, double learning_rate, size_t AdaFactor)
+void network_train(neural_network* network, char* training_data_dir, char* save_path, double batch_pourcent, size_t iteration, size_t warmup, size_t warmup_iteration, double learning_rate, size_t AdaFactor)
 {
     network->nb_input = 169;
     network->hidden_height = 30;
@@ -18,21 +19,26 @@ void network_train(neural_network* network, char* training_data_dir, char* save_
 
     training_data* training_datas = load_dataset(training_data_dir, AdaFactor, &data_len);
 
-    train_network(network, training_datas, data_len, learning_rate, warmup, warmup_iteration, iteration);
+    size_t batch_size = (size_t)(data_len * batch_pourcent);
+
+    train_network(network, training_datas, data_len, learning_rate, batch_size, warmup, warmup_iteration, iteration);
 
     printf("Final network cost: %f\n", get_network_total_cost(network, training_datas, data_len));
     printf("Final success rate: %i\n", (int)(get_network_success_rate(network, training_datas, data_len, AdaFactor) * 100.0));
 
     save_neural_network(network, save_path);
 }
 
-void network_use(neural_network* network, char** inputs)
+void network_use(neural_network* network, double* inputs)
 {
     for (size_t i = 0; i < network->nb_input; i++)
     {
-        network->inputs[i].activation = atof(inputs[i]);
+        network->inputs[i].activation = inputs[i];
     }
 
     process_network(network);
 
-    printf("Predicted character: %c", get_network_char_prediction(network, network->nb_output / 26));
+    printf("Predicted character: %c\n", get_network_char_prediction(network, network->nb_output / 26));
 }
 
 void network_main(int argc, char* argv[])
@@ -47,49 +53,45 @@ void network_main(int argc, char* argv[])
 
     neural_network network;
 
-    if (strcmp(action, "train") == 0) //train network: ./network train <network.csv> <data directory> <iterations> <warmup> <warmup iterations> [learning_rate] [AdaFactor]
+    if (strcmp(action, "train") == 0) //train network: ./network train <network.csv> <data directory> <batch pourcent> <iterations> <warmup> <warmup iterations> [learning_rate] [AdaFactor]
     {
-        if (argc < 7)
-            errx(EXIT_FAILURE, "missing arguments, usage: ./network train <network.csv> <data directory> <iterations> <warmup> <warmup iterations> [learning_rate] [AdaFactor]");
+        if (argc < 8)
+            errx(EXIT_FAILURE, "missing arguments, usage: ./network train <network.csv> <data directory> <batch pourcent> <iterations> <warmup> <warmup iterations> [learning_rate] [AdaFactor]");
 
         char* network_path = combine_path(network_application_directory, argv[2]);
 
         char* data_dir = combine_path(network_application_directory, argv[3]);
-        size_t iterations = (size_t)atoi(argv[4]);
-        size_t warmup = (size_t)atoi(argv[5]);
-        size_t warmup_iterations = (size_t)atoi(argv[6]);
+        double batch_pourcent = atof(argv[4]);
+        if (batch_pourcent > 1)
+            errx(EXIT_FAILURE, "invalid argument: <batch_pourcent> must be between 0 and 1");
+        size_t iterations = (size_t)atoi(argv[5]);
+        size_t warmup = (size_t)atoi(argv[6]);
+        size_t warmup_iterations = (size_t)atoi(argv[7]);
 
         double learning_rate = 0.1;
-        if (argc > 7)
-            learning_rate = atof(argv[7]);
+        if (argc > 8)
+            learning_rate = atof(argv[8]);
 
         size_t AdaFactor = 1;
-        if (argc > 8)
-            AdaFactor = (size_t)atoi(argv[8]);
+        if (argc > 9)
+            AdaFactor = (size_t)atoi(argv[9]);
 
-        network_train(&network, data_dir, network_path, iterations, warmup, warmup_iterations, learning_rate, AdaFactor);
+        network_train(&network, data_dir, network_path, batch_pourcent, iterations, warmup, warmup_iterations, learning_rate, AdaFactor);
    }
-    else if (strcmp(action, "use") == 0) //use network: ./network use <network.csv> input1,input2,...,inputx
+    else if (strcmp(action, "use") == 0) //use network: ./network use <network.csv> <image path>
    {
        if (argc < 3)
-            errx(EXIT_FAILURE, "missing arguments, usage: ./network use <network.csv> input1,input2,...,inputx");
+            errx(EXIT_FAILURE, "missing arguments, usage: ./network use <network.csv> <image path>");
 
        char* network_path = combine_path(network_application_directory, argv[2]);
 
-        char* input_str = argv[3];
-
-        size_t nb_input = 0;
-
-        char** input_array = string_split(input_str, ',', &nb_input);
+        char* input_path = combine_path(network_application_directory, argv[3]);
 
        neural_network network;
 
        load_neural_network(&network, read_file(network_path));
 
-        if (nb_input != network.nb_input)
-            errx(EXIT_FAILURE, "inputs are not valid");
-
-        network_use(&network, input_array);
+        network_use(&network, image_to_bool_array(input_path));
    }
 
    /*printf("Success rate: %i\n", (int)(get_network_success_rate(&network, training_datas, data_len, 1) * 100));
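Note: the train command now takes a mandatory <batch pourcent> argument, a fraction of the dataset used as the batch size (only the upper bound of 1 is checked), and the use command now takes an image path instead of a comma-separated input list. Hypothetical invocations, with illustrative file names:

    ./network train network.csv dataset 0.25 1000 3 50 0.1 1
    ./network use network.csv letter.png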
@@ -146,6 +146,46 @@ void free_neural_network(neural_network* network)
     free(network->outputs);
 }
 
+void reset_neural_network(neural_network* network)
+{
+    for (size_t i = 0; i < network->nb_input; i++)
+    {
+        network->inputs[i].activation = 0;
+        network->inputs[i].activation_input = 0;
+        network->inputs[i].bias = 0;
+        network->inputs[i].local_gradient = 0;
+    }
+
+    for (size_t i = 0; i < network->nb_output; i++)
+    {
+        network->outputs[i].activation = 0;
+        network->outputs[i].activation_input = 0;
+        network->outputs[i].bias = 0;
+        network->outputs[i].local_gradient = 0;
+
+        for (size_t h = 0; h < network->hidden_height; h++)
+        {
+            network->outputs[i].weights[h] = 0;
+        }
+    }
+
+    for (size_t x = 0; x < network->hidden_depth; x++)
+    {
+        for (size_t y = 0; y < network->hidden_height; y++)
+        {
+            network->hidden[x][y].activation = 0;
+            network->hidden[x][y].activation_input = 0;
+            network->hidden[x][y].bias = 0;
+            network->hidden[x][y].local_gradient = 0;
+
+            for (size_t h = 0; h < (x == 0 ? network->nb_input : network->hidden_height); h++)
+            {
+                network->hidden[x][y].weights[h] = 0;
+            }
+        }
+    }
+}
+
 void save_neural_network(neural_network* network, const char* file_path)
 {
     FILE *fptr;
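Note: reset_neural_network zeroes every activation, weight, bias, and gradient field; it is called on the scratch memory network before each batch (see network_process_epoche below) so accumulation always starts from zero. The accumulate-then-apply pattern, reduced to a single weight as a sketch (names are illustrative, not from the commit):

    double grad_sum = 0.0;                      /* plays the role of curr_mem->weights[h] */
    for (size_t s = 0; s < batch_size; s++)
        grad_sum += sample_gradient[s];         /* the calculate_propagation phase */
    weight -= learning_rate * (grad_sum / (double)batch_size); /* the apply_propagation phase */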
@@ -409,6 +449,19 @@ double get_network_cost(neural_network* network, training_data expected_data)
     return 1.0/2.0 * cost;
 }
 
+void shuffle_dataset(training_data* datas, size_t data_len, size_t nb_shuffle)
+{
+    for (size_t i = 0; i < nb_shuffle; i++)
+    {
+        size_t first = rand() % data_len;
+        size_t second = rand() % data_len;
+
+        training_data temp = datas[first];
+        datas[first] = datas[second];
+        datas[second] = temp;
+    }
+}
+
 double calculate_hidden_local_gradiant(neural_network* network, size_t x, size_t y)
 {
     neuron* curr = &network->hidden[x][y];
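Note: shuffle_dataset mixes the dataset with nb_shuffle random swaps rather than a full Fisher-Yates pass, so it reorders batches between epochs without guaranteeing a uniform permutation. It relies on rand(), which assumes the generator was seeded elsewhere. The callers below pass data_len / 2 as nb_shuffle, so each epoch effectively runs:

    shuffle_dataset(data, data_len, data_len / 2); /* half as many swaps as samples */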
@@ -437,13 +490,14 @@ double calculate_hidden_local_gradiant(neural_network* network, size_t x, size_t
     return local_gradient_sum * sigmoid_derivate(curr->activation);
 }
 
-void network_hidden_back_propagation(neural_network* network, double learning_rate)
+void network_hidden_calculate_propagation(neural_network* network, neural_network* memory_network)
 {
     for (long int x = ((long int)network->hidden_depth-1); x >= 0; x--)
     {
         for (size_t y = 0; y < network->hidden_height; y++)
         {
             neuron* curr = &network->hidden[x][y];
+            neuron* curr_mem = &memory_network->hidden[x][y];
 
             curr->local_gradient = calculate_hidden_local_gradiant(network, x, y);
 
@@ -455,7 +509,7 @@ void network_hidden_back_propagation(neural_network* network, double learning_ra
 
                     double total_error_w = curr->local_gradient * connect->activation;
 
-                    curr->weights[h] -= learning_rate * total_error_w;
+                    curr_mem->weights[h] += total_error_w;
                 }
             }
             else
@@ -466,22 +520,23 @@ void network_hidden_back_propagation(neural_network* network, double learning_ra
 
                     double total_error_w = curr->local_gradient * connect->activation;
 
-                    curr->weights[h] -= learning_rate * total_error_w;
+                    curr_mem->weights[h] += total_error_w;
                 }
             }
 
-            curr->bias -= learning_rate * curr->local_gradient;
+            curr_mem->bias += curr->local_gradient;
         }
     }
 }
 
-void network_output_back_propagation(neural_network* network, double* expected_data, double learning_rate)
+void network_output_calculate_propagation(neural_network* network, neural_network* memory_network, training_data data)
 {
     for (size_t i = 0; i < network->nb_output; i++)
     {
         neuron* curr = &network->outputs[i];
+        neuron* curr_mem = &memory_network->outputs[i];
 
-        curr->local_gradient = (curr->activation - expected_data[i]) * sigmoid_derivate(curr->activation_input);
+        curr->local_gradient = (curr->activation - data.outputs[i]) * sigmoid_derivate(curr->activation_input);
 
         for (size_t h = 0; h < network->hidden_height; h++)
         {
@@ -489,17 +544,77 @@ void network_output_back_propagation(neural_network* network, double* expected_d
 
             double total_error_w = curr->local_gradient * connect->activation;
 
-            curr->weights[h] -= learning_rate * total_error_w;
+            curr_mem->weights[h] += total_error_w;
         }
 
-        curr->bias -= learning_rate * curr->local_gradient;
+        curr_mem->bias += curr->local_gradient;
     }
 }
 
-void network_back_propagation(neural_network* network, double* expected_data, double learning_rate)
+void network_hidden_apply_propagation(neural_network* network, neural_network* memory_network, size_t batch_size, double learning_rate)
 {
-    network_output_back_propagation(network, expected_data, learning_rate);
-    network_hidden_back_propagation(network, learning_rate);
+    for (long int x = ((long int)network->hidden_depth-1); x >= 0; x--)
+    {
+        for (size_t y = 0; y < network->hidden_height; y++)
+        {
+            neuron* curr = &network->hidden[x][y];
+            neuron* curr_mem = &memory_network->hidden[x][y];
+
+            if (x == 0)
+            {
+                for (size_t h = 0; h < network->nb_input; h++)
+                {
+                    double total_error_w = curr_mem->weights[h] / (double)batch_size;
+
+                    curr->weights[h] -= learning_rate * total_error_w;
+                }
+            }
+            else
+            {
+                for (size_t h = 0; h < network->hidden_height; h++)
+                {
+                    double total_error_w = curr_mem->weights[h] / (double)batch_size;
+
+                    curr->weights[h] -= learning_rate * total_error_w;
+                }
+            }
+
+            curr->bias -= learning_rate * (curr_mem->bias / (double)batch_size);
+        }
+    }
+}
+
+void network_output_apply_propagation(neural_network* network, neural_network* memory_network, size_t batch_size, double learning_rate)
+{
+    for (size_t i = 0; i < network->nb_output; i++)
+    {
+        neuron* curr = &network->outputs[i];
+        neuron* curr_mem = &memory_network->outputs[i];
+
+        for (size_t h = 0; h < network->hidden_height; h++)
+        {
+            double total_error_w = curr_mem->weights[h] / (double)batch_size;
+
+            curr->weights[h] -= learning_rate * total_error_w;
+        }
+
+        curr->bias -= learning_rate * (curr_mem->bias / (double)batch_size);
+    }
+}
+
+void network_back_propagation(neural_network* network, neural_network* memory_network, training_data* datas, size_t data_len, double learning_rate)
+{
+    for (size_t i = 0; i < data_len; i++)
+    {
+        network_set_input_data(network, datas[i]);
+
+        process_network(network);
+
+        network_output_calculate_propagation(network, memory_network, datas[i]);
+        network_hidden_calculate_propagation(network, memory_network);
+    }
+    network_output_apply_propagation(network, memory_network, data_len, learning_rate);
+    network_hidden_apply_propagation(network, memory_network, data_len, learning_rate);
 }
 
 void network_set_input_data(neural_network* network, training_data data)
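Note: backpropagation is now split into per-sample calculate phases, which only accumulate gradients into the memory network, and per-batch apply phases, which perform one averaged update. For a weight w with upstream activation a and local gradient delta, the applied step is

    w <- w - learning_rate * (1 / batch_size) * sum_over_batch(delta * a)

with the analogous rule for biases (a = 1). network_back_propagation drives the loop: forward pass and accumulation for each sample, then a single apply per batch.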
@@ -510,28 +625,37 @@ void network_set_input_data(neural_network* network, training_data data)
     }
 }
 
-void network_train_data(neural_network* network, training_data data, double learning_rate, double* cost)
+void network_train_data(neural_network* network, neural_network* memory_network, training_data* datas, size_t data_len, double learning_rate, double* cost)
 {
-    network_set_input_data(network, data);
-
-    process_network(network);
-
-    network_back_propagation(network, data.outputs, learning_rate);
+    network_back_propagation(network, memory_network, datas, data_len, learning_rate);
 
     if (cost != NULL)
     {
         process_network(network);
 
-        *cost = get_network_cost(network, data);
+        *cost = get_network_total_cost(network, datas, data_len);
     }
 }
 
-void network_process_epoche(neural_network* network, training_data* data, size_t data_len, double learning_rate, double* total_cost)
+void network_process_epoche(neural_network* network, neural_network* memory_network, training_data* data, size_t data_len, size_t batch_size, size_t nb_shuffle, double learning_rate, double* total_cost)
 {
-    for (size_t i = 0; i < data_len; i++)
+    size_t it = data_len / batch_size;
+    if (data_len % batch_size > 0)
+        it++;
+
+    shuffle_dataset(data, data_len, nb_shuffle);
+
+    for (size_t i = 0; i < it; i++)
     {
+        reset_neural_network(memory_network);
+        size_t current_batch_size;
+        if (i == it - 1 && data_len % batch_size > 0)
+            current_batch_size = data_len % batch_size;
+        else
+            current_batch_size = batch_size;
+
         double i_cost;
-        network_train_data(network, data[i], learning_rate, &i_cost);
+        network_train_data(network, memory_network, data + (i * batch_size), current_batch_size, learning_rate, &i_cost);
 
         if (total_cost != NULL)
             *total_cost += i_cost;
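Note: the batch count in network_process_epoche is a ceiling division, so a trailing partial batch is still trained on, with current_batch_size shrunk to the remainder on the last iteration. An equivalent one-liner, as a sketch rather than the commit's wording:

    size_t it = (data_len + batch_size - 1) / batch_size; /* ceil(data_len / batch_size) */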
@@ -550,7 +674,7 @@ double get_network_total_cost(neural_network* network, training_data* datas, siz
         cost += get_network_cost(network, datas[i]);
     }
 
-    return cost;
+    return cost / (double)data_len;
 }
 
 char get_data_char_prediction(training_data data, size_t nb_output)
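Note: get_network_total_cost now returns the mean per-sample cost, cost = (1/N) * sum_i (1/2) * ||output_i - expected_i||^2, so values stay comparable across batch and dataset sizes; the per-batch costs summed into total_cost in network_process_epoche are therefore averages rather than raw sums.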
@@ -588,10 +712,19 @@ char get_network_char_prediction(neural_network* network, size_t AdaFactor)
     return res;
 }
 
-void train_network(neural_network* network, training_data* datas, size_t data_len, float learning_rate, size_t warmup, size_t warmup_iterations, size_t iterations)
+void train_network(neural_network* network, training_data* datas, size_t data_len, float learning_rate, size_t batch_size, size_t warmup, size_t warmup_iterations, size_t iterations)
 {
     time_t start;
 
+    neural_network mem_network;
+
+    mem_network.nb_input = network->nb_input;
+    mem_network.nb_output = network->nb_output;
+    mem_network.hidden_depth = network->hidden_depth;
+    mem_network.hidden_height = network->hidden_height;
+
+    init_neural_network(&mem_network);
+
     if (warmup > 0)
     {
         printf("Warming up...\n");
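Note: mem_network is the gradient accumulator. Its four dimension fields are copied before init_neural_network runs, on the assumption that init_neural_network sizes its allocations from those fields, and the same instance is reused for every batch, epoch, and warmup candidate. Its lifecycle, condensed from the surrounding hunks:

    neural_network mem_network;            /* scratch gradient accumulator */
    init_neural_network(&mem_network);     /* after copying the four dimension fields */
    /* per batch: reset_neural_network(&mem_network); accumulate; apply */
    free_neural_network(&mem_network);     /* once, at the end of train_network */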
@@ -618,7 +751,7 @@ void train_network(neural_network* network, training_data* datas, size_t data_le
 
         for (size_t i = 0; i < warmup_iterations; i++)
         {
-            network_process_epoche(&(networks[n]), datas, data_len, learning_rate, &cost);
+            network_process_epoche(&(networks[n]), &mem_network, datas, data_len, batch_size, data_len / 2, learning_rate, &cost);
             if (warmup_iterations * warmup < 100 || (n * warmup_iterations + i) % ((warmup_iterations * warmup) / 100) == 0)
             {
                 time_t time_pos = time(NULL);
@@ -652,7 +785,7 @@ void train_network(neural_network* network, training_data* datas, size_t data_le
 
     for (size_t i = 0; i < iterations; i++)
     {
-        network_process_epoche(network, datas, data_len, learning_rate, NULL);
+        network_process_epoche(network, &mem_network, datas, data_len, batch_size, data_len / 2, learning_rate, NULL);
 
         if (i % (iterations / 100) == 0) //Debug
         {
@@ -670,6 +803,8 @@ void train_network(neural_network* network, training_data* datas, size_t data_le
             //printf("NetCost: %f\n", get_network_cost(&network, datas, 4));
         }
     }
+
+    free_neural_network(&mem_network);
 }
 
 double get_network_success_rate(neural_network* network, training_data* datas, size_t data_len, size_t AdaFactor)
@@ -737,7 +872,8 @@ training_data* load_dataset(const char* directory, size_t AdaFactor, size_t* nb_
     if (datas == NULL)
         errx(EXIT_FAILURE, "load_dataset: unable to malloc");
 
-    *nb_data = dataset_len;
+    if (nb_data != NULL)
+        *nb_data = dataset_len;
 
     size_t data_id = 0;
 
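Note: guarding the write to *nb_data lets callers pass NULL when only the loaded array is needed, e.g. the hypothetical call:

    training_data* datas = load_dataset(data_dir, AdaFactor, NULL);

The final hunk updates the header declarations to the new mini-batch signatures.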
@@ -52,26 +52,24 @@ void process_network(neural_network* network);
 
 double get_network_cost(neural_network* network, training_data expected_data);
 
-void network_back_propagation(neural_network* network, double* expected_data, double learning_rate);
-
-void network_process_epoche(neural_network* network, training_data* data, size_t data_len, double learning_rate, double* total_cost);
+void network_process_epoche(neural_network* network, neural_network* memory_network, training_data* data, size_t data_len, size_t batch_size, size_t nb_shuffle, double learning_rate, double* total_cost);
 
 double get_network_total_cost(neural_network* network, training_data* datas, size_t data_len);
 
-void train_network(neural_network* network, training_data* datas, size_t data_len, float learning_rate, size_t warmup, size_t warmup_iterations, size_t iterations);
+void train_network(neural_network* network, training_data* datas, size_t data_len, float learning_rate, size_t batch_size, size_t warmup, size_t warmup_iterations, size_t iterations);
 
 double get_network_success_rate(neural_network* network, training_data* datas, size_t data_len, size_t AdaFactor);
 
 training_data* load_dataset(const char* directory, size_t AdaFactor, size_t* nb_data);
 
 char get_data_char_prediction(training_data data, size_t nb_output);
 
 char get_network_char_prediction(neural_network* network, size_t AdaFactor);
 
 void print_network_activations(neural_network* network);
 
 void network_set_input_data(neural_network* network, training_data data);
 
 void network_train_data(neural_network* network, training_data data, double learning_rate, double* cost);
 
 void print_network_state(neural_network *network);
 
 void print_training_debug(neural_network* network, training_data* data, size_t data_len);