feat: added mini-batch to AI
All checks were successful
continuous-integration/drone/push Build is passing
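This change switches training from per-sample updates to mini-batch gradient descent: each epoch shuffles the dataset, splits it into batches of batch_size = data_len * batch_pourcent samples, accumulates the gradients of a whole batch in a scratch "memory" network, and applies the averaged update once per batch. A minimal sketch of the batch arithmetic used in the diff below, with made-up numbers:

    size_t data_len = 1000;                                  /* hypothetical dataset size */
    double batch_pourcent = 0.3;                             /* hypothetical CLI argument */
    size_t batch_size = (size_t)(data_len * batch_pourcent); /* 300 */
    size_t it = data_len / batch_size;                       /* 3 full batches... */
    if (data_len % batch_size > 0)
        it++;                                                /* ...plus a final batch of 100 */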
@@ -3,9 +3,10 @@
 #include <err.h>
 #include <string.h>
 #include "../Application/ApplicationUtils.h"
+#include "../Image/ImageUtils.h"
 #include "neural_utils.h"
 
-void network_train(neural_network* network, char* training_data_dir, char* save_path, size_t iteration, size_t warmup, size_t warmup_iteration, double learning_rate, size_t AdaFactor)
+void network_train(neural_network* network, char* training_data_dir, char* save_path, double batch_pourcent, size_t iteration, size_t warmup, size_t warmup_iteration, double learning_rate, size_t AdaFactor)
 {
     network->nb_input = 169;
     network->hidden_height = 30;
@@ -18,21 +19,26 @@ void network_train(neural_network* network, char* training_data_dir, char* save_
 
     training_data* training_datas = load_dataset(training_data_dir, AdaFactor, &data_len);
 
-    train_network(network, training_datas, data_len, learning_rate, warmup, warmup_iteration, iteration);
+    size_t batch_size = (size_t)(data_len * batch_pourcent);
+
+    train_network(network, training_datas, data_len, learning_rate, batch_size, warmup, warmup_iteration, iteration);
 
     printf("Final network cost: %f\n", get_network_total_cost(network, training_datas, data_len));
     printf("Final success rate: %i\n", (int)(get_network_success_rate(network, training_datas, data_len, AdaFactor) * 100.0));
 
     save_neural_network(network, save_path);
 }
 
-void network_use(neural_network* network, char** inputs)
+void network_use(neural_network* network, double* inputs)
 {
     for (size_t i = 0; i < network->nb_input; i++)
     {
-        network->inputs[i].activation = atof(inputs[i]);
+        network->inputs[i].activation = inputs[i];
     }
 
     process_network(network);
 
-    printf("Predicted character: %c", get_network_char_prediction(network, network->nb_output / 26));
+    printf("Predicted character: %c\n", get_network_char_prediction(network, network->nb_output / 26));
 }
 
 void network_main(int argc, char* argv[])
@@ -47,49 +53,45 @@ void network_main(int argc, char* argv[])
 
     neural_network network;
 
-    if (strcmp(action, "train") == 0) //train network: ./network train <network.csv> <data directory> <iterations> <warmup> <warmup iterations> [learning_rate] [AdaFactor]
+    if (strcmp(action, "train") == 0) //train network: ./network train <network.csv> <data directory> <batch pourcent> <iterations> <warmup> <warmup iterations> [learning_rate] [AdaFactor]
     {
-        if (argc < 7)
-            errx(EXIT_FAILURE, "missing arguments, usage: ./network train <network.csv> <data directory> <iterations> <warmup> <warmup iterations> [learning_rate] [AdaFactor]");
+        if (argc < 8)
+            errx(EXIT_FAILURE, "missing arguments, usage: ./network train <network.csv> <data directory> <batch pourcent> <iterations> <warmup> <warmup iterations> [learning_rate] [AdaFactor]");
 
         char* network_path = combine_path(network_application_directory, argv[2]);
 
         char* data_dir = combine_path(network_application_directory, argv[3]);
-        size_t iterations = (size_t)atoi(argv[4]);
-        size_t warmup = (size_t)atoi(argv[5]);
-        size_t warmup_iterations = (size_t)atoi(argv[6]);
+        double batch_pourcent = atof(argv[4]);
+        if (batch_pourcent > 1)
+            errx(EXIT_FAILURE, "invalid argument: <batch_pourcent> must be between 0 and 1");
+        size_t iterations = (size_t)atoi(argv[5]);
+        size_t warmup = (size_t)atoi(argv[6]);
+        size_t warmup_iterations = (size_t)atoi(argv[7]);
 
         double learning_rate = 0.1;
-        if (argc > 7)
-            learning_rate = atof(argv[7]);
+        if (argc > 8)
+            learning_rate = atof(argv[8]);
 
         size_t AdaFactor = 1;
-        if (argc > 8)
-            AdaFactor = (size_t)atoi(argv[8]);
+        if (argc > 9)
+            AdaFactor = (size_t)atoi(argv[9]);
 
-        network_train(&network, data_dir, network_path, iterations, warmup, warmup_iterations, learning_rate, AdaFactor);
+        network_train(&network, data_dir, network_path, batch_pourcent, iterations, warmup, warmup_iterations, learning_rate, AdaFactor);
    }
-    else if (strcmp(action, "use") == 0) //use network: ./network use <network.csv> input1,input2,...,inputx
+    else if (strcmp(action, "use") == 0) //use network: ./network use <network.csv> <image path>
    {
        if (argc < 3)
-            errx(EXIT_FAILURE, "missing arguments, usage: ./network use <network.csv> input1,input2,...,inputx");
+            errx(EXIT_FAILURE, "missing arguments, usage: ./network use <network.csv> <image path>");
 
        char* network_path = combine_path(network_application_directory, argv[2]);
 
-        char* input_str = argv[3];
-
-        size_t nb_input = 0;
-
-        char** input_array = string_split(input_str, ',', &nb_input);
+        char* input_path = combine_path(network_application_directory, argv[3]);
 
        neural_network network;
 
        load_neural_network(&network, read_file(network_path));
 
-        if (nb_input != network.nb_input)
-            errx(EXIT_FAILURE, "inputs are not valid");
-
-        network_use(&network, input_array);
+        network_use(&network, image_to_bool_array(input_path));
    }
 
    /*printf("Success rate: %i\n", (int)(get_network_success_rate(&network, training_datas, data_len, 1) * 100));
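Note: the train command now takes a mandatory <batch pourcent> argument, a fraction of the dataset used as the batch size (only the upper bound of 1 is checked), and the use command now takes an image path instead of a comma-separated input list. Hypothetical invocations, with illustrative file names:

    ./network train network.csv dataset 0.25 1000 3 50 0.1 1
    ./network use network.csv letter.png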
@@ -146,6 +146,46 @@ void free_neural_network(neural_network* network)
     free(network->outputs);
 }
 
+void reset_neural_network(neural_network* network)
+{
+    for (size_t i = 0; i < network->nb_input; i++)
+    {
+        network->inputs[i].activation = 0;
+        network->inputs[i].activation_input = 0;
+        network->inputs[i].bias = 0;
+        network->inputs[i].local_gradient = 0;
+    }
+
+    for (size_t i = 0; i < network->nb_output; i++)
+    {
+        network->outputs[i].activation = 0;
+        network->outputs[i].activation_input = 0;
+        network->outputs[i].bias = 0;
+        network->outputs[i].local_gradient = 0;
+
+        for (size_t h = 0; h < network->hidden_height; h++)
+        {
+            network->outputs[i].weights[h] = 0;
+        }
+    }
+
+    for (size_t x = 0; x < network->hidden_depth; x++)
+    {
+        for (size_t y = 0; y < network->hidden_height; y++)
+        {
+            network->hidden[x][y].activation = 0;
+            network->hidden[x][y].activation_input = 0;
+            network->hidden[x][y].bias = 0;
+            network->hidden[x][y].local_gradient = 0;
+
+            for (size_t h = 0; h < (x == 0 ? network->nb_input : network->hidden_height); h++)
+            {
+                network->hidden[x][y].weights[h] = 0;
+            }
+        }
+    }
+}
+
 void save_neural_network(neural_network* network, const char* file_path)
 {
     FILE *fptr;
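Note: reset_neural_network zeroes every activation, weight, bias, and gradient field; it is called on the scratch memory network before each batch (see network_process_epoche below) so accumulation always starts from zero. The accumulate-then-apply pattern, reduced to a single weight as a sketch (names are illustrative, not from the commit):

    double grad_sum = 0.0;                      /* plays the role of curr_mem->weights[h] */
    for (size_t s = 0; s < batch_size; s++)
        grad_sum += sample_gradient[s];         /* the calculate_propagation phase */
    weight -= learning_rate * (grad_sum / (double)batch_size); /* the apply_propagation phase */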
@@ -409,6 +449,19 @@ double get_network_cost(neural_network* network, training_data expected_data)
     return 1.0/2.0 * cost;
 }
 
+void shuffle_dataset(training_data* datas, size_t data_len, size_t nb_shuffle)
+{
+    for (size_t i = 0; i < nb_shuffle; i++)
+    {
+        size_t first = rand() % data_len;
+        size_t second = rand() % data_len;
+
+        training_data temp = datas[first];
+        datas[first] = datas[second];
+        datas[second] = temp;
+    }
+}
+
 double calculate_hidden_local_gradiant(neural_network* network, size_t x, size_t y)
 {
     neuron* curr = &network->hidden[x][y];
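Note: shuffle_dataset mixes the dataset with nb_shuffle random swaps rather than a full Fisher-Yates pass, so it reorders batches between epochs without guaranteeing a uniform permutation. It relies on rand(), which assumes the generator was seeded elsewhere. The callers below pass data_len / 2 as nb_shuffle, so each epoch effectively runs:

    shuffle_dataset(data, data_len, data_len / 2); /* half as many swaps as samples */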
@@ -437,13 +490,14 @@ double calculate_hidden_local_gradiant(neural_network* network, size_t x, size_t
     return local_gradient_sum * sigmoid_derivate(curr->activation);
 }
 
-void network_hidden_back_propagation(neural_network* network, double learning_rate)
+void network_hidden_calculate_propagation(neural_network* network, neural_network* memory_network)
 {
     for (long int x = ((long int)network->hidden_depth-1); x >= 0; x--)
     {
         for (size_t y = 0; y < network->hidden_height; y++)
         {
             neuron* curr = &network->hidden[x][y];
+            neuron* curr_mem = &memory_network->hidden[x][y];
 
             curr->local_gradient = calculate_hidden_local_gradiant(network, x, y);
 
@@ -455,7 +509,7 @@ void network_hidden_back_propagation(neural_network* network, double learning_ra
 
                     double total_error_w = curr->local_gradient * connect->activation;
 
-                    curr->weights[h] -= learning_rate * total_error_w;
+                    curr_mem->weights[h] += total_error_w;
                 }
             }
             else
@@ -466,22 +520,23 @@ void network_hidden_back_propagation(neural_network* network, double learning_ra
 
                     double total_error_w = curr->local_gradient * connect->activation;
 
-                    curr->weights[h] -= learning_rate * total_error_w;
+                    curr_mem->weights[h] += total_error_w;
                 }
             }
 
-            curr->bias -= learning_rate * curr->local_gradient;
+            curr_mem->bias += curr->local_gradient;
         }
     }
 }
 
-void network_output_back_propagation(neural_network* network, double* expected_data, double learning_rate)
+void network_output_calculate_propagation(neural_network* network, neural_network* memory_network, training_data data)
 {
     for (size_t i = 0; i < network->nb_output; i++)
     {
         neuron* curr = &network->outputs[i];
+        neuron* curr_mem = &memory_network->outputs[i];
 
-        curr->local_gradient = (curr->activation - expected_data[i]) * sigmoid_derivate(curr->activation_input);
+        curr->local_gradient = (curr->activation - data.outputs[i]) * sigmoid_derivate(curr->activation_input);
 
         for (size_t h = 0; h < network->hidden_height; h++)
         {
@@ -489,17 +544,77 @@ void network_output_back_propagation(neural_network* network, double* expected_d
 
             double total_error_w = curr->local_gradient * connect->activation;
 
-            curr->weights[h] -= learning_rate * total_error_w;
+            curr_mem->weights[h] += total_error_w;
         }
 
-        curr->bias -= learning_rate * curr->local_gradient;
+        curr_mem->bias += curr->local_gradient;
     }
 }
 
-void network_back_propagation(neural_network* network, double* expected_data, double learning_rate)
+void network_hidden_apply_propagation(neural_network* network, neural_network* memory_network, size_t batch_size, double learning_rate)
 {
-    network_output_back_propagation(network, expected_data, learning_rate);
-    network_hidden_back_propagation(network, learning_rate);
+    for (long int x = ((long int)network->hidden_depth-1); x >= 0; x--)
+    {
+        for (size_t y = 0; y < network->hidden_height; y++)
+        {
+            neuron* curr = &network->hidden[x][y];
+            neuron* curr_mem = &memory_network->hidden[x][y];
+
+            if (x == 0)
+            {
+                for (size_t h = 0; h < network->nb_input; h++)
+                {
+                    double total_error_w = curr_mem->weights[h] / (double)batch_size;
+
+                    curr->weights[h] -= learning_rate * total_error_w;
+                }
+            }
+            else
+            {
+                for (size_t h = 0; h < network->hidden_height; h++)
+                {
+                    double total_error_w = curr_mem->weights[h] / (double)batch_size;
+
+                    curr->weights[h] -= learning_rate * total_error_w;
+                }
+            }
+
+            curr->bias -= learning_rate * (curr_mem->bias / (double)batch_size);
+        }
+    }
+}
+
+void network_output_apply_propagation(neural_network* network, neural_network* memory_network, size_t batch_size, double learning_rate)
+{
+    for (size_t i = 0; i < network->nb_output; i++)
+    {
+        neuron* curr = &network->outputs[i];
+        neuron* curr_mem = &memory_network->outputs[i];
+
+        for (size_t h = 0; h < network->hidden_height; h++)
+        {
+            double total_error_w = curr_mem->weights[h] / (double)batch_size;
+
+            curr->weights[h] -= learning_rate * total_error_w;
+        }
+
+        curr->bias -= learning_rate * (curr_mem->bias / (double)batch_size);
+    }
+}
+
+void network_back_propagation(neural_network* network, neural_network* memory_network, training_data* datas, size_t data_len, double learning_rate)
+{
+    for (size_t i = 0; i < data_len; i++)
+    {
+        network_set_input_data(network, datas[i]);
+
+        process_network(network);
+
+        network_output_calculate_propagation(network, memory_network, datas[i]);
+        network_hidden_calculate_propagation(network, memory_network);
+    }
+    network_output_apply_propagation(network, memory_network, data_len, learning_rate);
+    network_hidden_apply_propagation(network, memory_network, data_len, learning_rate);
 }
 
 void network_set_input_data(neural_network* network, training_data data)
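Note: backpropagation is now split into per-sample calculate phases, which only accumulate gradients into the memory network, and per-batch apply phases, which perform one averaged update. For a weight w with upstream activation a and local gradient delta, the applied step is

    w <- w - learning_rate * (1 / batch_size) * sum_over_batch(delta * a)

with the analogous rule for biases (a = 1). network_back_propagation drives the loop: forward pass and accumulation for each sample, then a single apply per batch.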
@@ -510,28 +625,37 @@ void network_set_input_data(neural_network* network, training_data data)
     }
 }
 
-void network_train_data(neural_network* network, training_data data, double learning_rate, double* cost)
+void network_train_data(neural_network* network, neural_network* memory_network, training_data* datas, size_t data_len, double learning_rate, double* cost)
 {
-    network_set_input_data(network, data);
-
-    process_network(network);
-
-    network_back_propagation(network, data.outputs, learning_rate);
+    network_back_propagation(network, memory_network, datas, data_len, learning_rate);
 
     if (cost != NULL)
     {
         process_network(network);
 
-        *cost = get_network_cost(network, data);
+        *cost = get_network_total_cost(network, datas, data_len);
     }
 }
 
-void network_process_epoche(neural_network* network, training_data* data, size_t data_len, double learning_rate, double* total_cost)
+void network_process_epoche(neural_network* network, neural_network* memory_network, training_data* data, size_t data_len, size_t batch_size, size_t nb_shuffle, double learning_rate, double* total_cost)
 {
-    for (size_t i = 0; i < data_len; i++)
+    size_t it = data_len / batch_size;
+    if (data_len % batch_size > 0)
+        it++;
+
+    shuffle_dataset(data, data_len, nb_shuffle);
+
+    for (size_t i = 0; i < it; i++)
     {
+        reset_neural_network(memory_network);
+        size_t current_batch_size;
+        if (i == it - 1 && data_len % batch_size > 0)
+            current_batch_size = data_len % batch_size;
+        else
+            current_batch_size = batch_size;
+
         double i_cost;
-        network_train_data(network, data[i], learning_rate, &i_cost);
+        network_train_data(network, memory_network, data + (i * batch_size), current_batch_size, learning_rate, &i_cost);
 
         if (total_cost != NULL)
             *total_cost += i_cost;
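Note: the batch count in network_process_epoche is a ceiling division, so a trailing partial batch is still trained on, with current_batch_size shrunk to the remainder on the last iteration. An equivalent one-liner, as a sketch rather than the commit's wording:

    size_t it = (data_len + batch_size - 1) / batch_size; /* ceil(data_len / batch_size) */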
@@ -550,7 +674,7 @@ double get_network_total_cost(neural_network* network, training_data* datas, siz
         cost += get_network_cost(network, datas[i]);
     }
 
-    return cost;
+    return cost / (double)data_len;
 }
 
 char get_data_char_prediction(training_data data, size_t nb_output)
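Note: get_network_total_cost now returns the mean per-sample cost, cost = (1/N) * sum_i (1/2) * ||output_i - expected_i||^2, so values stay comparable across batch and dataset sizes; the per-batch costs summed into total_cost in network_process_epoche are therefore averages rather than raw sums.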
@@ -588,10 +712,19 @@ char get_network_char_prediction(neural_network* network, size_t AdaFactor)
     return res;
 }
 
-void train_network(neural_network* network, training_data* datas, size_t data_len, float learning_rate, size_t warmup, size_t warmup_iterations, size_t iterations)
+void train_network(neural_network* network, training_data* datas, size_t data_len, float learning_rate, size_t batch_size, size_t warmup, size_t warmup_iterations, size_t iterations)
 {
     time_t start;
 
+    neural_network mem_network;
+
+    mem_network.nb_input = network->nb_input;
+    mem_network.nb_output = network->nb_output;
+    mem_network.hidden_depth = network->hidden_depth;
+    mem_network.hidden_height = network->hidden_height;
+
+    init_neural_network(&mem_network);
+
     if (warmup > 0)
     {
         printf("Warming up...\n");
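Note: mem_network is the gradient accumulator. Its four dimension fields are copied before init_neural_network runs, on the assumption that init_neural_network sizes its allocations from those fields, and the same instance is reused for every batch, epoch, and warmup candidate. Its lifecycle, condensed from the surrounding hunks:

    neural_network mem_network;            /* scratch gradient accumulator */
    init_neural_network(&mem_network);     /* after copying the four dimension fields */
    /* per batch: reset_neural_network(&mem_network); accumulate; apply */
    free_neural_network(&mem_network);     /* once, at the end of train_network */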
@@ -618,7 +751,7 @@ void train_network(neural_network* network, training_data* datas, size_t data_le
 
         for (size_t i = 0; i < warmup_iterations; i++)
         {
-            network_process_epoche(&(networks[n]), datas, data_len, learning_rate, &cost);
+            network_process_epoche(&(networks[n]), &mem_network, datas, data_len, batch_size, data_len / 2, learning_rate, &cost);
             if (warmup_iterations * warmup < 100 || (n * warmup_iterations + i) % ((warmup_iterations * warmup) / 100) == 0)
             {
                 time_t time_pos = time(NULL);
@@ -652,7 +785,7 @@ void train_network(neural_network* network, training_data* datas, size_t data_le
 
     for (size_t i = 0; i < iterations; i++)
     {
-        network_process_epoche(network, datas, data_len, learning_rate, NULL);
+        network_process_epoche(network, &mem_network, datas, data_len, batch_size, data_len / 2, learning_rate, NULL);
 
         if (i % (iterations / 100) == 0) //Debug
         {
@@ -670,6 +803,8 @@ void train_network(neural_network* network, training_data* datas, size_t data_le
             //printf("NetCost: %f\n", get_network_cost(&network, datas, 4));
         }
     }
+
+    free_neural_network(&mem_network);
 }
 
 double get_network_success_rate(neural_network* network, training_data* datas, size_t data_len, size_t AdaFactor)
@@ -737,7 +872,8 @@ training_data* load_dataset(const char* directory, size_t AdaFactor, size_t* nb_
     if (datas == NULL)
         errx(EXIT_FAILURE, "load_dataset: unable to malloc");
 
-    *nb_data = dataset_len;
+    if (nb_data != NULL)
+        *nb_data = dataset_len;
 
     size_t data_id = 0;
 
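Note: guarding the write to *nb_data lets callers pass NULL when only the loaded array is needed, e.g. the hypothetical call:

    training_data* datas = load_dataset(data_dir, AdaFactor, NULL);

The final hunk updates the header declarations to the new mini-batch signatures.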
@@ -52,26 +52,24 @@ void process_network(neural_network* network);
 
 double get_network_cost(neural_network* network, training_data expected_data);
 
-void network_back_propagation(neural_network* network, double* expected_data, double learning_rate);
-
-void network_process_epoche(neural_network* network, training_data* data, size_t data_len, double learning_rate, double* total_cost);
+void network_process_epoche(neural_network* network, neural_network* memory_network, training_data* data, size_t data_len, size_t batch_size, size_t nb_shuffle, double learning_rate, double* total_cost);
 
 double get_network_total_cost(neural_network* network, training_data* datas, size_t data_len);
 
-void train_network(neural_network* network, training_data* datas, size_t data_len, float learning_rate, size_t warmup, size_t warmup_iterations, size_t iterations);
+void train_network(neural_network* network, training_data* datas, size_t data_len, float learning_rate, size_t batch_size, size_t warmup, size_t warmup_iterations, size_t iterations);
 
 double get_network_success_rate(neural_network* network, training_data* datas, size_t data_len, size_t AdaFactor);
 
 training_data* load_dataset(const char* directory, size_t AdaFactor, size_t* nb_data);
 
 char get_data_char_prediction(training_data data, size_t nb_output);
 
 char get_network_char_prediction(neural_network* network, size_t AdaFactor);
 
 void print_network_activations(neural_network* network);
 
 void network_set_input_data(neural_network* network, training_data data);
 
 void network_train_data(neural_network* network, training_data data, double learning_rate, double* cost);
 
 void print_network_state(neural_network *network);
 
 void print_training_debug(neural_network* network, training_data* data, size_t data_len);