feat: add AI neural-network utilities and begin dataset loading

This commit is contained in:
2024-11-27 18:14:00 +01:00
parent 3bc93c7eec
commit e2e6507dd7
4 changed files with 812 additions and 2 deletions

739
src/utils/AI/neural_utils.c Normal file
View File

@ -0,0 +1,739 @@
#define _DEFAULT_SOURCE
#include "neural_utils.h"
#include "../Application/ApplicationUtils.h"
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <math.h>
#include <err.h>
#include <dirent.h>
double sigmoid(double x)
{
return 1.0 / (1.0 + exp(-x));
}
double sigmoid_derivate(double x)
{
double s = sigmoid(x);
return s*(1-s);
}
/* Uniform pseudo-random double in [-size/2, size/2], driven by rand(). */
double double_rand(double size)
{
    double unit = (double)rand() / (double)RAND_MAX; /* in [0, 1] */
    return size * unit - size / 2.0;
}
/* Constrains value to [min, max]; min takes precedence when checked first,
 * matching the original branch order. */
double clamp(double value, double min, double max)
{
    return (value < min) ? min : ((value > max) ? max : value);
}
/* Initialises a neuron with nb_connection incoming connections:
 * random weights, a random bias scaled by the fan-in, zero activation.
 * A neuron with no connections (input layer) gets no weight array.
 * Aborts on allocation failure. */
void init_neuron(neuron* n, size_t nb_connection)
{
    n->activation = 0.0;
    if (nb_connection == 0)
    {
        /* BUG FIX: the old code computed double_rand(12.0 / 0.0) here
         * (inf/NaN bias) and calloc(0, ...) may legally return NULL,
         * which tripped the allocation-failure errx. */
        n->bias = 0.0;
        n->weights = NULL; /* free(NULL) in free_neuron is a no-op */
        return;
    }
    /* Bias range shrinks with fan-in to keep pre-activations bounded. */
    n->bias = double_rand(12.0 / (double)nb_connection);
    double* weights = calloc(nb_connection, sizeof(double));
    if (weights == NULL)
        errx(1, "init_neuron: Cannot allocate memory!");
    for (size_t i = 0; i < nb_connection; i++)
        weights[i] = double_rand(10.0);
    n->weights = weights;
}
//Releases the weight array owned by the neuron (free(NULL) is a no-op).
//Does not free the neuron itself: neurons are stored by value in layer arrays.
void free_neuron(neuron* n)
{
free(n->weights);
}
/* Allocates and initialises every layer of the network. The caller must have
 * set nb_input, nb_output, hidden_depth and hidden_height beforehand.
 * Aborts on zero-sized dimensions or allocation failure. */
void init_neural_network(neural_network* network)
{
    /* size_t is unsigned, so the old "<= 0" checks only ever caught 0. */
    if (network->nb_input == 0)
        errx(EXIT_FAILURE, "init_neural_network: cannot allocate neural network with 0 or less input neurons");
    neuron* in = calloc(network->nb_input, sizeof(neuron));
    if (in == NULL)
        errx(1, "init_neural_network: Cannot allocate memory!");
    if (network->nb_output == 0)
        errx(EXIT_FAILURE, "init_neural_network: cannot allocate neural network with 0 or less output neurons");
    neuron* out = calloc(network->nb_output, sizeof(neuron));
    if (out == NULL)
        errx(1, "init_neural_network: Cannot allocate memory!");
    if (network->hidden_depth == 0)
        errx(EXIT_FAILURE, "init_neural_network: cannot allocate neural network with 0 or less hidden neurons");
    /* BUG FIX: this array holds neuron* row pointers, so it must be sized
     * with sizeof(neuron*); the old sizeof(neuron) over-allocated. */
    neuron** hid = calloc(network->hidden_depth, sizeof(neuron*));
    if (hid == NULL)
        errx(1, "init_neural_network: Cannot allocate memory!");
    if (network->hidden_height == 0)
        errx(EXIT_FAILURE, "init_neural_network: cannot allocate neural network with 0 or less hidden neurons");
    for (size_t i = 0; i < network->hidden_depth; i++)
    {
        neuron* row = calloc(network->hidden_height, sizeof(neuron));
        if (row == NULL)
            errx(1, "init_neural_network: Cannot allocate memory!");
        hid[i] = row;
    }
    network->inputs = in;
    network->outputs = out;
    network->hidden = hid;
    /* Input neurons have no incoming connections. */
    for (size_t i = 0; i < network->nb_input; i++)
        init_neuron(&network->inputs[i], 0);
    /* Outputs are fed by the last hidden layer. */
    for (size_t i = 0; i < network->nb_output; i++)
        init_neuron(&network->outputs[i], network->hidden_height);
    /* Hidden layer 0 is fed by the inputs; deeper layers by the previous one. */
    for (size_t x = 0; x < network->hidden_depth; x++)
    {
        for (size_t y = 0; y < network->hidden_height; y++)
        {
            if (x == 0)
                init_neuron(&network->hidden[x][y], network->nb_input);
            else
                init_neuron(&network->hidden[x][y], network->hidden_height);
        }
    }
}
/* Releases every allocation made by init_neural_network: per-neuron weight
 * arrays, the three layer arrays, the hidden rows, and the row-pointer array. */
void free_neural_network(neural_network* network)
{
    for (size_t i = 0; i < network->nb_input; i++)
        free_neuron(&network->inputs[i]);
    for (size_t i = 0; i < network->nb_output; i++)
        free_neuron(&network->outputs[i]);
    for (size_t x = 0; x < network->hidden_depth; x++)
    {
        for (size_t y = 0; y < network->hidden_height; y++)
            free_neuron(&network->hidden[x][y]);
        /* BUG FIX: was free(network->hidden + i), which passes a pointer
         * INTO the row-pointer array (undefined behaviour) instead of
         * freeing the row it points to. */
        free(network->hidden[x]);
    }
    /* BUG FIX: the row-pointer array itself was commented out -> leak. */
    free(network->hidden);
    free(network->inputs);
    free(network->outputs);
}
/* Serialises the network to a text file readable by load_neural_network:
 * line 1: nb_input,hidden_depth,hidden_height,nb_output
 * next hidden_depth lines: "weight,...,bias," per hidden neuron
 * last line: "weight,...,bias," per output neuron.
 * Aborts if the file cannot be opened or closed cleanly. */
void save_neural_network(neural_network* network, const char* file_path)
{
    FILE* fptr = fopen(file_path, "w");
    /* BUG FIX: fprintf on a NULL stream crashed when the path was bad. */
    if (fptr == NULL)
        errx(EXIT_FAILURE, "save_neural_network: cannot open '%s'", file_path);
    /* %zu is the portable conversion for size_t (was %li). */
    fprintf(fptr, "%zu,%zu,%zu,%zu\n", network->nb_input, network->hidden_depth,
            network->hidden_height, network->nb_output);
    for (size_t x = 0; x < network->hidden_depth; x++)
    {
        for (size_t y = 0; y < network->hidden_height; y++)
        {
            /* Layer 0 is fed by the inputs; deeper layers by the previous
             * hidden layer, so the weight counts differ. */
            size_t nb_weights = (x == 0) ? network->nb_input
                                         : network->hidden_height;
            for (size_t h = 0; h < nb_weights; h++)
                fprintf(fptr, "%f,", network->hidden[x][y].weights[h]);
            fprintf(fptr, "%f,", network->hidden[x][y].bias);
        }
        fprintf(fptr, "\n");
    }
    for (size_t i = 0; i < network->nb_output; i++)
    {
        for (size_t h = 0; h < network->hidden_height; h++)
            fprintf(fptr, "%f,", network->outputs[i].weights[h]);
        fprintf(fptr, "%f,", network->outputs[i].bias);
    }
    /* fclose flushes buffered output; a failure here means data loss. */
    if (fclose(fptr) != 0)
        errx(EXIT_FAILURE, "save_neural_network: write error on '%s'", file_path);
}
/* Splits `string` on `separator` into a newly allocated array of newly
 * allocated NUL-terminated tokens (empty tokens included). Writes the token
 * count to *res_len. Caller frees each token and the array. Aborts on
 * allocation failure. */
char** string_split(const char* string, char separator, size_t* res_len)
{
    /* There is always one more token than there are separators. */
    size_t count = 1;
    for (const char* p = string; *p != '\0'; p++)
    {
        if (*p == separator)
            count++;
    }
    char** parts = calloc(count, sizeof(char*));
    if (parts == NULL)
        errx(EXIT_FAILURE, "string_split: malloc error");
    *res_len = count;
    const char* start = string;
    for (size_t t = 0; t < count; t++)
    {
        const char* end = start;
        while (*end != separator && *end != '\0')
            end++;
        size_t len = (size_t)(end - start);
        char* token = calloc(len + 1, sizeof(char)); /* calloc NUL-terminates */
        if (token == NULL)
            errx(EXIT_FAILURE, "string_split: malloc error");
        memcpy(token, start, len);
        parts[t] = token;
        start = end + 1; /* skip the separator; never dereferenced past '\0' */
    }
    return parts;
}
/* Rebuilds a network from the text produced by save_neural_network.
 * Overwrites the dimensions in *network, allocates all layers via
 * init_neural_network, then fills in weights and biases. */
void load_neural_network(neural_network* network, const char* content)
{
    size_t lines_len;
    char** lines = string_split(content, '\n', &lines_len);
    /* Header line: nb_input,hidden_depth,hidden_height,nb_output.
     * string_split already isolated the tokens, so the old strtok calls
     * were redundant. */
    size_t first_line_len;
    char** first_line = string_split(lines[0], ',', &first_line_len);
    network->nb_input = (size_t)atoi(first_line[0]);
    network->hidden_depth = (size_t)atoi(first_line[1]);
    network->hidden_height = (size_t)atoi(first_line[2]);
    network->nb_output = (size_t)atoi(first_line[3]);
    /* BUG FIX: free every token (a trailing token leaked before). */
    for (size_t i = 0; i < first_line_len; i++)
        free(first_line[i]);
    free(first_line);
    init_neural_network(network);
    for (size_t x = 0; x < network->hidden_depth; x++)
    {
        size_t line_len;
        char** line = string_split(lines[x + 1], ',', &line_len);
        size_t pos = 0;
        for (size_t y = 0; y < network->hidden_height; y++)
        {
            /* Layer 0 stores nb_input weights per neuron, deeper layers
             * store hidden_height (matches the save layout). */
            size_t nb_weights = (x == 0) ? network->nb_input
                                         : network->hidden_height;
            for (size_t h = 0; h < nb_weights; h++)
            {
                network->hidden[x][y].weights[h] = atof(line[pos]);
                free(line[pos]);
                pos++;
            }
            network->hidden[x][y].bias = atof(line[pos]);
            free(line[pos]);
            pos++;
        }
        /* BUG FIX: the trailing empty token after the final ',' leaked. */
        for (; pos < line_len; pos++)
            free(line[pos]);
        free(line);
    }
    size_t last_line_len;
    char** last_line = string_split(lines[network->hidden_depth + 1], ',', &last_line_len);
    size_t last_pos = 0;
    for (size_t i = 0; i < network->nb_output; i++)
    {
        for (size_t h = 0; h < network->hidden_height; h++)
        {
            network->outputs[i].weights[h] = atof(last_line[last_pos]);
            free(last_line[last_pos]);
            last_pos++;
        }
        network->outputs[i].bias = atof(last_line[last_pos]);
        free(last_line[last_pos]);
        last_pos++;
    }
    for (; last_pos < last_line_len; last_pos++)
        free(last_line[last_pos]);
    free(last_line);
    /* BUG FIX: the per-line buffers from the '\n' split were never freed. */
    for (size_t i = 0; i < lines_len; i++)
        free(lines[i]);
    free(lines);
}
/* Reads an entire file into a freshly allocated NUL-terminated string.
 * Caller owns (and must free) the returned buffer.
 * Aborts if the file cannot be opened or memory runs out. */
char* read_file(const char* file)
{
    FILE* stream = fopen(file, "r");
    /* BUG FIX: the old code dereferenced a NULL stream on open failure. */
    if (stream == NULL)
        errx(EXIT_FAILURE, "read_file: cannot open '%s'", file);
    size_t len = 0;
    size_t cap = 4096;
    char* buff = malloc(cap);
    if (buff == NULL)
        errx(EXIT_FAILURE, "read_file: malloc error");
    size_t n;
    /* Chunked reads with geometric growth replace the old one-byte-per-
     * fread loop that called realloc for every single character. */
    while ((n = fread(buff + len, 1, cap - len - 1, stream)) > 0)
    {
        len += n;
        if (cap - len <= 1)
        {
            cap *= 2;
            /* BUG FIX: never overwrite the pointer passed to realloc --
             * on failure the original buffer would have leaked. */
            char* tmp = realloc(buff, cap);
            if (tmp == NULL)
                errx(EXIT_FAILURE, "read_file: malloc error");
            buff = tmp;
        }
    }
    buff[len] = '\0';
    /* BUG FIX: the stream was never closed (handle leak). */
    fclose(stream);
    return buff;
}
/* Forward pass through every hidden layer, front to back. Each neuron's
 * pre-activation is the weighted sum of the previous layer's activations
 * plus its bias; its activation is the sigmoid of that. */
void process_hidden_layer(neural_network* network)
{
    for (size_t layer = 0; layer < network->hidden_depth; layer++)
    {
        for (size_t idx = 0; idx < network->hidden_height; idx++)
        {
            neuron* cell = &network->hidden[layer][idx];
            double weighted_sum = 0.0;
            if (layer == 0)
            {
                /* First hidden layer is fed by the input neurons. */
                for (size_t src = 0; src < network->nb_input; src++)
                    weighted_sum += cell->weights[src] * network->inputs[src].activation;
            }
            else
            {
                /* Deeper layers are fed by the previous hidden layer. */
                for (size_t src = 0; src < network->hidden_height; src++)
                    weighted_sum += cell->weights[src] * network->hidden[layer - 1][src].activation;
            }
            cell->activation_input = weighted_sum + cell->bias;
            cell->activation = sigmoid(cell->activation_input);
        }
    }
}
/* Forward pass for the output layer, fed by the last hidden layer.
 * Stores both the pre-activation (weighted sum + bias) and the sigmoid
 * activation on each output neuron. */
void process_output_layer(neural_network* network)
{
    neuron* last_layer = network->hidden[network->hidden_depth - 1];
    for (size_t o = 0; o < network->nb_output; o++)
    {
        neuron* out = &network->outputs[o];
        double weighted_sum = 0.0;
        for (size_t src = 0; src < network->hidden_height; src++)
            weighted_sum += out->weights[src] * last_layer[src].activation;
        out->activation_input = weighted_sum + out->bias;
        out->activation = sigmoid(weighted_sum + out->bias);
    }
}
//Runs one full forward pass: hidden layers first, then the output layer
//(which reads the last hidden layer's activations, so the order matters).
//Assumes input activations were already set via network_set_input_data.
void process_network(neural_network* network)
{
process_hidden_layer(network);
process_output_layer(network);
}
/* Half the sum of squared errors over the whole dataset: runs a forward
 * pass per sample and accumulates (expected - actual)^2 for every output. */
double get_network_cost(neural_network* network, training_data* expected_data, size_t data_len)
{
    double total = 0.0;
    for (size_t sample = 0; sample < data_len; sample++)
    {
        network_set_input_data(network, expected_data[sample]);
        process_network(network);
        for (size_t o = 0; o < network->nb_output; o++)
        {
            double delta = expected_data[sample].outputs[o] - network->outputs[o].activation;
            total += delta * delta;
        }
    }
    return 0.5 * total;
}
/* Back-propagated error term (local gradient) for hidden neuron (x, y):
 * the gradients of the next layer, weighted by the connections LEAVING this
 * neuron, times the sigmoid derivative at this neuron's pre-activation. */
double calculate_hidden_local_gradiant(neural_network* network, size_t x, size_t y)
{
    neuron* curr = &network->hidden[x][y];
    double local_gradient_sum = 0.0;
    if (x == network->hidden_depth - 1)
    {
        /* Last hidden layer: gradients come from the output neurons.
         * BUG FIX: the weight connecting hidden[x][y] to target h is
         * target->weights[y] (indexed by the SOURCE neuron), not
         * weights[h], which indexed by the target's own position. */
        for (size_t h = 0; h < network->nb_output; h++)
        {
            neuron* target = &network->outputs[h];
            local_gradient_sum += target->local_gradient * target->weights[y];
        }
    }
    else
    {
        for (size_t h = 0; h < network->hidden_height; h++)
        {
            neuron* target = &network->hidden[x + 1][h];
            local_gradient_sum += target->local_gradient * target->weights[y];
        }
    }
    /* BUG FIX: the derivative must be evaluated at the pre-activation
     * value, as network_output_back_propagation already does -- the old
     * code applied sigmoid' to the activation (sigmoid of sigmoid). */
    return local_gradient_sum * sigmoid_derivate(curr->activation_input);
}
//Gradient-descent update for all hidden layers, iterated back-to-front so
//each layer's gradients can read the local_gradient values already computed
//for the layer after it.
//NOTE(review): weights are updated in the same sweep that computes gradients,
//so calculate_hidden_local_gradiant reads partially-updated downstream
//weights -- a known quirk of simple back-prop implementations; confirm this
//is intended before reordering anything here.
void network_hidden_back_propagation(neural_network* network, double learning_rate)
{
//x is signed so the loop can count down past 0 and terminate.
for (long int x = ((long int)network->hidden_depth-1); x >= 0; x--)
{
for (size_t y = 0; y < network->hidden_height; y++)
{
neuron* curr = &network->hidden[x][y];
curr->local_gradient = calculate_hidden_local_gradiant(network, x, y);
if (x == 0)
{
//First hidden layer: connections come from the input neurons.
for (size_t h = 0; h < network->nb_input; h++)
{
neuron* connect = &network->inputs[h];
double total_error_w = curr->local_gradient * connect->activation;
curr->weights[h] -= learning_rate * total_error_w;
}
}
else
{
//Deeper layers: connections come from the previous hidden layer.
for (size_t h = 0; h < network->hidden_height; h++)
{
neuron* connect = &network->hidden[x-1][h];
double total_error_w = curr->local_gradient * connect->activation;
curr->weights[h] -= learning_rate * total_error_w;
}
}
//Bias gradient is just the local gradient (input of 1).
curr->bias -= learning_rate * curr->local_gradient;
}
}
}
/* Gradient-descent update for the output layer. Each neuron's local
 * gradient is (activation - expected) * sigmoid'(pre-activation); weights
 * from the last hidden layer and the bias then step against the gradient. */
void network_output_back_propagation(neural_network* network, double* expected_data, double learning_rate)
{
    neuron* prev_layer = network->hidden[network->hidden_depth - 1];
    for (size_t o = 0; o < network->nb_output; o++)
    {
        neuron* out = &network->outputs[o];
        out->local_gradient = (out->activation - expected_data[o]) * sigmoid_derivate(out->activation_input);
        for (size_t src = 0; src < network->hidden_height; src++)
        {
            double weight_gradient = out->local_gradient * prev_layer[src].activation;
            out->weights[src] -= learning_rate * weight_gradient;
        }
        out->bias -= learning_rate * out->local_gradient;
    }
}
//One full backward pass. The output layer must run first: the hidden-layer
//gradients (calculate_hidden_local_gradiant) read the local_gradient values
//set on the output neurons.
void network_back_propagation(neural_network* network, double* expected_data, double learning_rate)
{
network_output_back_propagation(network, expected_data, learning_rate);
network_hidden_back_propagation(network, learning_rate);
}
/* Copies a sample's input values onto the input layer's activations.
 * Assumes data.inputs holds at least nb_input values. */
void network_set_input_data(neural_network* network, training_data data)
{
    size_t count = network->nb_input;
    for (size_t idx = 0; idx < count; idx++)
        network->inputs[idx].activation = data.inputs[idx];
}
//One training step on a single sample: load inputs, forward pass, then
//back-propagate against the sample's expected outputs.
void network_train_data(neural_network* network, training_data data, double learning_rate)
{
network_set_input_data(network, data);
process_network(network);
network_back_propagation(network, data.outputs, learning_rate);
}
/* One epoch: a training step on every sample, in dataset order. */
void network_process_epoche(neural_network* network, training_data* data, size_t data_len, double learning_rate)
{
    for (size_t sample = 0; sample < data_len; sample++)
        network_train_data(network, data[sample], learning_rate);
}
/* Total cost of the network over the dataset.
 * BUG FIX: the old loop called get_network_cost -- which itself iterates
 * over the WHOLE dataset -- once per sample, returning data_len times the
 * true cost and performing O(data_len^2) forward passes (plus redundant
 * set-input/process calls whose results were ignored). One call suffices;
 * callers only compare costs, so the scale change is also a correction. */
double get_network_total_cost(neural_network* network, training_data* datas, size_t data_len)
{
    return get_network_cost(network, datas, data_len);
}
/* Trains the network. If warmup > 0, first trains `warmup` freshly
 * initialised candidate networks for `warmup_iterations` epochs each and
 * keeps the cheapest one as the starting point, then runs `iterations`
 * full epochs on it, printing coarse progress. */
void train_network(neural_network* network, training_data* datas, size_t data_len, float learning_rate, size_t warmup, size_t warmup_iterations, size_t iterations)
{
    if (warmup > 0)
    {
        neural_network networks[warmup];
        double min_net_cost = -1;
        size_t min_net = 0;
        for (size_t n = 0; n < warmup; n++)
        {
            networks[n].nb_input = network->nb_input;
            networks[n].nb_output = network->nb_output;
            networks[n].hidden_depth = network->hidden_depth;
            networks[n].hidden_height = network->hidden_height;
            init_neural_network(networks + n);
            for (size_t i = 0; i < warmup_iterations; i++)
                network_process_epoche(&networks[n], datas, data_len, learning_rate);
            double cost = get_network_total_cost(&networks[n], datas, data_len);
            if (min_net_cost < 0 || cost < min_net_cost)
            {
                min_net_cost = cost;
                min_net = n;
            }
        }
        /* Shallow copy: *network takes ownership of the winner's arrays,
         * so only the losers are freed below. */
        memcpy(network, networks + min_net, sizeof(neural_network));
        for (size_t n = 0; n < warmup; n++)
        {
            if (n != min_net)
                free_neural_network(networks + n);
        }
    }
    /* BUG FIX: iterations / 100 is 0 when iterations < 100, and `i % 0`
     * is undefined behaviour -- clamp the progress step to at least 1. */
    size_t report_step = iterations / 100;
    if (report_step == 0)
        report_step = 1;
    for (size_t i = 0; i < iterations; i++)
    {
        network_process_epoche(network, datas, data_len, learning_rate);
        if (i % report_step == 0) //Debug progress output
            printf("Training %i%% (ETA: %s)\n", (int)(((float)i / (float)iterations) * 100), "1 m");
    }
}
//Fraction of samples (in [0, 1]) whose rounded network output matches the
//expected output.
//NOTE(review): only outputs[0] is compared -- this assumes a single-output
//(binary) classifier; confirm before using with multi-output networks.
double get_success_rate(neural_network* network, training_data* datas, size_t data_len)
{
int success = 0;
for (size_t i = 0; i < data_len; i++)
{
network_set_input_data(network, datas[i]);
process_network(network);
//round() maps the sigmoid activation to the nearest class label.
if ((int)datas[i].outputs[0] == (int)round(network->outputs[0].activation))
success++;
}
return (double)success / (double)data_len;
}
/* WIP: scans `directory` for regular "*.png" files whose first character is
 * the sample's label. The image-decoding step that would actually build
 * training_data records is not implemented yet, so this always returns NULL.
 * Returns NULL also when the directory cannot be opened. */
training_data* load_dataset(const char* directory)
{
    DIR* d = opendir(directory);
    if (d == NULL)
        return NULL;
    struct dirent* dir;
    while ((dir = readdir(d)) != NULL)
    {
        if (dir->d_type != DT_REG)
            continue;
        size_t len = strlen(dir->d_name);
        /* Need at least "X.png": a label character plus the suffix. */
        if (len < 5 || strcmp(dir->d_name + len - 4, ".png") != 0)
            continue;
        char* file = combine_path(directory, dir->d_name);
        char letter = dir->d_name[0]; /* label = first char of the file name */
        (void)letter; /* TODO: decode the image at `file` into training_data */
        /* BUG FIX: the path returned by combine_path was leaked. */
        free(file);
    }
    /* BUG FIX: the directory handle was never closed. */
    closedir(d);
    return NULL;
}
/* Debug print: one row per hidden-layer position, showing the matching
 * input activation (if any), each hidden layer's activation left-to-right,
 * and the matching output activation (if any).
 * %zu replaces the old %li: it is the portable size_t specifier. */
void print_network_activations(neural_network* network)
{
    for (size_t y = 0; y < network->hidden_height; y++)
    {
        if (y < network->nb_input)
            printf("input%zu: %f|", y, network->inputs[y].activation);
        for (size_t x = 0; x < network->hidden_depth; x++)
            printf("%f--", network->hidden[x][y].activation);
        if (y < network->nb_output)
            printf("|output%zu: %f", y, network->outputs[y].activation);
        printf("\n");
    }
}
/* Debug dump of every neuron's pre-activation, activation, bias and weights.
 * %zu replaces %li as the portable size_t specifier. */
void print_network_state(neural_network *network) {
    printf("Network State:\n");
    for (size_t x = 0; x < network->hidden_depth; x++) {
        /* BUG FIX: the old code printed nb_input weights for EVERY hidden
         * layer, but layers x > 0 only own hidden_height weights -- that
         * read out of bounds whenever nb_input > hidden_height. */
        size_t nb_weights = (x == 0) ? network->nb_input : network->hidden_height;
        for (size_t y = 0; y < network->hidden_height; y++) {
            printf("Hidden[%zu][%zu] Input: %f, Activation: %f, Bias: %f\n",
                   x, y, network->hidden[x][y].activation_input,
                   network->hidden[x][y].activation, network->hidden[x][y].bias);
            for (size_t h = 0; h < nb_weights; h++) {
                printf("  Weight[%zu]: %f\n", h, network->hidden[x][y].weights[h]);
            }
        }
    }
    for (size_t o = 0; o < network->nb_output; o++) {
        printf("Output[%zu] Input: %f, Activation: %f, Bias: %f\n",
               o, network->outputs[o].activation_input,
               network->outputs[o].activation, network->outputs[o].bias);
        for (size_t h = 0; h < network->hidden_height; h++) {
            printf("  Weight[%zu]: %f\n", h, network->outputs[o].weights[h]);
        }
    }
}
/* Debug print: for each sample, runs a forward pass and prints a side-by-side
 * table of inputs vs. outputs (with expected values). The table has enough
 * rows for whichever of the two layers is larger.
 * %02zu replaces %02li as the portable size_t specifier. */
void print_training_debug(neural_network* network, training_data* data, size_t data_len)
{
    size_t debug_height = network->nb_input;
    if (network->nb_output > debug_height)
        debug_height = network->nb_output;
    for (size_t data_i = 0; data_i < data_len; data_i++)
    {
        /* Reuse the shared helper instead of the old inline copy loop. */
        network_set_input_data(network, data[data_i]);
        process_network(network);
        printf("----------------------------------------\n");
        for (size_t line = 0; line < debug_height; line++)
        {
            if (line < network->nb_input)
            {
                if (line < network->nb_output)
                    printf("input%02zu:%i | output%02zu:%f expected:%i\n", line, (int)data[data_i].inputs[line], line, network->outputs[line].activation, (int)data[data_i].outputs[line]);
                else
                    printf("input%02zu:%i |\n", line, (int)data[data_i].inputs[line]);
            }
            else
            {
                printf("          | output%02zu:%f expected:%i\n", line, network->outputs[line].activation, (int)data[data_i].outputs[line]);
            }
        }
    }
}

View File

@ -0,0 +1,71 @@
#ifndef NEURAL_UTILS_H
#define NEURAL_UTILS_H

/* Includes now live INSIDE the guard. <stddef.h> supplies size_t; the old
 * <aio.h> (POSIX asynchronous I/O) was unrelated and only provided size_t
 * by accident. <math.h> is kept for source compatibility with existing
 * includers. */
#include <stddef.h>
#include <math.h>

/* One unit of the network. Activations and gradients are scratch state
 * written during forward/backward passes. */
typedef struct
{
    double activation;       /* post-sigmoid output of the neuron */
    double activation_input; /* weighted sum + bias (pre-activation) */
    double local_gradient;   /* error term used during back-propagation */
    double bias;
    double* weights;         /* one weight per incoming connection; NULL for inputs */
}neuron;

/* Fully-connected feed-forward network with uniform hidden layers. */
typedef struct
{
    size_t nb_input;
    size_t hidden_height;    /* neurons per hidden layer */
    size_t hidden_depth;     /* number of hidden layers */
    size_t nb_output;
    double learning_rate;
    neuron* inputs;
    neuron** hidden;         /* [hidden_depth][hidden_height] */
    neuron* outputs;
}neural_network;

/* One labelled sample: nb_input input values, nb_output expected outputs. */
typedef struct
{
    double* inputs;
    double* outputs;
}training_data;

double sigmoid(double x);
double sigmoid_derivate(double x);
/* BUG FIX: the prototype now matches the definition, which takes the range
 * size (the old empty parameter list bypassed argument type-checking). */
double double_rand(double size);
double clamp(double value, double min, double max);
void init_neuron(neuron* n, size_t nb_connection);
void free_neuron(neuron* n);
void init_neural_network(neural_network* network);
void free_neural_network(neural_network* network);
void save_neural_network(neural_network* network, const char* file_path);
void load_neural_network(neural_network* network, const char* content);
char** string_split(const char* string, char separator, size_t* res_len);
char* read_file(const char* file);
void process_network(neural_network* network);
double get_network_cost(neural_network* network, training_data* expected_data, size_t data_len);
void network_back_propagation(neural_network* network, double* expected_data, double learning_rate);
void network_process_epoche(neural_network* network, training_data* data, size_t data_len, double learning_rate);
double get_network_total_cost(neural_network* network, training_data* datas, size_t data_len);
void train_network(neural_network* network, training_data* datas, size_t data_len, float learning_rate, size_t warmup, size_t warmup_iterations, size_t iterations);
double get_success_rate(neural_network* network, training_data* datas, size_t data_len);
training_data* load_dataset(const char* directory);
void network_set_input_data(neural_network* network, training_data data);
void print_network_activations(neural_network* network);
void print_network_state(neural_network *network);
void print_training_debug(neural_network* network, training_data* data, size_t data_len);
#endif

View File

@ -30,7 +30,7 @@ char* path_get_directory(char* path)
return n_str;
}
char* combine_path(char* first_path, char* second_path)
char* combine_path(const char* first_path, const char* second_path)
{
size_t f_len = strlen(first_path);

View File

@ -3,7 +3,7 @@
char* path_get_directory(char* path);
char* combine_path(char* first_path, char* second_path);
char* combine_path(const char* first_path, const char* second_path);
void mkpath(const char* file_path);