From 08ce87957669550df604b831d8f8f6487088ff94 Mon Sep 17 00:00:00 2001 From: Ethan Date: Thu, 29 Jun 2023 20:30:00 +0800 Subject: [PATCH] feat: add VecData structure --- inc/utils.h | 7 +++-- src/test.c | 21 ++++----------- src/utils.c | 88 +++++++++++++++++++++++-------------------------------------- 3 files changed, 43 insertions(+), 73 deletions(-) diff --git a/inc/utils.h b/inc/utils.h index 9a593b2..0f59a1b 100644 --- a/inc/utils.h +++ b/inc/utils.h @@ -1,5 +1,8 @@ -typedef char* VecData; +typedef struct { + int id; + float* vector; +} VecData; float vec_dist(VecData x, VecData y); -void fvecs_read(const char* filename, int* bound, float** vectors, int* num_vectors, int* vector_dimension); \ No newline at end of file +VecData* fvecs_read(const char* filename, int* bounds); \ No newline at end of file diff --git a/src/test.c b/src/test.c index 3209e1e..b4d8c0b 100644 --- a/src/test.c +++ b/src/test.c @@ -1,27 +1,16 @@ #include #include -#include "hnsw.h" #include "utils.h" int main() { - const char* filename = "../dataset/siftsmall_base.fvecs"; - int bound[2] = {2, 5}; - float* vectors; - int num_vectors, vector_dimension; - fvecs_read(filename, bound, &vectors, &num_vectors, &vector_dimension); - printf("Number of vectors: %d\n", num_vectors); - printf("Vector dimension: %d\n", vector_dimension); - int i, j; - for (i = 0; i < num_vectors; i++) { - printf("Vector %d: ", i); - for (j = 0; j < vector_dimension; j++) { - printf("%f ", vectors[i * vector_dimension + j]); - } - printf("\n"); + VecData* vecs = fvecs_read("../dataset/siftsmall_base.fvecs", NULL); + for (int i = 0; i < 10000; i++) { + printf("%f\n", vecs[i].vector[0]); } + free(vecs); - free(vectors); return 0; + } \ No newline at end of file diff --git a/src/utils.c b/src/utils.c index 80032ef..54bab56 100644 --- a/src/utils.c +++ b/src/utils.c @@ -6,87 +6,65 @@ #include float vec_dist(VecData x, VecData y) { - float dist = 0.0f; - while (*x != '\0' && *y != '\0') { - int xi = *x - '0'; - int yi = *y - '0'; - dist += pow(xi - yi, 2); - x++; - y++; + float sum = 0.0; + for (int i = 0; i < 128; i++) { + float diff = x.vector[i] - y.vector[i]; + sum += diff * diff; } - return sqrt(dist); + return sqrt(sum); } -void fvecs_read(const char* filename, int* bound, float** vectors, int* num_vectors, int* vector_dimension) { +VecData* fvecs_read(const char* filename, int* bounds) { FILE* fid = fopen(filename, "rb"); if (fid == NULL) { - fprintf(stderr, "I/O error: Unable to open the file %s\n", filename); + fprintf(stderr, "I/O error : Unable to open the file %s\n", filename); exit(EXIT_FAILURE); } - // Read the vector size - int d; + int d; fread(&d, sizeof(int), 1, fid); - // Get the file size + fseek(fid, 0, SEEK_END); long file_size = ftell(fid); fseek(fid, 0, SEEK_SET); - - // Get the number of vectors long vec_size = (long) d * sizeof(float); long vec_count = (file_size - sizeof(int)) / vec_size; - // Apply the bounds if specified int a = 1; - int b = vec_count; - if (bound != NULL) { - if (bound[1] == 1) { - b = bound[0]; - } else if (bound[1] == 2) { - a = bound[0]; - b = bound[1]; + int bmax = vec_count; + int b = bmax; + + if (bounds != NULL) { + if (bounds[1] == 1) { + b = bounds[0]; + } else if (bounds[1] == 2) { + a = bounds[0]; + b = bounds[1]; } } - // Check if the bounds are valid - if (a < 1 || b < a || b > vec_count) { - *vectors = NULL; + if (a < 1 || b > bmax || b < a) { + VecData* v = NULL; fclose(fid); - return; + return v; } - // Compute the number of vectors to read int n = b - a + 1; + fseek(fid, (a - 1) * vec_size, SEEK_SET); - // Read the vectors - *vectors = malloc(n * d * sizeof(float)); - float* ptr = *vectors; - for (int i = 0; i < vec_count; i++) { - // Read the vector size - int vec_d; - fread(&vec_d, sizeof(int), 1, fid); - - // Check if the vector size is correct - if (vec_d != d) { - fprintf(stderr, "Error: Vector %d has incorrect dimension %d (expected %d)\n", i + 1, vec_d, d); - fclose(fid); - free(*vectors); - *vectors = NULL; - return; - } - - // Read the vector data - fread(ptr, sizeof(float), d, fid); - ptr += d; - - // Stop reading if we have read enough vectors - if (i >= b - 1) { - break; - } + + // Read n vectors + VecData* v = malloc(n * sizeof(VecData)); + for (int i = 0; i < n; i++) { + VecData vec; + vec.id = i + a; + vec.vector = malloc(d * sizeof(float)); + fread(vec.vector, sizeof(float), d, fid); + v[i] = vec; } - *vector_dimension = d; - *num_vectors = n; + fclose(fid); + return v; } \ No newline at end of file