Procházet zdrojové kódy

feat: add read function

master
Ethan před 1 rokem
rodič
revize
81ae974102
7 změněných souborů, celkem 114 přidání a 4 odebrání
  1. +1
    -1
      CMakeLists.txt
  2. binární
      dataset/siftsmall_base.fvecs
  3. binární
      dataset/siftsmall_learn.fvecs
  4. binární
      dataset/siftsmall_query.fvecs
  5. +1
    -2
      inc/utils.h
  6. +20
    -1
      src/test.c
  7. +92
    -0
      src/utils.c

+ 1
- 1
CMakeLists.txt Zobrazit soubor

@ -15,4 +15,4 @@ SHARED
)
# Test executable built from src/test.c.
add_executable(hnsw_test src/test.c)
# NOTE(review): hnswc is linked by both calls below; the second call additionally
# links "m" (presumably libm for the pow/sqrt calls in src/utils.c - confirm),
# so the first call looks redundant.
target_link_libraries(hnsw_test hnswc)
target_link_libraries(hnsw_test hnswc m)

binární
dataset/siftsmall_base.fvecs Zobrazit soubor


binární
dataset/siftsmall_learn.fvecs Zobrazit soubor


binární
dataset/siftsmall_query.fvecs Zobrazit soubor


+ 1
- 2
inc/utils.h Zobrazit soubor

@ -2,5 +2,4 @@
// Vector payload handle: a plain char pointer. vec_dist() walks it as a
// NUL-terminated character sequence.
typedef char* VecData;
// Returns the Euclidean distance between x and y, computed character by
// character until either sequence reaches its terminating '\0'.
float vec_dist(VecData x, VecData y);
// Reads vectors from the binary file `filename` into a buffer allocated with
// malloc() and stored in *vectors (the caller releases it with free()).
//   bound            - optional range selector; may be NULL to read every vector
//   vectors          - out: flat array of floats, or NULL when the requested
//                      range is invalid or a record has an unexpected dimension
//   num_vectors      - out: number of vectors stored in *vectors on success
//   vector_dimension - out: number of floats per vector on success
// Terminates the process with EXIT_FAILURE if the file cannot be opened.
void fvecs_read(const char* filename, int* bound, float** vectors, int* num_vectors, int* vector_dimension);

+ 20
- 1
src/test.c Zobrazit soubor

@ -1,8 +1,27 @@
#include <stdio.h>
#include <stdlib.h>
#include "hnsw.h"
#include "utils.h"
/**
 * Smoke test for fvecs_read(): loads vectors from the siftsmall base set,
 * passing the bound pair {2, 5}, and prints every component that was returned.
 *
 * @return 0 on success, EXIT_FAILURE when fvecs_read() hands back no data.
 */
int main(void) {
    printf("Hello, world!\n");
    const char* filename = "../dataset/siftsmall_base.fvecs";
    int bound[2] = {2, 5};

    // Initialise every output: fvecs_read() can return with *vectors == NULL,
    // and the counts must not be read as indeterminate values in that case.
    float* vectors = NULL;
    int num_vectors = 0;
    int vector_dimension = 0;

    fvecs_read(filename, bound, &vectors, &num_vectors, &vector_dimension);
    if (vectors == NULL) {
        fprintf(stderr, "Failed to read vectors from %s\n", filename);
        return EXIT_FAILURE;
    }

    printf("Number of vectors: %d\n", num_vectors);
    printf("Vector dimension: %d\n", vector_dimension);
    for (int i = 0; i < num_vectors; i++) {
        printf("Vector %d: ", i);
        for (int j = 0; j < vector_dimension; j++) {
            // Vectors are stored back to back in one flat array.
            printf("%f ", vectors[(size_t)i * (size_t)vector_dimension + (size_t)j]);
        }
        printf("\n");
    }

    free(vectors);
    return 0;
}

+ 92
- 0
src/utils.c Zobrazit soubor

@ -0,0 +1,92 @@
#include "utils.h"
#include <assert.h>
#include <limits.h>
#include <math.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/**
 * Euclidean distance between two NUL-terminated character sequences.
 *
 * Characters are compared pairwise, position by position, until either
 * sequence reaches its terminating '\0'; whatever remains of the longer
 * sequence is ignored.
 *
 * @param x first sequence
 * @param y second sequence
 * @return square root of the sum of the squared per-position differences
 */
float vec_dist(VecData x, VecData y) {
    float sum_sq = 0.0f;
    for (; *x != '\0' && *y != '\0'; ++x, ++y) {
        /* Both characters are offset by '0' before being subtracted. */
        const int lhs = *x - '0';
        const int rhs = *y - '0';
        const int delta = lhs - rhs;
        /* Accumulate in double, then narrow back to float on assignment. */
        sum_sq += (double)(delta * delta);
    }
    return sqrt(sum_sq);
}
/**
 * Read a range of vectors from a binary file in which every record is an
 * `int` dimension followed by that many `float` components (both read in the
 * host's native representation).
 *
 * @param filename          path of the file to read
 * @param bound             NULL to read every vector, otherwise a pointer to
 *                          two ints {first, last}: a 1-based, inclusive range
 * @param vectors           out: malloc()'ed flat array of
 *                          (*num_vectors * *vector_dimension) floats that the
 *                          caller must free(); NULL on any failure
 * @param num_vectors       out: number of vectors returned, 0 on failure
 * @param vector_dimension  out: floats per vector, 0 on failure
 *
 * The process exits with EXIT_FAILURE if the file cannot be opened. Every
 * other failure (unreadable or non-positive dimension, invalid range, record
 * with a different dimension, short read, allocation failure) returns with
 * *vectors == NULL and both counts set to 0.
 */
void fvecs_read(const char* filename, int* bound, float** vectors, int* num_vectors, int* vector_dimension) {
    float* data = NULL;
    int d = 0;
    long file_size = 0;
    long record_size = 0;
    long vec_count = 0;
    long first = 1;
    long last = 0;
    long count = 0;

    // Give the outputs a defined "nothing read" state up front so that every
    // failure path below leaves them safe for the caller to inspect.
    *vectors = NULL;
    *num_vectors = 0;
    *vector_dimension = 0;

    FILE* fid = fopen(filename, "rb");
    if (fid == NULL) {
        fprintf(stderr, "I/O error: Unable to open the file %s\n", filename);
        exit(EXIT_FAILURE);
    }

    // The dimension stored in the first record is the reference for all others.
    if (fread(&d, sizeof(int), 1, fid) != 1 || d <= 0) {
        fprintf(stderr, "Error: Unable to read a valid vector dimension from %s\n", filename);
        goto fail;
    }

    // Get the file size
    if (fseek(fid, 0, SEEK_END) != 0) {
        fprintf(stderr, "I/O error: Unable to determine the size of %s\n", filename);
        goto fail;
    }
    file_size = ftell(fid);
    if (file_size < 0) {
        fprintf(stderr, "I/O error: Unable to determine the size of %s\n", filename);
        goto fail;
    }

    // A record is the int header plus d floats; guard the size computation
    // against overflowing long before dividing the file into records.
    if ((long)d > (LONG_MAX - (long)sizeof(int)) / (long)sizeof(float)) {
        goto fail;
    }
    record_size = (long)sizeof(int) + (long)d * (long)sizeof(float);
    vec_count = file_size / record_size;

    // Apply the bounds if specified
    last = vec_count;
    if (bound != NULL) {
        first = bound[0];
        last = bound[1];
    }

    // Check if the bounds are valid (this also rejects a file with no records)
    if (first < 1 || last < first || last > vec_count) {
        goto fail;
    }

    // The count must fit the int output and the byte size must fit size_t.
    count = last - first + 1;
    if (count > INT_MAX || (size_t)count > SIZE_MAX / sizeof(float) / (size_t)d) {
        goto fail;
    }

    data = malloc((size_t)count * (size_t)d * sizeof(float));
    if (data == NULL) {
        fprintf(stderr, "Error: Out of memory while reading %s\n", filename);
        goto fail;
    }

    // Jump straight to the first requested record so that only the requested
    // range is ever written into the buffer.
    if (fseek(fid, (first - 1) * record_size, SEEK_SET) != 0) {
        fprintf(stderr, "I/O error: Unable to seek in %s\n", filename);
        goto fail;
    }

    for (long i = 0; i < count; i++) {
        int vec_d = 0;
        if (fread(&vec_d, sizeof(int), 1, fid) != 1) {
            fprintf(stderr, "Error: Unexpected end of file while reading vector %ld\n", first + i);
            goto fail;
        }
        // Check if the vector size is correct
        if (vec_d != d) {
            fprintf(stderr, "Error: Vector %ld has incorrect dimension %d (expected %d)\n", first + i, vec_d, d);
            goto fail;
        }
        // Read the vector data
        if (fread(data + (size_t)i * (size_t)d, sizeof(float), (size_t)d, fid) != (size_t)d) {
            fprintf(stderr, "Error: Unexpected end of file while reading vector %ld\n", first + i);
            goto fail;
        }
    }

    *vectors = data;
    *num_vectors = (int)count;
    *vector_dimension = d;
    fclose(fid);
    return;

fail:
    free(data);
    fclose(fid);
}

Načítá se…
Zrušit
Uložit