瀏覽代碼

feat: add VecData structure

master
Ethan 1 年之前
父節點
當前提交
08ce879576
共有 3 個文件被更改，包括 43 次插入和 73 次刪除
  1. +5
    -2
      inc/utils.h
  2. +5
    -16
      src/test.c
  3. +33
    -55
      src/utils.c

+ 5
- 2
inc/utils.h 查看文件

@ -1,5 +1,8 @@
#ifndef UTILS_H
#define UTILS_H

/**
 * One dataset vector together with its identifier.
 *
 * The anonymous struct replaces the earlier `char*` representation; only one
 * definition of VecData may exist, so the old typedef is gone. The include
 * guard keeps the typedef from being seen twice in one translation unit.
 */
typedef struct {
    int id;        /* set by fvecs_read to the vector's 1-based position in the file */
    float* vector; /* heap-allocated component array (malloc'd by fvecs_read) */
} VecData;

/**
 * Euclidean (L2) distance between two vectors.
 *
 * The implementation in src/utils.c compares a fixed 128 components, so both
 * `vector` arrays must hold at least that many floats.
 */
float vec_dist(VecData x, VecData y);

/**
 * Load vectors from an .fvecs file.
 *
 * @param filename  path of the file to read
 * @param bounds    optional range selector; pass NULL to read every vector.
 *                  See src/utils.c for how a non-NULL value is interpreted.
 * @return malloc'd array of VecData, or NULL when the requested bounds are
 *         invalid. The caller owns the array and every element's `vector`
 *         buffer. The number of elements is not reported.
 *
 * If the file cannot be opened, an error is printed to stderr and the process
 * exits with EXIT_FAILURE.
 */
VecData* fvecs_read(const char* filename, int* bounds);

#endif /* UTILS_H */

+ 5
- 16
src/test.c 查看文件

@ -1,27 +1,16 @@
#include <stdio.h>
#include <stdlib.h>
#include "hnsw.h"
#include "utils.h"
/**
 * Smoke test for fvecs_read: load the SIFT-small base set and print the first
 * component of every vector, one per line.
 *
 * Returns 0 on success, EXIT_FAILURE if the loader returns NULL.
 */
int main(void) {
    /* fvecs_read does not report how many vectors it loaded, so the count is
       fixed here. NOTE(review): assumes siftsmall_base.fvecs holds at least
       10000 vectors - confirm against the dataset. */
    enum { EXPECTED_VECTORS = 10000 };

    VecData* vecs = fvecs_read("../dataset/siftsmall_base.fvecs", NULL);
    if (vecs == NULL) {
        fprintf(stderr, "Error: no vectors were loaded\n");
        return EXIT_FAILURE;
    }

    for (int i = 0; i < EXPECTED_VECTORS; i++) {
        printf("%f\n", vecs[i].vector[0]);
    }

    /* Each element owns its own malloc'd buffer; release those before the array. */
    for (int i = 0; i < EXPECTED_VECTORS; i++) {
        free(vecs[i].vector);
    }
    free(vecs);
    return 0;
}

+ 33
- 55
src/utils.c 查看文件

@ -6,87 +6,65 @@
#include <assert.h>
/**
 * Euclidean (L2) distance between two vectors.
 *
 * Sums the squared component differences and returns the square root.
 *
 * @param x, y  vectors whose `vector` arrays hold at least VEC_DIM floats
 * @return the non-negative distance between x and y
 */
float vec_dist(VecData x, VecData y) {
    /* VecData carries no length, so the dimension is fixed here.
       NOTE(review): 128 presumably matches the SIFT data loaded by the test
       driver - confirm, or pass the dimension explicitly. */
    enum { VEC_DIM = 128 };

    float sum = 0.0f;
    for (int i = 0; i < VEC_DIM; i++) {
        float diff = x.vector[i] - y.vector[i];
        sum += diff * diff;
    }
    /* sqrtf keeps the computation in single precision, matching the return type. */
    return sqrtf(sum);
}
void fvecs_read(const char* filename, int* bound, float** vectors, int* num_vectors, int* vector_dimension) {
VecData* fvecs_read(const char* filename, int* bounds) {
FILE* fid = fopen(filename, "rb");
if (fid == NULL) {
fprintf(stderr, "I/O error: Unable to open the file %s\n", filename);
fprintf(stderr, "I/O error : Unable to open the file %s\n", filename);
exit(EXIT_FAILURE);
}
// Read the vector size
int d;
int d;
fread(&d, sizeof(int), 1, fid);
// Get the file size
fseek(fid, 0, SEEK_END);
long file_size = ftell(fid);
fseek(fid, 0, SEEK_SET);
// Get the number of vectors
long vec_size = (long) d * sizeof(float);
long vec_count = (file_size - sizeof(int)) / vec_size;
// Apply the bounds if specified
int a = 1;
int b = vec_count;
if (bound != NULL) {
if (bound[1] == 1) {
b = bound[0];
} else if (bound[1] == 2) {
a = bound[0];
b = bound[1];
int bmax = vec_count;
int b = bmax;
if (bounds != NULL) {
if (bounds[1] == 1) {
b = bounds[0];
} else if (bounds[1] == 2) {
a = bounds[0];
b = bounds[1];
}
}
// Check if the bounds are valid
if (a < 1 || b < a || b > vec_count) {
*vectors = NULL;
if (a < 1 || b > bmax || b < a) {
VecData* v = NULL;
fclose(fid);
return;
return v;
}
// Compute the number of vectors to read
int n = b - a + 1;
fseek(fid, (a - 1) * vec_size, SEEK_SET);
// Read the vectors
*vectors = malloc(n * d * sizeof(float));
float* ptr = *vectors;
for (int i = 0; i < vec_count; i++) {
// Read the vector size
int vec_d;
fread(&vec_d, sizeof(int), 1, fid);
// Check if the vector size is correct
if (vec_d != d) {
fprintf(stderr, "Error: Vector %d has incorrect dimension %d (expected %d)\n", i + 1, vec_d, d);
fclose(fid);
free(*vectors);
*vectors = NULL;
return;
}
// Read the vector data
fread(ptr, sizeof(float), d, fid);
ptr += d;
// Stop reading if we have read enough vectors
if (i >= b - 1) {
break;
}
// Read n vectors
VecData* v = malloc(n * sizeof(VecData));
for (int i = 0; i < n; i++) {
VecData vec;
vec.id = i + a;
vec.vector = malloc(d * sizeof(float));
fread(vec.vector, sizeof(float), d, fid);
v[i] = vec;
}
*vector_dimension = d;
*num_vectors = n;
fclose(fid);
return v;
}

Loading…
取消
儲存