|
@ -6,87 +6,65 @@ |
|
|
#include <assert.h> |
|
|
#include <assert.h> |
|
|
|
|
|
|
|
|
float vec_dist(VecData x, VecData y) { |
|
|
float vec_dist(VecData x, VecData y) { |
|
|
float dist = 0.0f; |
|
|
|
|
|
while (*x != '\0' && *y != '\0') { |
|
|
|
|
|
int xi = *x - '0'; |
|
|
|
|
|
int yi = *y - '0'; |
|
|
|
|
|
dist += pow(xi - yi, 2); |
|
|
|
|
|
x++; |
|
|
|
|
|
y++; |
|
|
|
|
|
|
|
|
float sum = 0.0; |
|
|
|
|
|
for (int i = 0; i < 128; i++) { |
|
|
|
|
|
float diff = x.vector[i] - y.vector[i]; |
|
|
|
|
|
sum += diff * diff; |
|
|
} |
|
|
} |
|
|
return sqrt(dist); |
|
|
|
|
|
|
|
|
return sqrt(sum); |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
void fvecs_read(const char* filename, int* bound, float** vectors, int* num_vectors, int* vector_dimension) { |
|
|
|
|
|
|
|
|
VecData* fvecs_read(const char* filename, int* bounds) { |
|
|
FILE* fid = fopen(filename, "rb"); |
|
|
FILE* fid = fopen(filename, "rb"); |
|
|
if (fid == NULL) { |
|
|
if (fid == NULL) { |
|
|
fprintf(stderr, "I/O error: Unable to open the file %s\n", filename); |
|
|
|
|
|
|
|
|
fprintf(stderr, "I/O error : Unable to open the file %s\n", filename); |
|
|
exit(EXIT_FAILURE); |
|
|
exit(EXIT_FAILURE); |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
// Read the vector size |
|
|
|
|
|
int d; |
|
|
|
|
|
|
|
|
int d; |
|
|
fread(&d, sizeof(int), 1, fid); |
|
|
fread(&d, sizeof(int), 1, fid); |
|
|
|
|
|
|
|
|
// Get the file size |
|
|
|
|
|
|
|
|
|
|
|
fseek(fid, 0, SEEK_END); |
|
|
fseek(fid, 0, SEEK_END); |
|
|
long file_size = ftell(fid); |
|
|
long file_size = ftell(fid); |
|
|
fseek(fid, 0, SEEK_SET); |
|
|
fseek(fid, 0, SEEK_SET); |
|
|
|
|
|
|
|
|
// Get the number of vectors |
|
|
|
|
|
long vec_size = (long) d * sizeof(float); |
|
|
long vec_size = (long) d * sizeof(float); |
|
|
long vec_count = (file_size - sizeof(int)) / vec_size; |
|
|
long vec_count = (file_size - sizeof(int)) / vec_size; |
|
|
|
|
|
|
|
|
// Apply the bounds if specified |
|
|
|
|
|
int a = 1; |
|
|
int a = 1; |
|
|
int b = vec_count; |
|
|
|
|
|
if (bound != NULL) { |
|
|
|
|
|
if (bound[1] == 1) { |
|
|
|
|
|
b = bound[0]; |
|
|
|
|
|
} else if (bound[1] == 2) { |
|
|
|
|
|
a = bound[0]; |
|
|
|
|
|
b = bound[1]; |
|
|
|
|
|
|
|
|
int bmax = vec_count; |
|
|
|
|
|
int b = bmax; |
|
|
|
|
|
|
|
|
|
|
|
if (bounds != NULL) { |
|
|
|
|
|
if (bounds[1] == 1) { |
|
|
|
|
|
b = bounds[0]; |
|
|
|
|
|
} else if (bounds[1] == 2) { |
|
|
|
|
|
a = bounds[0]; |
|
|
|
|
|
b = bounds[1]; |
|
|
} |
|
|
} |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
// Check if the bounds are valid |
|
|
|
|
|
if (a < 1 || b < a || b > vec_count) { |
|
|
|
|
|
*vectors = NULL; |
|
|
|
|
|
|
|
|
if (a < 1 || b > bmax || b < a) { |
|
|
|
|
|
VecData* v = NULL; |
|
|
fclose(fid); |
|
|
fclose(fid); |
|
|
return; |
|
|
|
|
|
|
|
|
return v; |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
// Compute the number of vectors to read |
|
|
|
|
|
int n = b - a + 1; |
|
|
int n = b - a + 1; |
|
|
|
|
|
fseek(fid, (a - 1) * vec_size, SEEK_SET); |
|
|
|
|
|
|
|
|
// Read the vectors |
|
|
|
|
|
*vectors = malloc(n * d * sizeof(float)); |
|
|
|
|
|
float* ptr = *vectors; |
|
|
|
|
|
for (int i = 0; i < vec_count; i++) { |
|
|
|
|
|
// Read the vector size |
|
|
|
|
|
int vec_d; |
|
|
|
|
|
fread(&vec_d, sizeof(int), 1, fid); |
|
|
|
|
|
|
|
|
|
|
|
// Check if the vector size is correct |
|
|
|
|
|
if (vec_d != d) { |
|
|
|
|
|
fprintf(stderr, "Error: Vector %d has incorrect dimension %d (expected %d)\n", i + 1, vec_d, d); |
|
|
|
|
|
fclose(fid); |
|
|
|
|
|
free(*vectors); |
|
|
|
|
|
*vectors = NULL; |
|
|
|
|
|
return; |
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
// Read the vector data |
|
|
|
|
|
fread(ptr, sizeof(float), d, fid); |
|
|
|
|
|
ptr += d; |
|
|
|
|
|
|
|
|
|
|
|
// Stop reading if we have read enough vectors |
|
|
|
|
|
if (i >= b - 1) { |
|
|
|
|
|
break; |
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// Read n vectors |
|
|
|
|
|
VecData* v = malloc(n * sizeof(VecData)); |
|
|
|
|
|
for (int i = 0; i < n; i++) { |
|
|
|
|
|
VecData vec; |
|
|
|
|
|
vec.id = i + a; |
|
|
|
|
|
vec.vector = malloc(d * sizeof(float)); |
|
|
|
|
|
fread(vec.vector, sizeof(float), d, fid); |
|
|
|
|
|
v[i] = vec; |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
*vector_dimension = d; |
|
|
|
|
|
*num_vectors = n; |
|
|
|
|
|
|
|
|
|
|
|
fclose(fid); |
|
|
fclose(fid); |
|
|
|
|
|
return v; |
|
|
} |
|
|
} |