Browse Source

init test framework

master
Chen Lixiang 1 year ago
parent
commit
30cc6ec288
7 changed files with 88 additions and 29 deletions
  1. +1
    -1
      CMakeLists.txt
  2. +18
    -11
      README.md
  3. +3
    -3
      inc/hnsw.h
  4. +1
    -0
      inc/utils.h
  5. +3
    -3
      src/hnsw.c
  6. +55
    -11
      src/test.c
  7. +7
    -0
      src/utils.c

+ 1
- 1
CMakeLists.txt View File

@ -15,4 +15,4 @@ SHARED
)
add_executable(hnsw_test src/test.c)
target_link_libraries(hnsw_test hnswc m)
target_link_libraries(hnsw_test hnswc)

+ 18
- 11
README.md View File

@ -3,9 +3,9 @@
### 1. Testing Steps
1. Make sure `CMake` is installed:
1. Make sure `CMake` and other build tools are installed:
```shell
sudo apt-get install cmake
sudo apt-get install cmake build-essential
```
2. Create a `build` folder inside the `hnswlab` directory.
@ -27,18 +27,25 @@
6. Run the test program:
```shell
./hnsw_tst
./hnsw_test data_file_path data_size query_file_path query_size groundtruth_file_path
```
### 2. Interface Description
For example:
```shell
./hnsw_test ../dataset/siftsmall/siftsmall_base.fvecs 10000 ../dataset/siftsmall/siftsmall_query.fvecs 100 ../dataset/siftsmall/siftsmall_groundtruth.ivecs
```
### 2. Mission Description
To load the dataset, use the following code snippet in C:
```c
// Vector dimension
int num = 0;
// Read the dataset
VecData* vecs = fvecs_read("../dataset/siftsmall_base.fvecs", NULL, &num);
You need to implement the following two functions (declared in `hnsw.h`, defined in `hnsw.c`) using the HNSW algorithm:
```C
HNSWContext *hnsw_init_context(const char *filename, size_t dim, size_t len);
void hnsw_approximate_knn(HNSWContext *ctx, VecData *q, int *results, int k);
```
We have implemented data loading and provided the simplest possible KNN algorithm, but this implementation can only handle small datasets (the SIFTSMALL dataset). Please implement a new approximate KNN algorithm based on the HNSW algorithm so that it can handle large datasets (the SIFT dataset).
### 3. Data Download
---
Please visit http://corpus-texmex.irisa.fr/

+ 3
- 3
inc/hnsw.h View File

@ -18,12 +18,12 @@ typedef struct HNSWContext
HNSWGraph *graph; // graph of HNSW
} HNSWContext;
// you can define some help functions here
// you can declare some help functions here, and implement them in 'hnsw.c'
// public functions
// public functions here
// Please do not modify these function signatures!
// To simplify our program, we do not consider reclaiming memory space here.
// Please implement these functions according to HNSW algorithm.
// TODO: Please implement these functions according to HNSW algorithm.
HNSWContext *hnsw_init_context(const char *filename, size_t dim, size_t len);
void hnsw_approximate_knn(HNSWContext *ctx, VecData *q, int *results, int k);

+ 1
- 0
inc/utils.h View File

@ -19,4 +19,5 @@ float vec_dist(VecData x, VecData y);
FileContext* init_file_context(const char *filename);
void read_4bytes(FileContext* ctx, void* dst);
void read_vec_data(FileContext* ctx, void* dst);
void read_id_data(FileContext* ctx, void* dst, size_t n);
void free_file_context(FileContext* ctx);

+ 3
- 3
src/hnsw.c View File

@ -1,4 +1,4 @@
#include <limits.h>
#include <stdlib.h>
#include "hnsw.h"
HNSWContext *hnsw_init_context(const char *filename, size_t dim, size_t len)
@ -35,7 +35,7 @@ void hnsw_approximate_knn(HNSWContext *ctx, VecData *q, int *results, int k)
if (dist < min_dist)
{
min_dist = dist;
idx = i;
idx = j;
}
}
if (idx != i)
@ -47,7 +47,7 @@ void hnsw_approximate_knn(HNSWContext *ctx, VecData *q, int *results, int k)
}
// copy results
for (int i = 0; i < k; i++)
for (int i = 0; i < k && i < ctx->len; i++)
{
results[i] = ctx->data[i].id;
}

+ 55
- 11
src/test.c View File

@ -3,21 +3,65 @@
#include "hnsw.h"
#include "utils.h"
#define K 100
int main(int argc, char *argv[]) {
FileContext* f_ctx = init_file_context(argv[1]);
VecData data;
data.vec = malloc(sizeof(float) * GLOBAL_DIM);
for (int i = 0; i < 100; i++)
float cal_recall_value(int *results, int *trueset, int k)
{
int cnt = 0;
for (int i = 0; i < k; i++)
{
read_vec_data(f_ctx, data.vec);
for (int j = 0; j < GLOBAL_DIM; j++)
int val = results[i];
for (int j = 0; j < k; j++)
{
printf("%f ", data.vec[j]);
if (val == trueset[j])
{
cnt++;
break;
}
}
putchar('\n');
}
return 0;
return ((float) cnt) / ((float) k);
}
/*
 * Parse a command-line count argument.
 *
 * Returns the parsed value when `text` is a strictly positive decimal
 * integer with no trailing characters, and -1 otherwise.
 */
static long parse_count(const char *text)
{
    char *end = NULL;
    long value = strtol(text, &end, 10);

    if (end == text || *end != '\0' || value <= 0)
    {
        return -1;
    }
    return value;
}

/*
 * Recall benchmark driver.
 *
 * Usage:
 *   ./hnsw_test base_file data_size query_file query_size groundtruth_file
 *
 * Builds a search context from `base_file`, then for each of `query_size`
 * queries reads one query vector and K ground-truth ids, runs
 * hnsw_approximate_knn() and accumulates the recall. Prints the average
 * recall over all queries.
 *
 * Returns 0 on success; exits with status 1 on bad arguments or when a
 * resource cannot be set up.
 */
int main(int argc, char *argv[])
{
    if (argc != 6)
    {
        fprintf(stderr, "argc: %d\n", argc);
        fprintf(stderr, "./hnsw_test base_file data_size query_file query_size groundtruth_file\n");
        exit(1);
    }

    // Reject non-numeric, zero and negative sizes up front: a zero query
    // count would make the final average 0/0, and a negative data size
    // would turn into a huge size_t.
    long data_size = parse_count(argv[2]);
    long query_size = parse_count(argv[4]);
    if (data_size < 0 || query_size < 0)
    {
        fprintf(stderr, "data_size and query_size must be positive integers\n");
        exit(1);
    }
    printf("data size: %ld\n", data_size);

    // init query and groundtruth files
    FileContext *query_file_ctx = init_file_context(argv[3]);
    FileContext *gt_file_ctx = init_file_context(argv[5]);
    // NOTE(review): init_file_context() is not visible here; the NULL check
    // is defensive in case it reports failure that way -- confirm.
    if (query_file_ctx == NULL || gt_file_ctx == NULL)
    {
        fprintf(stderr, "failed to open query or groundtruth file\n");
        exit(1);
    }

    // TODO: report time cost here
    HNSWContext *ctx = hnsw_init_context(argv[1], GLOBAL_DIM, (size_t) data_size);
    if (ctx == NULL)
    {
        fprintf(stderr, "failed to initialize HNSW context\n");
        exit(1);
    }
    printf("HNSW context initialized!\n");

    VecData q_vec = {0};
    q_vec.vec = malloc(sizeof *q_vec.vec * GLOBAL_DIM);
    if (q_vec.vec == NULL)
    {
        fprintf(stderr, "out of memory\n");
        exit(1);
    }

    int q_results[K];
    int true_results[K];
    float total_recall_values = 0.0f;

    for (long i = 0; i < query_size; i++)
    {
        // The search may fill fewer than K slots (e.g. when the dataset
        // holds fewer than K vectors), so reset every slot first.
        // NOTE(review): assumes -1 is never a valid vector id -- confirm.
        for (int j = 0; j < K; j++)
        {
            q_results[j] = -1;
        }

        read_vec_data(query_file_ctx, q_vec.vec);
        read_id_data(gt_file_ctx, true_results, K);
        hnsw_approximate_knn(ctx, &q_vec, q_results, K);
        total_recall_values += cal_recall_value(q_results, true_results, K);
    }

    // report recall value
    printf("Recall value: %.4f\n", total_recall_values / ((float) query_size));

    free(q_vec.vec);
    free_file_context(query_file_ctx);
    free_file_context(gt_file_ctx);
    return 0;
}

+ 7
- 0
src/utils.c View File

@ -44,6 +44,13 @@ void read_vec_data(FileContext *ctx, void *dst)
assert(s == GLOBAL_DIM);
}
/*
 * Read the first `n` ids of the next record from `ctx->stream` into `dst`.
 *
 * A record is consumed as one 4-byte count followed by that many 4-byte
 * ids (presumably the .ivecs layout used by the ground-truth files --
 * confirm against the dataset documentation). The count is validated
 * instead of being discarded, and ids beyond the first `n` are skipped,
 * so the stream is left at the start of the next record.
 *
 * `dst` must have room for `n` 4-byte values. Nothing is written to `dst`
 * when `n` is 0. Failures are reported via assert(), matching
 * read_vec_data().
 */
void read_id_data(FileContext *ctx, void *dst, size_t n)
{
    // Record header: number of ids stored in this record.
    // NOTE(review): relies on int being 4 bytes wide, as the callers'
    // int buffers already do.
    int count = 0;
    read_4bytes(ctx, &count);
    assert(count >= 0 && (size_t) count >= n);

    size_t got = fread(dst, 4L, n, ctx->stream);
    assert(got == n);
    (void) got; // keeps NDEBUG builds free of unused-variable warnings

    // Skip the ids the caller did not ask for; otherwise the next call
    // would read an id as if it were a record header.
    if ((size_t) count > n)
    {
        int rc = fseek(ctx->stream, (long) ((size_t) count - n) * 4L, SEEK_CUR);
        assert(rc == 0);
        (void) rc;
    }
}
void free_file_context(FileContext* ctx)
{
fclose(ctx->stream);

Loading…
Cancel
Save