From dfdaa316dc6da927384deaa3b65a02e975d3c5e5 Mon Sep 17 00:00:00 2001 From: Chen Lixiang Date: Fri, 30 Jun 2023 02:21:53 +0800 Subject: [PATCH] fix format --- README.md | 10 +++++++--- src/hnsw.c | 8 ++++---- src/test.c | 8 ++++---- 3 files changed, 15 insertions(+), 11 deletions(-) diff --git a/README.md b/README.md index 631ce65..0099270 100644 --- a/README.md +++ b/README.md @@ -35,17 +35,21 @@ ./hnsw_test ../dataset/siftsmall/siftsmall_base.fvecs 10000 ../dataset/siftsmall/siftsmall_query.fvecs 100 ../dataset/siftsmall/siftsmall_groundtruth.ivecs ``` + Our test program will report the recall value and time costs of your algorithm. + ### 2. Mission Description You need to implement two functions inside hnsw.h and hnsw.c in HNSW way: ```C -HNSWContext *hnsw_init_context(const char *filename, size_t dim, size_t len); -void hnsw_approximate_knn(HNSWContext *ctx, VecData *q, int *results, int k); +HNSWContext *hnsw_init_context(const char *filename, size_t dim, size_t len); // load data and build graph +void hnsw_approximate_knn(HNSWContext *ctx, VecData *q, int *results, int k); // search KNN results ``` -We have implemented data loading and provided a simplest KNN algorithm. But our implementation can only handle small batches of data(SIFTSMALL dataset), please implement a new approximate KNN algorithm based on the HNSW algorithm so that it can handle large batches of data(SIFT dataset). +We have implemented data loading and provided a simple KNN algorithm. However, our implementation can only handle small batches of data (SIFTSMALL dataset). Please implement a new approximate KNN algorithm based on the HNSW algorithm so that it can handle large batches of data (SIFT dataset) efficiently. ### 3. 
Data Download Please visit http://corpus-texmex.irisa.fr/ + +TODO: We should provide a script to download datasets automatically diff --git a/src/hnsw.c b/src/hnsw.c index fc8ab67..386f115 100644 --- a/src/hnsw.c +++ b/src/hnsw.c @@ -3,18 +3,18 @@ HNSWContext *hnsw_init_context(const char *filename, size_t dim, size_t len) { - HNSWContext *ctx = (HNSWContext *) malloc(sizeof(HNSWContext)); + HNSWContext *ctx = (HNSWContext *)malloc(sizeof(HNSWContext)); ctx->dim = dim; ctx->len = len; - ctx->data = (VecData *) malloc(sizeof(VecData) * len); + ctx->data = (VecData *)malloc(sizeof(VecData) * len); // init file context - FileContext* f_ctx = init_file_context(filename); + FileContext *f_ctx = init_file_context(filename); for (int i = 0; i < len; i++) { ctx->data[i].id = i; - ctx->data[i].vec = (float *) malloc(sizeof(float) * GLOBAL_DIM); + ctx->data[i].vec = (float *)malloc(sizeof(float) * GLOBAL_DIM); read_vec_data(f_ctx, ctx->data[i].vec); } diff --git a/src/test.c b/src/test.c index 5517b33..88bf23e 100644 --- a/src/test.c +++ b/src/test.c @@ -21,7 +21,7 @@ float cal_recall_value(int *results, int *trueset, int k) } } - return ((float) cnt) / ((float) k); + return ((float)cnt) / ((float)k); } int main(int argc, char *argv[]) @@ -43,10 +43,10 @@ int main(int argc, char *argv[]) // TODO: report time cost here - HNSWContext* ctx = hnsw_init_context(argv[1], GLOBAL_DIM, data_size); + HNSWContext *ctx = hnsw_init_context(argv[1], GLOBAL_DIM, data_size); printf("HNSW context initialized!\n"); VecData q_vec; - q_vec.vec = (float *) malloc(sizeof(float) * GLOBAL_DIM); + q_vec.vec = (float *)malloc(sizeof(float) * GLOBAL_DIM); int q_results[100]; int true_results[100]; float total_recall_values = 0.0; @@ -59,7 +59,7 @@ int main(int argc, char *argv[]) } // report recall value - printf("Recall value: %.4f\n", total_recall_values / ((float) query_size)); + printf("Recall value: %.4f\n", total_recall_values / ((float)query_size)); free_file_context(query_file_ctx); 
free_file_context(gt_file_ctx);