Browse Source

update the params for better understanding

main
PatricZhao 3 years ago
committed by GitHub
parent
commit
0f8f169892
No known key found for this signature in database GPG Key ID: 4AEE18F83AFDEB23
1 changed files with 4 additions and 4 deletions
  1. +4
    -4
      code/gemm_tile.cpp

+ 4
- 4
code/gemm_tile.cpp View File

@@ -18,8 +18,8 @@ double gpu_kernel(float *A, float *B, float *C,
int BLOCK, sycl::queue &q) {
// define the workgroup size and mapping
- auto grid_rows = M / tileX;
- auto grid_cols = N / tileY;
+ auto grid_rows = M / tileY;
+ auto grid_cols = N / tileX;
auto local_ndrange = range<2>(BLOCK, BLOCK);
auto global_ndrange = range<2>(grid_rows, grid_cols);
@@ -29,8 +29,8 @@ double gpu_kernel(float *A, float *B, float *C,
h.parallel_for<class k_name_t>(
sycl::nd_range<2>(global_ndrange, local_ndrange), [=](sycl::nd_item<2> index) {
- int row = tileX * index.get_global_id(0);
- int col = tileY * index.get_global_id(1);
+ int row = tileY * index.get_global_id(0);
+ int col = tileX * index.get_global_id(1);
float sum[tileY][tileX] = {0.0f};
float subA[tileY] = {0.0f};

Loading…
Cancel
Save