diff --git a/code/gemm_tile.cpp b/code/gemm_tile.cpp index 44c4ebc..1af1215 100644 --- a/code/gemm_tile.cpp +++ b/code/gemm_tile.cpp @@ -18,8 +18,8 @@ double gpu_kernel(float *A, float *B, float *C, int BLOCK, sycl::queue &q) { // define the workgroup size and mapping - auto grid_rows = M / tileX; - auto grid_cols = N / tileY; + auto grid_rows = M / tileY; + auto grid_cols = N / tileX; auto local_ndrange = range<2>(BLOCK, BLOCK); auto global_ndrange = range<2>(grid_rows, grid_cols); @@ -29,8 +29,8 @@ double gpu_kernel(float *A, float *B, float *C, h.parallel_for( sycl::nd_range<2>(global_ndrange, local_ndrange), [=](sycl::nd_item<2> index) { - int row = tileX * index.get_global_id(0); - int col = tileY * index.get_global_id(1); + int row = tileY * index.get_global_id(0); + int col = tileX * index.get_global_id(1); float sum[tileY][tileX] = {0.0f}; float subA[tileY] = {0.0f};