Nie możesz wybrać więcej, niż 25 tematów Tematy muszą się zaczynać od litery lub cyfry, mogą zawierać myślniki ('-') i mogą mieć do 35 znaków.

183 wiersze
5.3 KiB

7 miesięcy temu
  1. /*
  2. * trans.c - Matrix transpose B = A^T
  3. *
  4. * Each transpose function must have a prototype of the form:
  5. * void trans(int M, int N, int A[N][M], int B[M][N]);
  6. *
  7. * A transpose function is evaluated by counting the number of misses
  8. * on a 1KB direct mapped cache with a block size of 32 bytes.
  9. */
  10. #include <stdio.h>
  11. #include "cachelab.h"
  12. int is_transpose(int M, int N, int A[N][M], int B[M][N]);
  13. /*
  14. * transpose_submit - This is the solution transpose function that you
  15. * will be graded on for Part B of the assignment. Do not change
  16. * the description string "Transpose submission", as the driver
  17. * searches for that string to identify the transpose function to
  18. * be graded.
  19. */
  20. char transpose_submit_desc[] = "Transpose submission";
  21. void transpose_submit(int M, int N, int A[N][M], int B[M][N])
  22. {
  23. int i, j, k, l, a0, a1, a2, a3, a4, a5, a6, a7;
  24. if(M == 32){
  25. for (i = 0; i < N; i+=8) {
  26. for (j = 0; j < M; j+=8) {
  27. if(i == j){
  28. for(k = i ;k < i + 8 && k<N;k++){
  29. a0 = A[k][j];
  30. a1 = A[k][j+1];
  31. a2 = A[k][j+2];
  32. a3 = A[k][j+3];
  33. a4 = A[k][j+4];
  34. a5 = A[k][j+5];
  35. a6 = A[k][j+6];
  36. a7 = A[k][j+7];
  37. B[j][k] = a0;
  38. B[j+1][k] = a1;
  39. B[j+2][k] = a2;
  40. B[j+3][k] = a3;
  41. B[j+4][k] = a4;
  42. B[j+5][k] = a5;
  43. B[j+6][k] = a6;
  44. B[j+7][k] = a7;
  45. }
  46. }
  47. else{
  48. for(k = i ;k < i + 8 && k<N;k++){
  49. for(l = j ; l < j + 8 && l < M;l++)
  50. B[l][k] = A[k][l];
  51. }
  52. }
  53. }
  54. }
  55. }
  56. else if(M == 64){
  57. for (i = 0; i < N; i += 8) {
  58. for (j = 0; j < M; j += 8) {
  59. for (k = i; k < i + 4; k++) {
  60. a0 = A[k][j];
  61. a1 = A[k][j + 1];
  62. a2 = A[k][j + 2];
  63. a3 = A[k][j + 3];
  64. a4 = A[k][j + 4];
  65. a5 = A[k][j + 5];
  66. a6 = A[k][j + 6];
  67. a7 = A[k][j + 7];
  68. B[j][k] = a0;
  69. B[j + 1][k] = a1;
  70. B[j + 2][k] = a2;
  71. B[j + 3][k] = a3;
  72. B[j][k + 4] = a4;
  73. B[j + 1][k + 4] = a5;
  74. B[j + 2][k + 4] = a6;
  75. B[j + 3][k + 4] = a7;
  76. }
  77. for (l = j + 4; l < j + 8; l++) {
  78. a4 = A[i + 4][l - 4]; // A left-down col
  79. a5 = A[i + 5][l - 4];
  80. a6 = A[i + 6][l - 4];
  81. a7 = A[i + 7][l - 4];
  82. a0 = B[l - 4][i + 4]; // B right-above line
  83. a1 = B[l - 4][i + 5];
  84. a2 = B[l - 4][i + 6];
  85. a3 = B[l - 4][i + 7];
  86. B[l - 4][i + 4] = a4; // set B right-above line
  87. B[l - 4][i + 5] = a5;
  88. B[l - 4][i + 6] = a6;
  89. B[l - 4][i + 7] = a7;
  90. B[l][i] = a0; // set B left-down line
  91. B[l][i + 1] = a1;
  92. B[l][i + 2] = a2;
  93. B[l][i + 3] = a3;
  94. B[l][i + 4] = A[i + 4][l];
  95. B[l][i + 5] = A[i + 5][l];
  96. B[l][i + 6] = A[i + 6][l];
  97. B[l][i + 7] = A[i + 7][l];
  98. }
  99. }
  100. }
  101. }
  102. else if (M == 61){
  103. for (i = 0; i < N; i += 16) {
  104. for (j = 0; j < M; j += 16) {
  105. for (k = i; k < i + 16&& k<N; k++) {
  106. for(l =j ;l<j+16&&l<M;l++)
  107. B[l][k] = A[k][l];
  108. }
  109. }
  110. }
  111. }
  112. }
  113. /*
  114. * You can define additional transpose functions below. We've defined
  115. * a simple one below to help you get started.
  116. */
  117. /*
  118. * trans - A simple baseline transpose function, not optimized for the cache.
  119. */
  120. char trans_desc[] = "Simple row-wise scan transpose";
  121. void trans(int M, int N, int A[N][M], int B[M][N])
  122. {
  123. int i, j, tmp;
  124. for (i = 0; i < N; i++) {
  125. for (j = 0; j < M; j++) {
  126. tmp = A[i][j];
  127. B[j][i] = tmp;
  128. }
  129. }
  130. }
  131. /*
  132. * registerFunctions - This function registers your transpose
  133. * functions with the driver. At runtime, the driver will
  134. * evaluate each of the registered functions and summarize their
  135. * performance. This is a handy way to experiment with different
  136. * transpose strategies.
  137. */
  138. void registerFunctions()
  139. {
  140. /* Register your solution function */
  141. registerTransFunction(transpose_submit, transpose_submit_desc);
  142. /* Register any additional transpose functions */
  143. registerTransFunction(trans, trans_desc);
  144. }
  145. /*
  146. * is_transpose - This helper function checks if B is the transpose of
  147. * A. You can check the correctness of your transpose by calling
  148. * it before returning from the transpose function.
  149. */
  150. int is_transpose(int M, int N, int A[N][M], int B[M][N])
  151. {
  152. int i, j;
  153. for (i = 0; i < N; i++) {
  154. for (j = 0; j < M; ++j) {
  155. if (A[i][j] != B[j][i]) {
  156. return 0;
  157. }
  158. }
  159. }
  160. return 1;
  161. }