You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 

183 lines
5.3 KiB

/*
* trans.c - Matrix transpose B = A^T
*
* Each transpose function must have a prototype of the form:
* void trans(int M, int N, int A[N][M], int B[M][N]);
*
* A transpose function is evaluated by counting the number of misses
* on a 1KB direct mapped cache with a block size of 32 bytes.
*/
#include <stdio.h>
#include "cachelab.h"
int is_transpose(int M, int N, int A[N][M], int B[M][N]);
/*
* transpose_submit - This is the solution transpose function that you
* will be graded on for Part B of the assignment. Do not change
* the description string "Transpose submission", as the driver
* searches for that string to identify the transpose function to
* be graded.
*/
char transpose_submit_desc[] = "Transpose submission";
void transpose_submit(int M, int N, int A[N][M], int B[M][N])
{
int i, j, k, l, a0, a1, a2, a3, a4, a5, a6, a7;
if(M == 32){
for (i = 0; i < N; i+=8) {
for (j = 0; j < M; j+=8) {
if(i == j){
for(k = i ;k < i + 8 && k<N;k++){
a0 = A[k][j];
a1 = A[k][j+1];
a2 = A[k][j+2];
a3 = A[k][j+3];
a4 = A[k][j+4];
a5 = A[k][j+5];
a6 = A[k][j+6];
a7 = A[k][j+7];
B[j][k] = a0;
B[j+1][k] = a1;
B[j+2][k] = a2;
B[j+3][k] = a3;
B[j+4][k] = a4;
B[j+5][k] = a5;
B[j+6][k] = a6;
B[j+7][k] = a7;
}
}
else{
for(k = i ;k < i + 8 && k<N;k++){
for(l = j ; l < j + 8 && l < M;l++)
B[l][k] = A[k][l];
}
}
}
}
}
else if(M == 64){
for (i = 0; i < N; i += 8) {
for (j = 0; j < M; j += 8) {
for (k = i; k < i + 4; k++) {
a0 = A[k][j];
a1 = A[k][j + 1];
a2 = A[k][j + 2];
a3 = A[k][j + 3];
a4 = A[k][j + 4];
a5 = A[k][j + 5];
a6 = A[k][j + 6];
a7 = A[k][j + 7];
B[j][k] = a0;
B[j + 1][k] = a1;
B[j + 2][k] = a2;
B[j + 3][k] = a3;
B[j][k + 4] = a4;
B[j + 1][k + 4] = a5;
B[j + 2][k + 4] = a6;
B[j + 3][k + 4] = a7;
}
for (l = j + 4; l < j + 8; l++) {
a4 = A[i + 4][l - 4]; // A left-down col
a5 = A[i + 5][l - 4];
a6 = A[i + 6][l - 4];
a7 = A[i + 7][l - 4];
a0 = B[l - 4][i + 4]; // B right-above line
a1 = B[l - 4][i + 5];
a2 = B[l - 4][i + 6];
a3 = B[l - 4][i + 7];
B[l - 4][i + 4] = a4; // set B right-above line
B[l - 4][i + 5] = a5;
B[l - 4][i + 6] = a6;
B[l - 4][i + 7] = a7;
B[l][i] = a0; // set B left-down line
B[l][i + 1] = a1;
B[l][i + 2] = a2;
B[l][i + 3] = a3;
B[l][i + 4] = A[i + 4][l];
B[l][i + 5] = A[i + 5][l];
B[l][i + 6] = A[i + 6][l];
B[l][i + 7] = A[i + 7][l];
}
}
}
}
else if (M == 61){
for (i = 0; i < N; i += 16) {
for (j = 0; j < M; j += 16) {
for (k = i; k < i + 16&& k<N; k++) {
for(l =j ;l<j+16&&l<M;l++)
B[l][k] = A[k][l];
}
}
}
}
}
/*
* You can define additional transpose functions below. We've defined
* a simple one below to help you get started.
*/
/*
* trans - A simple baseline transpose function, not optimized for the cache.
*/
char trans_desc[] = "Simple row-wise scan transpose";
void trans(int M, int N, int A[N][M], int B[M][N])
{
int i, j, tmp;
for (i = 0; i < N; i++) {
for (j = 0; j < M; j++) {
tmp = A[i][j];
B[j][i] = tmp;
}
}
}
/*
* registerFunctions - This function registers your transpose
* functions with the driver. At runtime, the driver will
* evaluate each of the registered functions and summarize their
* performance. This is a handy way to experiment with different
* transpose strategies.
*/
void registerFunctions()
{
/* Register your solution function */
registerTransFunction(transpose_submit, transpose_submit_desc);
/* Register any additional transpose functions */
registerTransFunction(trans, trans_desc);
}
/*
* is_transpose - This helper function checks if B is the transpose of
* A. You can check the correctness of your transpose by calling
* it before returning from the transpose function.
*/
int is_transpose(int M, int N, int A[N][M], int B[M][N])
{
int i, j;
for (i = 0; i < N; i++) {
for (j = 0; j < M; ++j) {
if (A[i][j] != B[j][i]) {
return 0;
}
}
}
return 1;
}