@ -0,0 +1,30 @@ | |||||
# | |||||
# Students' Makefile for the Malloc Lab | |||||
# | |||||
TEAM = bovik
VERSION = 1
HANDINDIR = /afs/cs.cmu.edu/academic/class/15213-f01/malloclab/handin

CC = gcc
CFLAGS = -Wall -O2 -m32

OBJS = mdriver.o mm.o memlib.o fsecs.o fcyc.o clock.o ftimer.o

# handin and clean name actions, not files. Marking them phony keeps a stray
# file called "handin" or "clean" from making these rules look up to date.
.PHONY: handin clean

# First rule, so it is the default goal: link the driver from all objects.
mdriver: $(OBJS)
	$(CC) $(CFLAGS) -o mdriver $(OBJS)

# Header dependencies; the objects themselves are built by make's implicit
# .c -> .o rule using $(CC) and $(CFLAGS).
mdriver.o: mdriver.c fsecs.h fcyc.h clock.h memlib.h config.h mm.h
memlib.o: memlib.c memlib.h
mm.o: mm.c mm.h memlib.h
fsecs.o: fsecs.c fsecs.h config.h
fcyc.o: fcyc.c fcyc.h
ftimer.o: ftimer.c ftimer.h config.h
clock.o: clock.c clock.h

# Copy the solution into the hand-in directory as <team>-<version>-mm.c.
handin:
	cp mm.c $(HANDINDIR)/$(TEAM)-$(VERSION)-mm.c

# Remove editor backups, objects and the driver binary.
clean:
	rm -f *~ *.o mdriver
@ -0,0 +1,52 @@ | |||||
##################################################################### | |||||
# CS:APP Malloc Lab | |||||
# Handout files for students | |||||
# | |||||
# Copyright (c) 2002, R. Bryant and D. O'Hallaron, All rights reserved. | |||||
# May not be used, modified, or copied without permission. | |||||
# | |||||
###################################################################### | |||||
*********** | |||||
Main Files: | |||||
*********** | |||||
mm.{c,h} | |||||
Your solution malloc package. mm.c is the file that you | |||||
will be handing in, and is the only file you should modify. | |||||
mdriver.c | |||||
The malloc driver that tests your mm.c file | |||||
short{1,2}-bal.rep | |||||
Two tiny tracefiles to help you get started. | |||||
Makefile | |||||
Builds the driver | |||||
********************************** | |||||
Other support files for the driver | |||||
********************************** | |||||
config.h Configures the malloc lab driver | |||||
fsecs.{c,h} Wrapper function for the different timer packages | |||||
clock.{c,h} Routines for accessing the Pentium and Alpha cycle counters | |||||
fcyc.{c,h} Timer functions based on cycle counters | |||||
ftimer.{c,h} Timer functions based on interval timers and gettimeofday() | |||||
memlib.{c,h} Models the heap and sbrk function | |||||
******************************* | |||||
Building and running the driver | |||||
******************************* | |||||
To build the driver, type "make" at the shell prompt.
To run the driver on a tiny test trace: | |||||
unix> mdriver -V -f short1-bal.rep | |||||
The -V option prints out helpful tracing and summary information. | |||||
To get a list of the driver flags: | |||||
unix> mdriver -h | |||||
@ -0,0 +1,279 @@ | |||||
/* | |||||
* clock.c - Routines for using the cycle counters on x86, | |||||
* Alpha, and Sparc boxes. | |||||
* | |||||
* Copyright (c) 2002, R. Bryant and D. O'Hallaron, All rights reserved. | |||||
* May not be used, modified, or copied without permission. | |||||
*/ | |||||
#include <stdio.h> | |||||
#include <stdlib.h> | |||||
#include <unistd.h> | |||||
#include <sys/times.h> | |||||
#include "clock.h" | |||||
/******************************************************* | |||||
* Machine dependent functions | |||||
* | |||||
* Note: the constants __i386__ and __alpha | |||||
* are set by GCC when it calls the C preprocessor | |||||
* You can verify this for yourself using gcc -v. | |||||
*******************************************************/ | |||||
#if defined(__i386__) | |||||
/******************************************************* | |||||
* Pentium versions of start_counter() and get_counter() | |||||
*******************************************************/ | |||||
/* $begin x86cyclecounter */ | |||||
/* Initialize the cycle counter */ | |||||
/* Counter value captured by the most recent call to start_counter() */
static unsigned cyc_hi = 0;
static unsigned cyc_lo = 0;

/*
 * access_counter - read the processor's time-stamp counter.
 *
 * Stores the high-order 32 bits of the counter in *hi and the low-order
 * 32 bits in *lo. The rdtsc instruction leaves the value in EDX:EAX, so
 * the "d" and "a" constraints bind the outputs to those registers directly.
 *
 * The statement is volatile because it has outputs but no inputs: without
 * it the compiler may treat two reads as identical and merge them, or move
 * a read across the code being timed.
 */
void access_counter(unsigned *hi, unsigned *lo)
{
    unsigned hi_bits, lo_bits;

    __asm__ __volatile__("rdtsc" : "=d" (hi_bits), "=a" (lo_bits));
    *hi = hi_bits;
    *lo = lo_bits;
}
/* Record the current value of the cycle counter. */ | |||||
void start_counter() | |||||
{ | |||||
access_counter(&cyc_hi, &cyc_lo); | |||||
} | |||||
/* Return the number of cycles since the last call to start_counter. */ | |||||
double get_counter() | |||||
{ | |||||
unsigned ncyc_hi, ncyc_lo; | |||||
unsigned hi, lo, borrow; | |||||
double result; | |||||
/* Get cycle counter */ | |||||
access_counter(&ncyc_hi, &ncyc_lo); | |||||
/* Do double precision subtraction */ | |||||
lo = ncyc_lo - cyc_lo; | |||||
borrow = lo > ncyc_lo; | |||||
hi = ncyc_hi - cyc_hi - borrow; | |||||
result = (double) hi * (1 << 30) * 4 + lo; | |||||
if (result < 0) { | |||||
fprintf(stderr, "Error: counter returns neg value: %.0f\n", result); | |||||
} | |||||
return result; | |||||
} | |||||
/* $end x86cyclecounter */ | |||||
#elif defined(__alpha) | |||||
/**************************************************** | |||||
* Alpha versions of start_counter() and get_counter() | |||||
***************************************************/ | |||||
/* Counter value captured by the most recent call to start_counter() */
static unsigned cyc_hi = 0;
static unsigned cyc_lo = 0;
/* Use the Alpha cycle timer to compute cycles. Then use the
   measured clock speed to compute seconds.
*/
/*
 * counterRoutine is an array of Alpha instructions that access
 * the Alpha's processor cycle counter. It uses the rpcc
 * instruction to read the counter. This 64-bit register is
 * divided into two parts. The lower 32 bits are the cycles
 * used by the current process. The upper 32 bits are wall
 * clock cycles. These instructions read the counter and
 * convert the lower 32 bits into an unsigned int - this is the
 * user space counter value.
 * NOTE: The counter has a very limited time span. With a
 * 450 MHz clock the counter can time things for about 9
 * seconds. */
static unsigned int counterRoutine[] =
{
0x601fc000u,
0x401f0000u,
0x6bfa8001u
};
/* View the instruction words above as a callable function: counter()
   executes them and yields an unsigned int. */
static unsigned int (*counter)(void)= (void *)counterRoutine;
void start_counter() | |||||
{ | |||||
/* Get cycle counter */ | |||||
cyc_hi = 0; | |||||
cyc_lo = counter(); | |||||
} | |||||
double get_counter() | |||||
{ | |||||
unsigned ncyc_hi, ncyc_lo; | |||||
unsigned hi, lo, borrow; | |||||
double result; | |||||
ncyc_lo = counter(); | |||||
ncyc_hi = 0; | |||||
lo = ncyc_lo - cyc_lo; | |||||
borrow = lo > ncyc_lo; | |||||
hi = ncyc_hi - cyc_hi - borrow; | |||||
result = (double) hi * (1 << 30) * 4 + lo; | |||||
if (result < 0) { | |||||
fprintf(stderr, "Error: Cycle counter returning negative value: %.0f\n", result); | |||||
} | |||||
return result; | |||||
} | |||||
#else | |||||
/**************************************************************** | |||||
* All the other platforms for which we haven't implemented cycle | |||||
* counter routines. Newer models of sparcs (v8plus) have cycle | |||||
* counters that can be accessed from user programs, but since there | |||||
* are still many sparc boxes out there that don't support this, we | |||||
* haven't provided a Sparc version here. | |||||
***************************************************************/ | |||||
/*
 * start_counter - placeholder for platforms without a cycle-counter
 * implementation: explains the problem on stdout and exits with status 1.
 */
void start_counter()
{
    fputs("ERROR: You are trying to use a start_counter routine in clock.c\n", stdout);
    fputs("that has not been implemented yet on this platform.\n", stdout);
    fputs("Please choose another timing package in config.h.\n", stdout);
    exit(1);
}
/*
 * get_counter - placeholder for platforms without a cycle-counter
 * implementation: explains the problem on stdout and exits with status 1.
 * It never returns, so no value is produced.
 */
double get_counter()
{
    fputs("ERROR: You are trying to use a get_counter routine in clock.c\n", stdout);
    fputs("that has not been implemented yet on this platform.\n", stdout);
    fputs("Please choose another timing package in config.h.\n", stdout);
    exit(1);
}
#endif | |||||
/******************************* | |||||
* Machine-independent functions | |||||
******************************/ | |||||
/*
 * ovhd - measure the cost, in cycles, of a back-to-back
 * start_counter()/get_counter() pair. The pair is run twice and only the
 * second reading is returned, so the first run serves as a warm-up.
 */
double ovhd()
{
    /* warm-up pass: result deliberately discarded */
    start_counter();
    (void) get_counter();

    /* measured pass */
    start_counter();
    return get_counter();
}
/* $begin mhz */ | |||||
/*
 * mhz_full - estimate the clock rate, in MHz, by counting the cycles that
 * elapse while sleeping for sleeptime seconds.
 *
 * verbose:   when non-zero, print the estimate to stdout.
 * sleeptime: seconds to sleep; a larger value gives a steadier estimate.
 *            Non-positive values are replaced by 1, because 0 would make
 *            the divisor zero and a negative value would be converted to a
 *            huge unsigned sleep.
 *
 * Returns the estimated rate in MHz.
 */
double mhz_full(int verbose, int sleeptime)
{
    double rate;
    unsigned remaining;

    if (sleeptime <= 0)
        sleeptime = 1;

    start_counter();
    /* sleep() returns the unslept seconds when a signal cuts it short;
       keep sleeping so the divisor below matches the time really slept */
    remaining = (unsigned) sleeptime;
    while (remaining > 0)
        remaining = sleep(remaining);
    rate = get_counter() / (1e6*sleeptime);
    if (verbose)
        printf("Processor clock rate ~= %.1f MHz\n", rate);
    return rate;
}
/* $end mhz */ | |||||
/*
 * mhz - estimate the clock rate in MHz using a two-second sleep.
 * verbose is passed through to mhz_full().
 */
double mhz(int verbose)
{
    const int default_sleeptime = 2;

    return mhz_full(verbose, default_sleeptime);
}
/** Special counters that compensate for timer interrupt overhead */

/* Smallest cycles-per-clock-tick ratio seen so far; 0.0 means not measured */
static double cyc_per_tick = 0.0;

#define NEVENT 100        /* number of tick changes to observe */
#define THRESHOLD 1000    /* minimum cycle gap worth examining */
#define RECORDTHRESH 3000 /* ratios at or below this are not recorded */

/*
 * callibrate - estimate how many cycles correspond to one tick of the
 * process's user-time clock (tms_utime).
 *
 * Spins reading the cycle counter. Whenever at least THRESHOLD cycles have
 * passed since the previous checkpoint it re-reads the user time; if that
 * advanced, the cycles-per-tick ratio for the interval is computed and the
 * smallest ratio above RECORDTHRESH is kept in cyc_per_tick. Stops after
 * NEVENT tick changes. When verbose is non-zero the result is printed.
 */
static void callibrate(int verbose)
{
    struct tms usage;
    clock_t prev_ticks;
    double prev_cycles;
    int events = 0;

    times(&usage);
    prev_ticks = usage.tms_utime;
    start_counter();
    prev_cycles = get_counter();

    while (events < NEVENT) {
        double now_cycles = get_counter();
        clock_t now_ticks;

        if (now_cycles - prev_cycles < THRESHOLD)
            continue; /* gap too small to be interesting; keep spinning */

        times(&usage);
        now_ticks = usage.tms_utime;
        if (now_ticks > prev_ticks) {
            double ratio = (now_cycles - prev_cycles)/(now_ticks - prev_ticks);

            if (ratio > RECORDTHRESH &&
                (cyc_per_tick == 0.0 || ratio < cyc_per_tick))
                cyc_per_tick = ratio;
            events++;
            prev_ticks = now_ticks;
        }
        prev_cycles = now_cycles;
    }

    if (verbose)
        printf("Setting cyc_per_tick to %f\n", cyc_per_tick);
}
static clock_t start_tick = 0; | |||||
void start_comp_counter() | |||||
{ | |||||
struct tms t; | |||||
if (cyc_per_tick == 0.0) | |||||
callibrate(0); | |||||
times(&t); | |||||
start_tick = t.tms_utime; | |||||
start_counter(); | |||||
} | |||||
double get_comp_counter() | |||||
{ | |||||
double time = get_counter(); | |||||
double ctime; | |||||
struct tms t; | |||||
clock_t ticks; | |||||
times(&t); | |||||
ticks = t.tms_utime - start_tick; | |||||
ctime = time - ticks*cyc_per_tick; | |||||
/* | |||||
printf("Measured %.0f cycles. Ticks = %d. Corrected %.0f cycles\n", | |||||
time, (int) ticks, ctime); | |||||
*/ | |||||
return ctime; | |||||
} | |||||
@ -0,0 +1,22 @@ | |||||
/* Routines for using cycle counter */
#ifndef CLOCK_H
#define CLOCK_H

/* Start the counter */
void start_counter(void);

/* Get # cycles since counter started */
double get_counter(void);

/* Measure overhead for counter */
double ovhd(void);

/* Determine clock rate of processor (using a default sleeptime) */
double mhz(int verbose);

/* Determine clock rate of processor, having more control over accuracy */
double mhz_full(int verbose, int sleeptime);

/** Special counters that compensate for timer interrupt overhead */
void start_comp_counter(void);
double get_comp_counter(void);

#endif /* CLOCK_H */
@ -0,0 +1,72 @@ | |||||
#ifndef __CONFIG_H_
#define __CONFIG_H_

/*
 * config.h - malloc lab configuration file
 *
 * Copyright (c) 2002, R. Bryant and D. O'Hallaron, All rights reserved.
 * May not be used, modified, or copied without permission.
 */

/*
 * This is the default path where the driver will look for the
 * default tracefiles. You can override it at runtime with the -t flag.
 */
#define TRACEDIR "/afs/cs/project/ics2/im/labs/malloclab/traces/"

/*
 * This is the list of default tracefiles in TRACEDIR that the driver
 * will use for testing. Modify this if you want to add or delete
 * traces from the driver's test suite. For example, if you don't want
 * your students to implement realloc, you can delete the last two
 * traces.
 */
#define DEFAULT_TRACEFILES \
  "amptjp-bal.rep",\
  "cccp-bal.rep",\
  "cp-decl-bal.rep",\
  "expr-bal.rep",\
  "coalescing-bal.rep",\
  "random-bal.rep",\
  "random2-bal.rep",\
  "binary-bal.rep",\
  "binary2-bal.rep",\
  "realloc-bal.rep",\
  "realloc2-bal.rep"

/*
 * This constant gives the estimated performance of the libc malloc
 * package using our traces on some reference system, typically the
 * same kind of system the students use. Its purpose is to cap the
 * contribution of throughput to the performance index. Once the
 * students surpass the AVG_LIBC_THRUPUT, they get no further benefit
 * to their score. This deters students from building extremely fast,
 * but extremely stupid malloc packages.
 */
#define AVG_LIBC_THRUPUT 600E3 /* 600 Kops/sec */

/*
 * This constant determines the contributions of space utilization
 * (UTIL_WEIGHT) and throughput (1 - UTIL_WEIGHT) to the performance
 * index.
 */
#define UTIL_WEIGHT .60

/*
 * Alignment requirement in bytes (either 4 or 8)
 */
#define ALIGNMENT 8

/*
 * Maximum heap size in bytes
 */
#define MAX_HEAP (20*(1<<20)) /* 20 MB */

/*****************************************************************************
 * Set exactly one of these USE_xxx constants to "1" to select a timing method
 *****************************************************************************/
#define USE_FCYC 0   /* cycle counter w/K-best scheme (x86 & Alpha only) */
#define USE_ITIMER 0 /* interval timer (any Unix box) */
#define USE_GETTOD 1 /* gettimeofday (any Unix box) */

/*
 * Enforce the rule above at compile time: with no method selected, the
 * timing wrapper that dispatches on these constants has no branch to run.
 */
#if ((USE_FCYC != 0) + (USE_ITIMER != 0) + (USE_GETTOD != 0)) != 1
#error "config.h: set exactly one of USE_FCYC, USE_ITIMER, USE_GETTOD to 1"
#endif

#endif /* __CONFIG_H_ */
@ -0,0 +1,251 @@ | |||||
/* | |||||
* fcyc.c - Estimate the time (in CPU cycles) used by a function f | |||||
* | |||||
* Copyright (c) 2002, R. Bryant and D. O'Hallaron, All rights reserved. | |||||
* May not be used, modified, or copied without permission. | |||||
* | |||||
* Uses the cycle timer routines in clock.c to estimate the
* time in CPU cycles for a function f.
*/ | |||||
#include <stdlib.h> | |||||
#include <sys/times.h> | |||||
#include <stdio.h> | |||||
#include "fcyc.h" | |||||
#include "clock.h" | |||||
/* Default values; each can be changed at run time through the matching
   set_fcyc_* function */
#define K 3 /* Value of K in K-best scheme */
#define MAXSAMPLES 20 /* Give up after MAXSAMPLES */
#define EPSILON 0.01 /* K best samples should be within EPSILON of each other */
#define COMPENSATE 0 /* 1-> try to compensate for clock ticks */
#define CLEAR_CACHE 0 /* Clear cache before running test function */
#define CACHE_BYTES (1<<19) /* Max cache size in bytes */
#define CACHE_BLOCK 32 /* Cache block size in bytes */
/* Current settings, initialised from the defaults above */
static int kbest = K;
static int maxsamples = MAXSAMPLES;
static double epsilon = EPSILON;
static int compensate = COMPENSATE;
static int clear_cache = CLEAR_CACHE;
static int cache_bytes = CACHE_BYTES;
static int cache_block = CACHE_BLOCK;
/* Buffer read by clear(); allocated on first use */
static int *cache_buf = NULL;
/* The kbest smallest samples seen so far, in ascending order */
static double *values = NULL;
/* Number of samples taken in the current measurement */
static int samplecount = 0;
/* for debugging only */
#define KEEP_VALS 0
#define KEEP_SAMPLES 0
#if KEEP_SAMPLES
static double *samples = NULL;
#endif
/* | |||||
* init_sampler - Start new sampling process | |||||
*/ | |||||
static void init_sampler() | |||||
{ | |||||
if (values) | |||||
free(values); | |||||
values = calloc(kbest, sizeof(double)); | |||||
#if KEEP_SAMPLES | |||||
if (samples) | |||||
free(samples); | |||||
/* Allocate extra for wraparound analysis */ | |||||
samples = calloc(maxsamples+kbest, sizeof(double)); | |||||
#endif | |||||
samplecount = 0; | |||||
} | |||||
/* | |||||
* add_sample - Add new sample | |||||
*/ | |||||
static void add_sample(double val) | |||||
{ | |||||
int pos = 0; | |||||
if (samplecount < kbest) { | |||||
pos = samplecount; | |||||
values[pos] = val; | |||||
} else if (val < values[kbest-1]) { | |||||
pos = kbest-1; | |||||
values[pos] = val; | |||||
} | |||||
#if KEEP_SAMPLES | |||||
samples[samplecount] = val; | |||||
#endif | |||||
samplecount++; | |||||
/* Insertion sort */ | |||||
while (pos > 0 && values[pos-1] > values[pos]) { | |||||
double temp = values[pos-1]; | |||||
values[pos-1] = values[pos]; | |||||
values[pos] = temp; | |||||
pos--; | |||||
} | |||||
} | |||||
/* | |||||
* has_converged- Have kbest minimum measurements converged within epsilon? | |||||
*/ | |||||
static int has_converged() | |||||
{ | |||||
return | |||||
(samplecount >= kbest) && | |||||
((1 + epsilon)*values[0] >= values[kbest-1]); | |||||
} | |||||
/* | |||||
* clear - Code to clear cache | |||||
*/ | |||||
static volatile int sink = 0; | |||||
static void clear() | |||||
{ | |||||
int x = sink; | |||||
int *cptr, *cend; | |||||
int incr = cache_block/sizeof(int); | |||||
if (!cache_buf) { | |||||
cache_buf = malloc(cache_bytes); | |||||
if (!cache_buf) { | |||||
fprintf(stderr, "Fatal error. Malloc returned null when trying to clear cache\n"); | |||||
exit(1); | |||||
} | |||||
} | |||||
cptr = (int *) cache_buf; | |||||
cend = cptr + cache_bytes/sizeof(int); | |||||
while (cptr < cend) { | |||||
x += *cptr; | |||||
cptr += incr; | |||||
} | |||||
sink = x; | |||||
} | |||||
/* | |||||
* fcyc - Use K-best scheme to estimate the running time of function f | |||||
*/ | |||||
double fcyc(test_funct f, void *argp) | |||||
{ | |||||
double result; | |||||
init_sampler(); | |||||
if (compensate) { | |||||
do { | |||||
double cyc; | |||||
if (clear_cache) | |||||
clear(); | |||||
start_comp_counter(); | |||||
f(argp); | |||||
cyc = get_comp_counter(); | |||||
add_sample(cyc); | |||||
} while (!has_converged() && samplecount < maxsamples); | |||||
} else { | |||||
do { | |||||
double cyc; | |||||
if (clear_cache) | |||||
clear(); | |||||
start_counter(); | |||||
f(argp); | |||||
cyc = get_counter(); | |||||
add_sample(cyc); | |||||
} while (!has_converged() && samplecount < maxsamples); | |||||
} | |||||
#ifdef DEBUG | |||||
{ | |||||
int i; | |||||
printf(" %d smallest values: [", kbest); | |||||
for (i = 0; i < kbest; i++) | |||||
printf("%.0f%s", values[i], i==kbest-1 ? "]\n" : ", "); | |||||
} | |||||
#endif | |||||
result = values[0]; | |||||
#if !KEEP_VALS | |||||
free(values); | |||||
values = NULL; | |||||
#endif | |||||
return result; | |||||
} | |||||
/************************************************************* | |||||
* Set the various parameters used by the measurement routines | |||||
************************************************************/ | |||||
/* | |||||
* set_fcyc_clear_cache - When set, will run code to clear cache | |||||
* before each measurement. | |||||
* Default = 0 | |||||
*/ | |||||
void set_fcyc_clear_cache(int clear) | |||||
{ | |||||
clear_cache = clear; | |||||
} | |||||
/* | |||||
* set_fcyc_cache_size - Set size of cache to use when clearing cache | |||||
* Default = 1<<19 (512KB) | |||||
*/ | |||||
void set_fcyc_cache_size(int bytes) | |||||
{ | |||||
if (bytes != cache_bytes) { | |||||
cache_bytes = bytes; | |||||
if (cache_buf) { | |||||
free(cache_buf); | |||||
cache_buf = NULL; | |||||
} | |||||
} | |||||
} | |||||
/* | |||||
* set_fcyc_cache_block - Set size of cache block | |||||
* Default = 32 | |||||
*/ | |||||
void set_fcyc_cache_block(int bytes) { | |||||
cache_block = bytes; | |||||
} | |||||
/* | |||||
* set_fcyc_compensate- When set, will attempt to compensate for | |||||
* timer interrupt overhead | |||||
* Default = 0 | |||||
*/ | |||||
void set_fcyc_compensate(int compensate_arg) | |||||
{ | |||||
compensate = compensate_arg; | |||||
} | |||||
/* | |||||
* set_fcyc_k - Value of K in K-best measurement scheme | |||||
* Default = 3 | |||||
*/ | |||||
void set_fcyc_k(int k) | |||||
{ | |||||
kbest = k; | |||||
} | |||||
/* | |||||
* set_fcyc_maxsamples - Maximum number of samples attempting to find | |||||
* K-best within some tolerance. | |||||
* When exceeded, just return best sample found. | |||||
* Default = 20 | |||||
*/ | |||||
void set_fcyc_maxsamples(int maxsamples_arg) | |||||
{ | |||||
maxsamples = maxsamples_arg; | |||||
} | |||||
/* | |||||
* set_fcyc_epsilon - Tolerance required for K-best | |||||
* Default = 0.01 | |||||
*/ | |||||
void set_fcyc_epsilon(double epsilon_arg) | |||||
{ | |||||
epsilon = epsilon_arg; | |||||
} | |||||
@ -0,0 +1,68 @@ | |||||
/* | |||||
* fcyc.h - prototypes for the routines in fcyc.c that estimate the | |||||
* time in CPU cycles used by a test function f | |||||
* | |||||
* Copyright (c) 2002, R. Bryant and D. O'Hallaron, All rights reserved. | |||||
* May not be used, modified, or copied without permission. | |||||
* | |||||
*/ | |||||
#ifndef FCYC_H
#define FCYC_H

/* The test function takes a generic pointer as input */
typedef void (*test_funct)(void *);

/* Compute number of cycles used by test function f */
double fcyc(test_funct f, void* argp);

/*********************************************************
 * Set the various parameters used by measurement routines
 *********************************************************/

/*
 * set_fcyc_clear_cache - When set, will run code to clear cache
 *     before each measurement.
 *     Default = 0
 */
void set_fcyc_clear_cache(int clear);

/*
 * set_fcyc_cache_size - Set size of cache to use when clearing cache
 *     Default = 1<<19 (512KB)
 */
void set_fcyc_cache_size(int bytes);

/*
 * set_fcyc_cache_block - Set size of cache block
 *     Default = 32
 */
void set_fcyc_cache_block(int bytes);

/*
 * set_fcyc_compensate- When set, will attempt to compensate for
 *     timer interrupt overhead
 *     Default = 0
 */
void set_fcyc_compensate(int compensate_arg);

/*
 * set_fcyc_k - Value of K in K-best measurement scheme
 *     Default = 3
 */
void set_fcyc_k(int k);

/*
 * set_fcyc_maxsamples - Maximum number of samples attempting to find
 *     K-best within some tolerance.
 *     When exceeded, just return best sample found.
 *     Default = 20
 */
void set_fcyc_maxsamples(int maxsamples_arg);

/*
 * set_fcyc_epsilon - Tolerance required for K-best
 *     Default = 0.01
 */
void set_fcyc_epsilon(double epsilon_arg);

#endif /* FCYC_H */
@ -0,0 +1,57 @@ | |||||
/**************************** | |||||
* High-level timing wrappers | |||||
****************************/ | |||||
#include <stdio.h> | |||||
#include "fsecs.h" | |||||
#include "fcyc.h" | |||||
#include "clock.h" | |||||
#include "ftimer.h" | |||||
#include "config.h" | |||||
static double Mhz;  /* estimated CPU clock frequency */
extern int verbose; /* -v option in mdriver.c */

/*
 * init_fsecs - initialize the timing package
 *
 * Announces the timing method selected in config.h when verbose is set.
 * For the cycle-counter method it also configures the fcyc package and
 * estimates the clock rate that fsecs() uses to turn cycles into seconds.
 */
void init_fsecs(void)
{
    Mhz = 0; /* keep gcc -Wall happy */

#if USE_FCYC
    if (verbose) {
        printf("Measuring performance with a cycle counter.\n");
    }

    /* set key parameters for the fcyc package */
    set_fcyc_maxsamples(20);
    set_fcyc_clear_cache(1);
    set_fcyc_compensate(1);
    set_fcyc_epsilon(0.01);
    set_fcyc_k(3);

    Mhz = mhz(verbose > 0);
#elif USE_ITIMER
    if (verbose) {
        printf("Measuring performance with the interval timer.\n");
    }
#elif USE_GETTOD
    if (verbose) {
        printf("Measuring performance with gettimeofday().\n");
    }
#endif
}
/* | |||||
* fsecs - Return the running time of a function f (in seconds) | |||||
*/ | |||||
double fsecs(fsecs_test_funct f, void *argp) | |||||
{ | |||||
#if USE_FCYC | |||||
double cycles = fcyc(f, argp); | |||||
return cycles/(Mhz*1e6); | |||||
#elif USE_ITIMER | |||||
return ftimer_itimer(f, argp, 10); | |||||
#elif USE_GETTOD | |||||
return ftimer_gettod(f, argp, 10); | |||||
#endif | |||||
} | |||||
@ -0,0 +1,4 @@ | |||||
#ifndef FSECS_H
#define FSECS_H

/* The function to be timed takes a generic pointer as input */
typedef void (*fsecs_test_funct)(void *);

/* Initialize the timing package; call once before fsecs() */
void init_fsecs(void);

/* Return the running time of f(argp) in seconds */
double fsecs(fsecs_test_funct f, void *argp);

#endif /* FSECS_H */
@ -0,0 +1,106 @@ | |||||
/* | |||||
* ftimer.c - Estimate the time (in seconds) used by a function f | |||||
* | |||||
* Copyright (c) 2002, R. Bryant and D. O'Hallaron, All rights reserved. | |||||
* May not be used, modified, or copied without permission. | |||||
* | |||||
* Function timers that estimate the running time (in seconds) of a function f. | |||||
* ftimer_itimer: version that uses the interval timer | |||||
* ftimer_gettod: version that uses gettimeofday | |||||
*/ | |||||
#include <stdio.h> | |||||
#include <sys/time.h> | |||||
#include "ftimer.h" | |||||
/* function prototypes */ | |||||
static void init_etime(void); | |||||
static double get_etime(void); | |||||
/* | |||||
* ftimer_itimer - Use the interval timer to estimate the running time | |||||
* of f(argp). Return the average of n runs. | |||||
*/ | |||||
double ftimer_itimer(ftimer_test_funct f, void *argp, int n) | |||||
{ | |||||
double start, tmeas; | |||||
int i; | |||||
init_etime(); | |||||
start = get_etime(); | |||||
for (i = 0; i < n; i++) | |||||
f(argp); | |||||
tmeas = get_etime() - start; | |||||
return tmeas / n; | |||||
} | |||||
/* | |||||
* ftimer_gettod - Use gettimeofday to estimate the running time of | |||||
* f(argp). Return the average of n runs. | |||||
*/ | |||||
double ftimer_gettod(ftimer_test_funct f, void *argp, int n) | |||||
{ | |||||
int i; | |||||
struct timeval stv, etv; | |||||
double diff; | |||||
gettimeofday(&stv, NULL); | |||||
for (i = 0; i < n; i++) | |||||
f(argp); | |||||
gettimeofday(&etv,NULL); | |||||
diff = 1E3*(etv.tv_sec - stv.tv_sec) + 1E-3*(etv.tv_usec-stv.tv_usec); | |||||
diff /= n; | |||||
return (1E-3*diff); | |||||
} | |||||
/* | |||||
* Routines for manipulating the Unix interval timer | |||||
*/ | |||||
/* The initial value, in seconds, loaded into each interval timer */
#define MAX_ETIME 86400

/* static variables that hold the initial value of the interval timer */
static struct itimerval first_u; /* user time */
static struct itimerval first_r; /* real time */
static struct itimerval first_p; /* prof time*/

/*
 * arm_timer - fill *tv with a one-shot MAX_ETIME-second setting (no
 * reload interval) and load it into the interval timer named by which.
 */
static void arm_timer(int which, struct itimerval *tv)
{
    tv->it_interval.tv_sec = 0;
    tv->it_interval.tv_usec = 0;
    tv->it_value.tv_sec = MAX_ETIME;
    tv->it_value.tv_usec = 0;
    setitimer(which, tv, NULL);
}

/*
 * init_etime - load the virtual, real and profiling interval timers, in
 * that order, with MAX_ETIME seconds, remembering each starting value.
 */
static void init_etime(void)
{
    arm_timer(ITIMER_VIRTUAL, &first_u);
    arm_timer(ITIMER_REAL, &first_r);
    arm_timer(ITIMER_PROF, &first_p);
}
/* return elapsed real seconds since call to init_etime */ | |||||
static double get_etime(void) { | |||||
struct itimerval v_curr; | |||||
struct itimerval r_curr; | |||||
struct itimerval p_curr; | |||||
getitimer(ITIMER_VIRTUAL, &v_curr); | |||||
getitimer(ITIMER_REAL,&r_curr); | |||||
getitimer(ITIMER_PROF,&p_curr); | |||||
return (double) ((first_p.it_value.tv_sec - r_curr.it_value.tv_sec) + | |||||
(first_p.it_value.tv_usec - r_curr.it_value.tv_usec)*1e-6); | |||||
} | |||||
@ -0,0 +1,14 @@ | |||||
/*
 * Function timers
 */
#ifndef FTIMER_H
#define FTIMER_H

/* The function to be timed takes a generic pointer as input */
typedef void (*ftimer_test_funct)(void *);

/* Estimate the running time of f(argp) using the Unix interval timer.
   Return the average of n runs */
double ftimer_itimer(ftimer_test_funct f, void *argp, int n);

/* Estimate the running time of f(argp) using gettimeofday
   Return the average of n runs */
double ftimer_gettod(ftimer_test_funct f, void *argp, int n);

#endif /* FTIMER_H */
@ -0,0 +1,101 @@ | |||||
/* | |||||
* memlib.c - a module that simulates the memory system. Needed because it | |||||
* allows us to interleave calls from the student's malloc package | |||||
* with the system's malloc package in libc. | |||||
*/ | |||||
#include <stdio.h> | |||||
#include <stdlib.h> | |||||
#include <assert.h> | |||||
#include <unistd.h> | |||||
#include <sys/mman.h> | |||||
#include <string.h> | |||||
#include <errno.h> | |||||
#include "memlib.h" | |||||
#include "config.h" | |||||
/* private variables */
static char *mem_start_brk; /* points to first byte of heap */
static char *mem_brk; /* points one past the last byte of heap (the simulated brk) */
static char *mem_max_addr; /* one past the end of the backing storage; brk may not exceed it */
/* | |||||
* mem_init - initialize the memory system model | |||||
*/ | |||||
void mem_init(void) | |||||
{ | |||||
/* allocate the storage we will use to model the available VM */ | |||||
if ((mem_start_brk = (char *)malloc(MAX_HEAP)) == NULL) { | |||||
fprintf(stderr, "mem_init_vm: malloc error\n"); | |||||
exit(1); | |||||
} | |||||
mem_max_addr = mem_start_brk + MAX_HEAP; /* max legal heap address */ | |||||
mem_brk = mem_start_brk; /* heap is empty initially */ | |||||
} | |||||
/* | |||||
* mem_deinit - free the storage used by the memory system model | |||||
*/ | |||||
void mem_deinit(void) | |||||
{ | |||||
free(mem_start_brk); | |||||
} | |||||
/* | |||||
* mem_reset_brk - reset the simulated brk pointer to make an empty heap | |||||
*/ | |||||
void mem_reset_brk() | |||||
{ | |||||
mem_brk = mem_start_brk; | |||||
} | |||||
/* | |||||
* mem_sbrk - simple model of the sbrk function. Extends the heap | |||||
* by incr bytes and returns the start address of the new area. In | |||||
* this model, the heap cannot be shrunk. | |||||
*/ | |||||
void *mem_sbrk(int incr) | |||||
{ | |||||
char *old_brk = mem_brk; | |||||
if ( (incr < 0) || ((mem_brk + incr) > mem_max_addr)) { | |||||
errno = ENOMEM; | |||||
fprintf(stderr, "ERROR: mem_sbrk failed. Ran out of memory...\n"); | |||||
return (void *)-1; | |||||
} | |||||
mem_brk += incr; | |||||
return (void *)old_brk; | |||||
} | |||||
/* | |||||
* mem_heap_lo - return address of the first heap byte | |||||
*/ | |||||
void *mem_heap_lo() | |||||
{ | |||||
return (void *)mem_start_brk; | |||||
} | |||||
/* | |||||
* mem_heap_hi - return address of last heap byte | |||||
*/ | |||||
void *mem_heap_hi() | |||||
{ | |||||
return (void *)(mem_brk - 1); | |||||
} | |||||
/* | |||||
* mem_heapsize() - returns the heap size in bytes | |||||
*/ | |||||
size_t mem_heapsize() | |||||
{ | |||||
return (size_t)(mem_brk - mem_start_brk); | |||||
} | |||||
/*
 * mem_pagesize() - returns the page size of the system in bytes
 *
 * Uses sysconf(_SC_PAGESIZE), the POSIX interface, in place of the legacy
 * getpagesize(). Returns 0 if the system cannot report a page size.
 */
size_t mem_pagesize()
{
    long page_bytes = sysconf(_SC_PAGESIZE);

    /* sysconf signals failure with -1; do not let that become a huge size_t */
    if (page_bytes <= 0)
        return 0;
    return (size_t)page_bytes;
}
@ -0,0 +1,11 @@ | |||||
#ifndef MEMLIB_H
#define MEMLIB_H

#include <unistd.h>

/* Set up / tear down the storage that models the heap */
void mem_init(void);
void mem_deinit(void);

/* Grow the simulated heap by incr bytes; returns the old brk, or (void *)-1 */
void *mem_sbrk(int incr);

/* Make the simulated heap empty again */
void mem_reset_brk(void);

/* Addresses of the first and last heap bytes */
void *mem_heap_lo(void);
void *mem_heap_hi(void);

/* Current heap size and system page size, in bytes */
size_t mem_heapsize(void);
size_t mem_pagesize(void);

#endif /* MEMLIB_H */
@ -0,0 +1,110 @@ | |||||
/* | |||||
* mm-naive.c - The fastest, least memory-efficient malloc package. | |||||
* | |||||
* In this naive approach, a block is allocated by simply incrementing | |||||
* the brk pointer. A block is pure payload. There are no headers or | |||||
* footers. Blocks are never coalesced or reused. Realloc is | |||||
* implemented directly using mm_malloc and mm_free. | |||||
* | |||||
* NOTE TO STUDENTS: Replace this header comment with your own header | |||||
* comment that gives a high level description of your solution. | |||||
*/ | |||||
#include <assert.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include "mm.h"
#include "memlib.h"
/********************************************************* | |||||
* NOTE TO STUDENTS: Before you do anything else, please | |||||
* provide your team information in the following struct. | |||||
********************************************************/ | |||||
team_t team = { | |||||
/* Team name */ | |||||
"ateam", | |||||
/* First member's full name */ | |||||
"Harry Bovik", | |||||
/* First member's email address */ | |||||
"bovik@cs.cmu.edu", | |||||
/* Second member's full name (leave blank if none) */ | |||||
"", | |||||
/* Second member's email address (leave blank if none) */ | |||||
"" | |||||
}; | |||||
/* single word (4) or double word (8) alignment; must be a power of two */
#define ALIGNMENT 8

/*
 * rounds up to the nearest multiple of ALIGNMENT.
 * The mask is derived from ALIGNMENT rather than hard-coded as ~0x7, so
 * switching ALIGNMENT to 4 keeps the rounding correct.
 */
#define ALIGN(size) (((size) + (ALIGNMENT-1)) & ~(ALIGNMENT-1))

/* size of the size_t header, itself rounded up to the alignment */
#define SIZE_T_SIZE (ALIGN(sizeof(size_t)))
/*
 * mm_init - prepare the malloc package for use.
 * The naive allocator keeps no state of its own, so there is nothing
 * to set up; the call always reports success (0).
 */
int mm_init(void)
{
    const int status = 0;

    return status;
}
/* | |||||
* mm_malloc - Allocate a block by incrementing the brk pointer. | |||||
* Always allocate a block whose size is a multiple of the alignment. | |||||
*/ | |||||
void *mm_malloc(size_t size) | |||||
{ | |||||
int newsize = ALIGN(size + SIZE_T_SIZE); | |||||
void *p = mem_sbrk(newsize); | |||||
if (p == (void *)-1) | |||||
return NULL; | |||||
else { | |||||
*(size_t *)p = size; | |||||
return (void *)((char *)p + SIZE_T_SIZE); | |||||
} | |||||
} | |||||
/*
 * mm_free - Release a block. The naive allocator never reuses memory,
 * so the pointer is deliberately ignored.
 */
void mm_free(void *ptr)
{
    (void)ptr;
}
/* | |||||
* mm_realloc - Implemented simply in terms of mm_malloc and mm_free | |||||
*/ | |||||
void *mm_realloc(void *ptr, size_t size) | |||||
{ | |||||
void *oldptr = ptr; | |||||
void *newptr; | |||||
size_t copySize; | |||||
newptr = mm_malloc(size); | |||||
if (newptr == NULL) | |||||
return NULL; | |||||
copySize = *(size_t *)((char *)oldptr - SIZE_T_SIZE); | |||||
if (size < copySize) | |||||
copySize = size; | |||||
memcpy(newptr, oldptr, copySize); | |||||
mm_free(oldptr); | |||||
return newptr; | |||||
} | |||||
@ -0,0 +1,23 @@ | |||||
#include <stdio.h> | |||||
extern int mm_init (void); | |||||
extern void *mm_malloc (size_t size); | |||||
extern void mm_free (void *ptr); | |||||
extern void *mm_realloc(void *ptr, size_t size); | |||||
/*
 * Students work in teams of one or two. Each team fills in one struct of
 * this type (the `team` variable defined in mm.c) with its team name and
 * the names and IDs of its members.
 */ | |||||
typedef struct { | |||||
char *teamname; /* team name (ID1+ID2 or ID1) */ | |||||
char *name1; /* full name of first member */ | |||||
char *id1; /* ID of first member; mm.c fills this with an email address */ | |||||
char *name2; /* full name of second member (empty string if none) */ | |||||
char *id2; /* ID of second member (empty string if none) */ | |||||
} team_t; | |||||
extern team_t team; | |||||
@ -0,0 +1,16 @@ | |||||
20000 | |||||
6 | |||||
12 | |||||
1 | |||||
a 0 2040 | |||||
a 1 2040 | |||||
f 1 | |||||
a 2 48 | |||||
a 3 4072 | |||||
f 3 | |||||
a 4 4072 | |||||
f 0 | |||||
f 2 | |||||
a 5 4072 | |||||
f 4 | |||||
f 5 |
@ -0,0 +1,16 @@ | |||||
20000 | |||||
6 | |||||
12 | |||||
1 | |||||
a 0 2040 | |||||
a 1 4010 | |||||
a 2 48 | |||||
a 3 4072 | |||||
a 4 4072 | |||||
a 5 4072 | |||||
f 0 | |||||
f 1 | |||||
f 2 | |||||
f 3 | |||||
f 4 | |||||
f 5 |
@ -0,0 +1,188 @@ | |||||
#include <stdio.h> | |||||
#include <stdlib.h> | |||||
#include <unistd.h> | |||||
#include <sys/times.h> | |||||
#include <string.h> | |||||
#include "clock.h" | |||||
/* Keep track of most recent reading of cycle counter */ | |||||
static unsigned cyc_hi = 0; | |||||
static unsigned cyc_lo = 0; | |||||
/*
 * access_counter - read the processor's time-stamp counter.
 * Stores the upper 32 bits in *hi and the lower 32 bits in *lo.
 *
 * RDTSC leaves its result in EDX:EAX, so the outputs are bound to those
 * registers directly. The statement is marked volatile because it has
 * outputs but no inputs: without it the compiler may assume two reads
 * yield the same value and merge or hoist them.
 */
void access_counter(unsigned *hi, unsigned *lo)
{
    __asm__ __volatile__("rdtsc" : "=a" (*lo), "=d" (*hi));
}
void start_counter() | |||||
{ | |||||
access_counter(&cyc_hi, &cyc_lo); | |||||
} | |||||
double get_counter() | |||||
{ | |||||
unsigned ncyc_hi, ncyc_lo; | |||||
unsigned hi, lo, borrow; | |||||
double result; | |||||
/* Get cycle counter */ | |||||
access_counter(&ncyc_hi, &ncyc_lo); | |||||
/* Do double precision subtraction */ | |||||
lo = ncyc_lo - cyc_lo; | |||||
borrow = lo > ncyc_lo; | |||||
hi = ncyc_hi - cyc_hi - borrow; | |||||
result = (double) hi * (1 << 30) * 4 + lo; | |||||
if (result < 0) { | |||||
fprintf(stderr, "Error: Cycle counter returning negative value: %.0f\n", result); | |||||
} | |||||
return result; | |||||
} | |||||
/*
 * ovhd - measure the overhead, in cycles, of a start_counter() /
 * get_counter() pair. The pair is run twice and the second reading is
 * returned, to eliminate cache effects.
 */
double ovhd(void)
{
    double result = 0.0;

    for (int pass = 0; pass < 2; pass++) {
        start_counter();
        result = get_counter();
    }
    return result;
}
/* Keep track of clock speed */
double cpu_ghz = 0.0;

/* Longest /proc/cpuinfo line read in one go */
#define MAXBUF 512

/*
 * core_mhz - get the processor clock rate in MHz from /proc/cpuinfo.
 *
 * Scans the file for the first "cpu MHz" entry that parses to a positive
 * number, stores the rate in cpu_ghz (in GHz) and returns it in MHz.
 * If the file cannot be opened or no usable entry is found, a message is
 * printed to stderr and 1 GHz (1000 MHz) is assumed.
 * When verbose is non-zero the detected rate is printed to stdout.
 */
double core_mhz(int verbose)
{
    char buf[MAXBUF];
    FILE *fp = fopen("/proc/cpuinfo", "r");

    cpu_ghz = 0.0;

    if (!fp) {
        fprintf(stderr, "Can't open /proc/cpuinfo to get clock information\n");
        cpu_ghz = 1.0;
        return cpu_ghz * 1000.0;
    }

    while (fgets(buf, sizeof buf, fp)) {
        const char *entry = strstr(buf, "cpu MHz");
        double cpu_mhz = 0.0;

        if (!entry)
            continue;
        /* Accept the entry only if a positive number was actually parsed. */
        if (sscanf(entry, "cpu MHz\t: %lf", &cpu_mhz) == 1 && cpu_mhz > 0.0) {
            cpu_ghz = cpu_mhz / 1000.0;
            break;
        }
    }
    fclose(fp);

    if (cpu_ghz == 0.0) {
        /* The file opened fine; the entry itself was missing or unusable. */
        fprintf(stderr, "Can't find a usable \"cpu MHz\" entry in /proc/cpuinfo\n");
        cpu_ghz = 1.0;
        return cpu_ghz * 1000.0;
    }

    if (verbose) {
        printf("Processor Clock Rate ~= %.4f GHz (extracted from file)\n", cpu_ghz);
    }
    return cpu_ghz * 1000;
}
/*
 * mhz - report the processor clock rate in MHz.
 * Thin wrapper that forwards to core_mhz().
 */
double mhz(int verbose)
{
    return core_mhz(verbose);
}
/*
 * mhz_full - estimate the clock rate in MHz by counting the cycles that
 * elapse while the process sleeps for sleeptime seconds.
 * Prints the estimate when verbose is non-zero.
 */
double mhz_full(int verbose, int sleeptime)
{
    double mhz_estimate;

    start_counter();
    sleep(sleeptime);
    mhz_estimate = get_counter()/(1e6*sleeptime);

    if (verbose) {
        printf("Processor Clock Rate ~= %.1f MHz\n", mhz_estimate);
    }
    return mhz_estimate;
}
///* Version using a default sleeptime */ | |||||
//double mhz(int verbose) | |||||
//{ | |||||
// return mhz_full(verbose, 2); | |||||
//} | |||||
/** Special counters that compensate for timer interrupt overhead */ | |||||
static double cyc_per_tick = 0.0; | |||||
#define NEVENT 100 | |||||
#define THRESHOLD 1000 | |||||
#define RECORDTHRESH 3000 | |||||
/*
 * callibrate - attempt to see how much time is used by the timer interrupt.
 *
 * Spins on the cycle counter. Each time at least THRESHOLD cycles have
 * passed since the previous checkpoint, the process user time is sampled
 * with times(). If the tick count advanced, the cycles elapsed over that
 * interval divided by the number of ticks is a candidate cost per tick.
 * cyc_per_tick keeps the smallest candidate that exceeds RECORDTHRESH.
 * The loop ends after NEVENT intervals in which the tick count advanced.
 * When verbose is non-zero the final value is printed.
 */ | |||||
static void callibrate(int verbose) | |||||
{ | |||||
double oldt; | |||||
struct tms t; | |||||
clock_t oldc; | |||||
/* number of tick-advancing intervals seen so far */
int e = 0; | |||||
times(&t); | |||||
oldc = t.tms_utime; | |||||
start_counter(); | |||||
oldt = get_counter(); | |||||
while (e <NEVENT) { | |||||
double newt = get_counter(); | |||||
if (newt-oldt >= THRESHOLD) { | |||||
clock_t newc; | |||||
times(&t); | |||||
newc = t.tms_utime; | |||||
if (newc > oldc) { | |||||
/* cycles per tick over the interval that contained the tick */
double cpt = (newt-oldt)/(newc-oldc); | |||||
if ((cyc_per_tick == 0.0 || cyc_per_tick > cpt) && cpt > RECORDTHRESH) | |||||
cyc_per_tick = cpt; | |||||
/* | |||||
if (verbose) | |||||
printf("Saw event lasting %.0f cycles and %d ticks. Ratio = %f\n", | |||||
newt-oldt, (int) (newc-oldc), cpt); | |||||
*/ | |||||
e++; | |||||
oldc = newc; | |||||
} | |||||
/* move the checkpoint forward whether or not a tick was seen */
oldt = newt; | |||||
} | |||||
} | |||||
if (verbose) | |||||
printf("Setting cyc_per_tick to %f\n", cyc_per_tick); | |||||
} | |||||
static clock_t start_tick = 0; | |||||
void start_comp_counter() { | |||||
struct tms t; | |||||
if (cyc_per_tick == 0.0) | |||||
callibrate(1); | |||||
times(&t); | |||||
start_tick = t.tms_utime; | |||||
start_counter(); | |||||
} | |||||
double get_comp_counter() { | |||||
double time = get_counter(); | |||||
double ctime; | |||||
struct tms t; | |||||
clock_t ticks; | |||||
times(&t); | |||||
ticks = t.tms_utime - start_tick; | |||||
ctime = time - ticks*cyc_per_tick; | |||||
/* | |||||
printf("Measured %.0f cycles. Ticks = %d. Corrected %.0f cycles\n", | |||||
time, (int) ticks, ctime); | |||||
*/ | |||||
return ctime; | |||||
} |
@ -0,0 +1,23 @@ | |||||
/* Routines for using cycle counter */
#ifndef CLOCK_H
#define CLOCK_H

/* Start the counter */
void start_counter(void);

/* Get # cycles since counter started */
double get_counter(void);

/* Measure overhead for counter */
double ovhd(void);

/* Determine clock rate of processor */
double mhz(int verbose);

/* Determine clock rate of processor, having more control over accuracy */
double mhz_full(int verbose, int sleeptime);

/** Special counters that compensate for timer interrupt overhead */
void start_comp_counter(void);
double get_comp_counter(void);

#endif /* CLOCK_H */
@ -0,0 +1,299 @@ | |||||
/* Compute time used by a function f that takes two integer args */ | |||||
#include <stdlib.h> | |||||
#include <sys/times.h> | |||||
#include <stdio.h> | |||||
#include "clock.h" | |||||
#include "fcyc2.h" | |||||
/* The k smallest measurements seen so far, kept in ascending order */
static double *values = NULL;
/* Number of samples added since the last init_sampler() */
int samplecount = 0;

#define KEEP_VALS 1
#define KEEP_SAMPLES 1

#if KEEP_SAMPLES
/* Every sample in arrival order */
double *samples = NULL;
#endif

/*
 * init_sampler - start a new sampling process.
 * Releases any previous buffers, allocates zeroed storage for the k
 * smallest values (and, with KEEP_SAMPLES, for maxsamples+k raw samples)
 * and resets samplecount. Exits with a message if allocation fails,
 * rather than letting a later store go through a null pointer.
 */
static void init_sampler(int k, int maxsamples)
{
    free(values);                       /* free(NULL) is a no-op */
    values = calloc(k, sizeof *values);
    if (values == NULL && k > 0) {
        fprintf(stderr, "init_sampler: out of memory\n");
        exit(1);
    }
#if KEEP_SAMPLES
    free(samples);
    /* Allocate extra for wraparound analysis */
    samples = calloc(maxsamples+k, sizeof *samples);
    if (samples == NULL && maxsamples+k > 0) {
        fprintf(stderr, "init_sampler: out of memory\n");
        exit(1);
    }
#endif
    samplecount = 0;
}
/* Add new sample. */ | |||||
void add_sample(double val, int k) | |||||
{ | |||||
int pos = 0; | |||||
if (samplecount < k) { | |||||
pos = samplecount; | |||||
values[pos] = val; | |||||
} else if (val < values[k-1]) { | |||||
pos = k-1; | |||||
values[pos] = val; | |||||
} | |||||
#if KEEP_SAMPLES | |||||
samples[samplecount] = val; | |||||
#endif | |||||
samplecount++; | |||||
/* Insertion sort */ | |||||
while (pos > 0 && values[pos-1] > values[pos]) { | |||||
double temp = values[pos-1]; | |||||
values[pos-1] = values[pos]; | |||||
values[pos] = temp; | |||||
pos--; | |||||
} | |||||
} | |||||
/* Get current minimum */ | |||||
double get_min() | |||||
{ | |||||
return values[0]; | |||||
} | |||||
/* What is relative error for kth smallest sample */ | |||||
double err(int k) | |||||
{ | |||||
if (samplecount < k) | |||||
return 1000.0; | |||||
return (values[k-1] - values[0])/values[0]; | |||||
} | |||||
/* Have k minimum measurements converged within epsilon? */ | |||||
int has_converged(int k_arg, double epsilon_arg, int maxsamples) | |||||
{ | |||||
if ((samplecount >= k_arg) && | |||||
((1 + epsilon_arg)*values[0] >= values[k_arg-1])) | |||||
return samplecount; | |||||
if ((samplecount >= maxsamples)) | |||||
return -1; | |||||
return 0; | |||||
} | |||||
/* Cache-clearing support */

/* Pentium III has 512K L2 cache, which is 128K ints */
#define ASIZE (1 << 17)
/* Cache block size is 32 bytes, i.e. 8 ints */
#define STRIDE 8

static int stuff[ASIZE];
static int sink;

/*
 * clear - read one int from every STRIDE-th slot of stuff[], adding the
 * values into sink so the reads have a visible result.
 */
static void clear()
{
    int total = sink;
    int idx = 0;

    while (idx < ASIZE) {
        total += stuff[idx];
        idx += STRIDE;
    }
    sink = total;
}
/*
 * fcyc2_full - measure the cycles used by f(param1, param2).
 *
 * Repeatedly times f and feeds each measurement to add_sample() until
 * has_converged() returns non-zero or samplecount reaches maxsamples,
 * then returns the smallest measurement (values[0]).
 *
 *   clear_cache - if non-zero, clear() is called before each round
 *   k, epsilon  - passed to add_sample()/has_converged()
 *   maxsamples  - upper bound on the number of measurements
 *   compensate  - if non-zero, use the tick-compensated counter
 *                 (start_comp_counter/get_comp_counter) instead of the
 *                 plain one (start_counter/get_counter)
 *
 * Each round calls f once untimed before the timed call.
 */
double fcyc2_full(test_funct f, int param1, int param2, int clear_cache, | |||||
int k, double epsilon, int maxsamples, int compensate) | |||||
{ | |||||
double result; | |||||
init_sampler(k, maxsamples); | |||||
if (compensate) { | |||||
do { | |||||
double cyc; | |||||
if (clear_cache) | |||||
clear(); | |||||
f(param1, param2); /* warm cache */ | |||||
start_comp_counter(); | |||||
f(param1, param2); | |||||
cyc = get_comp_counter(); | |||||
add_sample(cyc, k); | |||||
} while (!has_converged(k, epsilon, maxsamples) && samplecount < maxsamples); | |||||
} else { | |||||
do { | |||||
double cyc; | |||||
if (clear_cache) | |||||
clear(); | |||||
f(param1, param2); /* warm cache */ | |||||
start_counter(); | |||||
f(param1, param2); | |||||
cyc = get_counter(); | |||||
add_sample(cyc, k); | |||||
} while (!has_converged(k, epsilon, maxsamples) && samplecount < maxsamples); | |||||
} | |||||
/* Optional dump of the k smallest measurements */
#ifdef DEBUG | |||||
{ | |||||
int i; | |||||
printf(" %d smallest values: [", k); | |||||
for (i = 0; i < k; i++) | |||||
printf("%.0f%s", values[i], i==k-1 ? "]\n" : ", "); | |||||
} | |||||
#endif | |||||
result = values[0]; | |||||
/* With KEEP_VALS set the buffer stays allocated until the next init_sampler() */
#if !KEEP_VALS | |||||
free(values); | |||||
values = NULL; | |||||
#endif | |||||
return result; | |||||
} | |||||
/*
 * fcyc2 - measure f(param1, param2) with the default policy: the 3
 * smallest samples within 1%, at most 500 samples, no compensation.
 */
double fcyc2(test_funct f, int param1, int param2, int clear_cache)
{
    const int k = 3;
    const double epsilon = 0.01;
    const int maxsamples = 500;
    const int compensate = 0;

    return fcyc2_full(f, param1, param2, clear_cache,
                      k, epsilon, maxsamples, compensate);
}
/******************* Version that uses gettimeofday *************/ | |||||
static double Mhz = 0.0; | |||||
#include <sys/time.h> | |||||
static struct timeval tstart; | |||||
/* Record current time */ | |||||
void start_counter_tod() | |||||
{ | |||||
if (Mhz == 0) | |||||
Mhz = mhz_full(0, 10); | |||||
gettimeofday(&tstart, NULL); | |||||
} | |||||
/* Get number of seconds since last call to start_timer */ | |||||
double get_counter_tod() | |||||
{ | |||||
struct timeval tfinish; | |||||
long sec, usec; | |||||
gettimeofday(&tfinish, NULL); | |||||
sec = tfinish.tv_sec - tstart.tv_sec; | |||||
usec = tfinish.tv_usec - tstart.tv_usec; | |||||
return (1e6 * sec + usec)*Mhz; | |||||
} | |||||
/** Special counters that compensate for timer interrupt overhead */ | |||||
static double cyc_per_tick = 0.0; | |||||
#define NEVENT 100 | |||||
#define THRESHOLD 1000 | |||||
#define RECORDTHRESH 3000 | |||||
/*
 * callibrate - attempt to see how much time is used by the timer interrupt
 * (time-of-day version).
 *
 * Spins on get_counter_tod(). Each time at least THRESHOLD cycles have
 * passed since the previous checkpoint, the process user time is sampled
 * with times(). If the tick count advanced, the cycles elapsed over that
 * interval divided by the number of ticks is a candidate cost per tick.
 * cyc_per_tick keeps the smallest candidate that exceeds RECORDTHRESH.
 * The loop ends after NEVENT intervals in which the tick count advanced.
 * When verbose is non-zero the final value is printed.
 */ | |||||
static void callibrate(int verbose) | |||||
{ | |||||
double oldt; | |||||
struct tms t; | |||||
clock_t oldc; | |||||
/* number of tick-advancing intervals seen so far */
int e = 0; | |||||
times(&t); | |||||
oldc = t.tms_utime; | |||||
start_counter_tod(); | |||||
oldt = get_counter_tod(); | |||||
while (e <NEVENT) { | |||||
double newt = get_counter_tod(); | |||||
if (newt-oldt >= THRESHOLD) { | |||||
clock_t newc; | |||||
times(&t); | |||||
newc = t.tms_utime; | |||||
if (newc > oldc) { | |||||
/* cycles per tick over the interval that contained the tick */
double cpt = (newt-oldt)/(newc-oldc); | |||||
if ((cyc_per_tick == 0.0 || cyc_per_tick > cpt) && cpt > RECORDTHRESH) | |||||
cyc_per_tick = cpt; | |||||
/* | |||||
if (verbose) | |||||
printf("Saw event lasting %.0f cycles and %d ticks. Ratio = %f\n", | |||||
newt-oldt, (int) (newc-oldc), cpt); | |||||
*/ | |||||
e++; | |||||
oldc = newc; | |||||
} | |||||
/* move the checkpoint forward whether or not a tick was seen */
oldt = newt; | |||||
} | |||||
} | |||||
if (verbose) | |||||
printf("Setting cyc_per_tick to %f\n", cyc_per_tick); | |||||
} | |||||
static clock_t start_tick = 0; | |||||
void start_comp_counter_tod() { | |||||
struct tms t; | |||||
if (cyc_per_tick == 0.0) | |||||
callibrate(0); | |||||
times(&t); | |||||
start_tick = t.tms_utime; | |||||
start_counter_tod(); | |||||
} | |||||
double get_comp_counter_tod() { | |||||
double time = get_counter_tod(); | |||||
double ctime; | |||||
struct tms t; | |||||
clock_t ticks; | |||||
times(&t); | |||||
ticks = t.tms_utime - start_tick; | |||||
ctime = time - ticks*cyc_per_tick; | |||||
/* | |||||
printf("Measured %.0f cycles. Ticks = %d. Corrected %.0f cycles\n", | |||||
time, (int) ticks, ctime); | |||||
*/ | |||||
return ctime; | |||||
} | |||||
/*
 * fcyc2_full_tod - measure f(param1, param2) using the time-of-day clock.
 *
 * Repeatedly times f and feeds each measurement to add_sample() until
 * has_converged() returns non-zero or samplecount reaches maxsamples,
 * then returns the smallest measurement (values[0]).
 *
 *   clear_cache - if non-zero, clear() is called before each round
 *   k, epsilon  - passed to add_sample()/has_converged()
 *   maxsamples  - upper bound on the number of measurements
 *   compensate  - if non-zero, use start_comp_counter_tod/
 *                 get_comp_counter_tod instead of start_counter_tod/
 *                 get_counter_tod
 *
 * Unlike fcyc2_full, f is not called untimed before the timed call.
 */
double fcyc2_full_tod(test_funct f, int param1, int param2, int clear_cache, | |||||
int k, double epsilon, int maxsamples, int compensate) | |||||
{ | |||||
double result; | |||||
init_sampler(k, maxsamples); | |||||
if (compensate) { | |||||
do { | |||||
double cyc; | |||||
if (clear_cache) | |||||
clear(); | |||||
start_comp_counter_tod(); | |||||
f(param1, param2); | |||||
cyc = get_comp_counter_tod(); | |||||
add_sample(cyc, k); | |||||
} while (!has_converged(k, epsilon, maxsamples) && samplecount < maxsamples); | |||||
} else { | |||||
do { | |||||
double cyc; | |||||
if (clear_cache) | |||||
clear(); | |||||
start_counter_tod(); | |||||
f(param1, param2); | |||||
cyc = get_counter_tod(); | |||||
add_sample(cyc, k); | |||||
} while (!has_converged(k, epsilon, maxsamples) && samplecount < maxsamples); | |||||
} | |||||
/* Optional dump of the k smallest measurements */
#ifdef DEBUG | |||||
{ | |||||
int i; | |||||
printf(" %d smallest values: [", k); | |||||
for (i = 0; i < k; i++) | |||||
printf("%.0f%s", values[i], i==k-1 ? "]\n" : ", "); | |||||
} | |||||
#endif | |||||
result = values[0]; | |||||
/* With KEEP_VALS set the buffer stays allocated until the next init_sampler() */
#if !KEEP_VALS | |||||
free(values); | |||||
values = NULL; | |||||
#endif | |||||
return result; | |||||
} | |||||
/*
 * fcyc2_tod - measure f(param1, param2) with the time-of-day clock and
 * the default policy: the 3 smallest samples within 1%, at most 20
 * samples, no compensation.
 */
double fcyc2_tod(test_funct f, int param1, int param2, int clear_cache)
{
    const int k = 3;
    const double epsilon = 0.01;
    const int maxsamples = 20;
    const int compensate = 0;

    return fcyc2_full_tod(f, param1, param2, clear_cache,
                          k, epsilon, maxsamples, compensate);
}
@ -0,0 +1,41 @@ | |||||
/* Find number of cycles used by function that takes 2 arguments */
#ifndef FCYC2_H
#define FCYC2_H

/* Function to be tested takes two integer arguments */
typedef int (*test_funct)(int, int);

/* Compute time used by function f */
double fcyc2(test_funct f, int param1, int param2, int clear_cache);

/********* These routines are used to help with the analysis *********/

/*
  Parameters:
    k:          How many samples must be within epsilon for convergence
    epsilon:    What is tolerance
    maxsamples: How many samples until give up?
*/

/* Full version of fcyc with control over parameters */
double fcyc2_full(test_funct f, int param1, int param2, int clear_cache,
                  int k, double epsilon, int maxsamples, int compensate);

/* Get current minimum */
double get_min(void);

/* What is convergence status for k minimum measurements within epsilon
   Returns 0 if not converged, #samples if converged, and -1 if can't
   reach convergence
*/
int has_converged(int k, double epsilon, int maxsamples);

/* What is error of current measurement */
double err(int k);

/************* Try other clocking methods *****************/

/* Full version that uses the time of day clock */
double fcyc2_full_tod(test_funct f, int param1, int param2, int clear_cache,
                      int k, double epsilon, int maxsamples, int compensate);

double fcyc2_tod(test_funct f, int param1, int param2, int clear_cache);

#endif /* FCYC2_H */
@ -0,0 +1,116 @@ | |||||
/* mountain.c - Generate the memory mountain. */ | |||||
/* $begin mountainmain */ | |||||
#include <stdlib.h> | |||||
#include <stdio.h> | |||||
#include "fcyc2.h" /* measurement routines */ | |||||
#include "clock.h" /* routines to access the cycle counter */ | |||||
#define MINBYTES (1 << 14)  /* First working set size */
#define MAXBYTES (1 << 27)  /* Last working set size */
#define MAXSTRIDE 15        /* Stride x8 bytes */
/* Parenthesized so the division binds correctly inside larger expressions */
#define MAXELEMS (MAXBYTES/sizeof(long))
/* $begin mountainfuns */ | |||||
long data[MAXELEMS]; /* The global array we'll be traversing */ | |||||
/* $end mountainfuns */ | |||||
/* $end mountainmain */ | |||||
void init_data(long *data, int n); | |||||
int test(int elems, int stride); | |||||
double run(int size, int stride, double Mhz); | |||||
/* $begin mountainmain */ | |||||
int main() | |||||
{ | |||||
int size; /* Working set size (in bytes) */ | |||||
int stride; /* Stride (in array elements) */ | |||||
double Mhz; /* Clock frequency */ | |||||
FILE *fp = NULL; | |||||
fp = fopen("mountain.txt", "w+"); | |||||
init_data(data, MAXELEMS); /* Initialize each element in data */ | |||||
Mhz = mhz(0); /* Estimate the clock frequency */ | |||||
/* $end mountainmain */ | |||||
/* Not shown in the text */ | |||||
fprintf(fp, "Clock frequency is approx. %.1f MHz\n", Mhz); | |||||
fprintf(fp, "Memory mountain (MB/sec)\n"); | |||||
fprintf(fp, "\t"); | |||||
for (stride = 1; stride <= MAXSTRIDE; stride++) | |||||
fprintf(fp, "s%d\t", stride); | |||||
fprintf(fp, "\n"); | |||||
/* $begin mountainmain */ | |||||
for (size = MAXBYTES; size >= MINBYTES; size >>= 1) { | |||||
/* $end mountainmain */ | |||||
/* Not shown in the text */ | |||||
if (size > (1 << 20)) | |||||
fprintf(fp, "%dm\t", size / (1 << 20)); | |||||
else | |||||
fprintf(fp, "%dk\t", size / 1024); | |||||
/* $begin mountainmain */ | |||||
for (stride = 1; stride <= MAXSTRIDE; stride++) { | |||||
fprintf(fp, "%.0f\t", run(size, stride, Mhz)); | |||||
} | |||||
fprintf(fp, "\n"); | |||||
} | |||||
fclose(fp); | |||||
exit(0); | |||||
} | |||||
/* $end mountainmain */ | |||||
/*
 * init_data - fill the first n elements of data with their own index
 * (data[i] = i). Does nothing when n is zero or negative.
 */
void init_data(long *data, int n)
{
    long *slot = data;
    int value = 0;

    while (value < n) {
        *slot++ = value;
        value++;
    }
}
/* $begin mountainfuns */ | |||||
/* test - Iterate over first "elems" elements of array "data" with | |||||
* stride of "stride", using 4x4 loop unrolling. | |||||
*/ | |||||
int test(int elems, int stride) | |||||
{ | |||||
long i, sx2 = stride*2, sx3 = stride*3, sx4 = stride*4; | |||||
long acc0 = 0, acc1 = 0, acc2 = 0, acc3 = 0; | |||||
long length = elems; | |||||
long limit = length - sx4; | |||||
/* Combine 4 elements at a time */ | |||||
for (i = 0; i < limit; i += sx4) { | |||||
acc0 = acc0 + data[i]; | |||||
acc1 = acc1 + data[i+stride]; | |||||
acc2 = acc2 + data[i+sx2]; | |||||
acc3 = acc3 + data[i+sx3]; | |||||
} | |||||
/* Finish any remaining elements */ | |||||
for (; i < length; i += stride) { | |||||
acc0 = acc0 + data[i]; | |||||
} | |||||
return ((acc0 + acc1) + (acc2 + acc3)); | |||||
} | |||||
/* run - Run test(elems, stride) and return read throughput (MB/s). | |||||
* "size" is in bytes, "stride" is in array elements, and Mhz is | |||||
* CPU clock frequency in Mhz. | |||||
*/ | |||||
double run(int size, int stride, double Mhz) | |||||
{ | |||||
double cycles; | |||||
int elems = size / sizeof(double); | |||||
test(elems, stride); /* Warm up the cache */ //line:mem:warmup | |||||
cycles = fcyc2(test, elems, stride, 0); /* Call test(elems,stride) */ //line:mem:fcyc | |||||
return (size / stride) / (cycles / Mhz); /* Convert cycles to MB/s */ //line:mem:bwcompute | |||||
} | |||||
/* $end mountainfuns */ | |||||
@ -0,0 +1,45 @@ | |||||
// | |||||
// Created by GentleCold on 2022/11/7. | |||||
// | |||||
#ifndef CSAPPLEARNING_MOUNTAIN_H | |||||
#define CSAPPLEARNING_MOUNTAIN_H | |||||
#include <stdio.h> | |||||
#define MAXELEMS 10000
long data[MAXELEMS];

/*
 * read - sum the first elems entries of data, visiting every stride-th
 * element. Four accesses are made per iteration into four separate
 * accumulators; a second loop handles the leftover elements.
 */
int read(int elems, int stride) {
    long step2 = stride * 2, step3 = stride * 3, step4 = stride * 4;
    long sum0 = 0, sum1 = 0, sum2 = 0, sum3 = 0;
    long length = elems;
    long limit = length - step4;
    long i = 0;

    while (i < limit) {
        sum0 = sum0 + data[i];
        sum1 = sum1 + data[i + stride];
        sum2 = sum2 + data[i + step2];
        sum3 = sum3 + data[i + step3];
        i += step4;
    }

    while (i < length) {
        sum0 = sum0 + data[i];
        i += stride;
    }

    return ((sum0 + sum1) + (sum2 + sum3));
}
double run(int size, int stride, double Mhz) { | |||||
double cycles; | |||||
int elems = size / sizeof(double); | |||||
read(elems, stride); | |||||
cycles = fcyc2(); | |||||
} | |||||
/*
 * mountain - placeholder entry point; not implemented yet.
 * Returns 0 so that callers never read an indeterminate value from a
 * non-void function that falls off its end.
 */
int mountain(void) {
    return 0;
}
#endif //CSAPPLEARNING_MOUNTAIN_H |