@ -0,0 +1,30 @@ | |||
#
# Students' Makefile for the Malloc Lab
#
TEAM = bovik
VERSION = 1
HANDINDIR = /afs/cs.cmu.edu/academic/class/15213-f01/malloclab/handin

CC = gcc
CFLAGS = -Wall -O2 -m32

OBJS = mdriver.o mm.o memlib.o fsecs.o fcyc.o clock.o ftimer.o

# handin and clean name actions, not files; declaring them phony keeps
# them working even if a file called "handin" or "clean" exists.
.PHONY: handin clean

mdriver: $(OBJS)
	$(CC) $(CFLAGS) -o mdriver $(OBJS)

# Each object lists the project headers its source file #includes, so
# that editing a header (config.h in particular) rebuilds its users.
mdriver.o: mdriver.c fsecs.h fcyc.h clock.h memlib.h config.h mm.h
memlib.o: memlib.c memlib.h config.h
mm.o: mm.c mm.h memlib.h
fsecs.o: fsecs.c fsecs.h fcyc.h clock.h ftimer.h config.h
fcyc.o: fcyc.c fcyc.h clock.h
ftimer.o: ftimer.c ftimer.h config.h
clock.o: clock.c clock.h

# Copy the solution into the hand-in directory as TEAM-VERSION-mm.c
handin:
	cp mm.c $(HANDINDIR)/$(TEAM)-$(VERSION)-mm.c

# Remove editor backups, objects and the driver binary
clean:
	rm -f *~ *.o mdriver
@ -0,0 +1,52 @@ | |||
##################################################################### | |||
# CS:APP Malloc Lab | |||
# Handout files for students | |||
# | |||
# Copyright (c) 2002, R. Bryant and D. O'Hallaron, All rights reserved. | |||
# May not be used, modified, or copied without permission. | |||
# | |||
###################################################################### | |||
*********** | |||
Main Files: | |||
*********** | |||
mm.{c,h} | |||
Your solution malloc package. mm.c is the file that you | |||
will be handing in, and is the only file you should modify. | |||
mdriver.c | |||
The malloc driver that tests your mm.c file | |||
short{1,2}-bal.rep | |||
Two tiny tracefiles to help you get started. | |||
Makefile | |||
Builds the driver | |||
********************************** | |||
Other support files for the driver | |||
********************************** | |||
config.h Configures the malloc lab driver | |||
fsecs.{c,h} Wrapper function for the different timer packages | |||
clock.{c,h} Routines for accessing the Pentium and Alpha cycle counters | |||
fcyc.{c,h} Timer functions based on cycle counters | |||
ftimer.{c,h} Timer functions based on interval timers and gettimeofday() | |||
memlib.{c,h} Models the heap and sbrk function | |||
******************************* | |||
Building and running the driver | |||
******************************* | |||
To build the driver, type "make" at the shell prompt.
To run the driver on a tiny test trace: | |||
unix> mdriver -V -f short1-bal.rep | |||
The -V option prints out helpful tracing and summary information. | |||
To get a list of the driver flags: | |||
unix> mdriver -h | |||
@ -0,0 +1,279 @@ | |||
/* | |||
* clock.c - Routines for using the cycle counters on x86, | |||
* Alpha, and Sparc boxes. | |||
* | |||
* Copyright (c) 2002, R. Bryant and D. O'Hallaron, All rights reserved. | |||
* May not be used, modified, or copied without permission. | |||
*/ | |||
#include <stdio.h> | |||
#include <stdlib.h> | |||
#include <unistd.h> | |||
#include <sys/times.h> | |||
#include "clock.h" | |||
/******************************************************* | |||
* Machine dependent functions | |||
* | |||
* Note: the constants __i386__ and __alpha | |||
* are set by GCC when it calls the C preprocessor | |||
* You can verify this for yourself using gcc -v. | |||
*******************************************************/ | |||
#if defined(__i386__) | |||
/******************************************************* | |||
* Pentium versions of start_counter() and get_counter() | |||
*******************************************************/ | |||
/* $begin x86cyclecounter */ | |||
/* Initialize the cycle counter */ | |||
static unsigned cyc_hi = 0; | |||
static unsigned cyc_lo = 0; | |||
/*
 * access_counter - Read the processor's cycle counter with the rdtsc
 *     instruction. Stores the high-order 32 bits in *hi and the
 *     low-order 32 bits in *lo.
 *
 * The asm statement is volatile: it has no inputs, so without the
 * qualifier the compiler may treat two reads as the same value and
 * merge them or move them out of a loop. rdtsc leaves its result in
 * EDX:EAX, so the outputs are bound to those registers directly
 * instead of being copied through extra movl instructions.
 */
void access_counter(unsigned *hi, unsigned *lo)
{
    unsigned hi_bits, lo_bits;

    __asm__ __volatile__("rdtsc"
                         : "=d" (hi_bits), "=a" (lo_bits));
    *hi = hi_bits;
    *lo = lo_bits;
}
/* Record the current value of the cycle counter. */ | |||
void start_counter() | |||
{ | |||
access_counter(&cyc_hi, &cyc_lo); | |||
} | |||
/* Return the number of cycles since the last call to start_counter. */ | |||
double get_counter() | |||
{ | |||
unsigned ncyc_hi, ncyc_lo; | |||
unsigned hi, lo, borrow; | |||
double result; | |||
/* Get cycle counter */ | |||
access_counter(&ncyc_hi, &ncyc_lo); | |||
/* Do double precision subtraction */ | |||
lo = ncyc_lo - cyc_lo; | |||
borrow = lo > ncyc_lo; | |||
hi = ncyc_hi - cyc_hi - borrow; | |||
result = (double) hi * (1 << 30) * 4 + lo; | |||
if (result < 0) { | |||
fprintf(stderr, "Error: counter returns neg value: %.0f\n", result); | |||
} | |||
return result; | |||
} | |||
/* $end x86cyclecounter */ | |||
#elif defined(__alpha) | |||
/**************************************************** | |||
* Alpha versions of start_counter() and get_counter() | |||
***************************************************/ | |||
/* Initialize the cycle counter */ | |||
static unsigned cyc_hi = 0; | |||
static unsigned cyc_lo = 0; | |||
/* Use Alpha cycle timer to compute cycles. Then use | |||
measured clock speed to compute seconds | |||
*/ | |||
/*
 * counterRoutine - Alpha machine instructions, stored as data, that read
 * the processor cycle counter with the rpcc instruction and return the
 * user-space count as an unsigned int.
 *
 * The rpcc register is 64 bits wide: the lower 32 bits count cycles used
 * by the current process and the upper 32 bits count wall-clock cycles.
 * The routine keeps only the lower 32 bits.
 *
 * NOTE: The counter has a very limited time span. With a 450 MHz clock
 * it can time things for only about 9 seconds.
 */
static unsigned int counterRoutine[] = { 0x601fc000u, 0x401f0000u, 0x6bfa8001u };

/* View the instruction words above as a callable function. */
static unsigned int (*counter)(void) = (void *)counterRoutine;
void start_counter() | |||
{ | |||
/* Get cycle counter */ | |||
cyc_hi = 0; | |||
cyc_lo = counter(); | |||
} | |||
double get_counter() | |||
{ | |||
unsigned ncyc_hi, ncyc_lo; | |||
unsigned hi, lo, borrow; | |||
double result; | |||
ncyc_lo = counter(); | |||
ncyc_hi = 0; | |||
lo = ncyc_lo - cyc_lo; | |||
borrow = lo > ncyc_lo; | |||
hi = ncyc_hi - cyc_hi - borrow; | |||
result = (double) hi * (1 << 30) * 4 + lo; | |||
if (result < 0) { | |||
fprintf(stderr, "Error: Cycle counter returning negative value: %.0f\n", result); | |||
} | |||
return result; | |||
} | |||
#else | |||
/**************************************************************** | |||
* All the other platforms for which we haven't implemented cycle | |||
* counter routines. Newer models of sparcs (v8plus) have cycle | |||
* counters that can be accessed from user programs, but since there | |||
* are still many sparc boxes out there that don't support this, we | |||
* haven't provided a Sparc version here. | |||
***************************************************************/ | |||
/*
 * start_counter - Stub for platforms without a cycle counter
 *     implementation: print an explanation and exit with status 1.
 */
void start_counter()
{
    printf("ERROR: You are trying to use a start_counter routine in clock.c\n"
           "that has not been implemented yet on this platform.\n"
           "Please choose another timing package in config.h.\n");
    exit(1);
}
/*
 * get_counter - Stub for platforms without a cycle counter
 *     implementation: print an explanation and exit with status 1.
 *     Never returns a value.
 */
double get_counter()
{
    printf("ERROR: You are trying to use a get_counter routine in clock.c\n"
           "that has not been implemented yet on this platform.\n"
           "Please choose another timing package in config.h.\n");
    exit(1);
}
#endif | |||
/******************************* | |||
* Machine-independent functions | |||
******************************/ | |||
/*
 * ovhd - Measure the cost, in cycles, of a back-to-back
 *     start_counter()/get_counter() pair. The measurement is taken
 *     twice and the second value returned, to eliminate cache effects.
 */
double ovhd()
{
    double cycles;
    int pass = 0;

    do {
        start_counter();
        cycles = get_counter();
    } while (++pass < 2);
    return cycles;
}
/* $begin mhz */ | |||
/*
 * mhz_full - Estimate the processor clock rate in MHz by counting the
 *     cycles that elapse during a sleep of sleeptime seconds.
 *     Prints the estimate when verbose is nonzero.
 */
double mhz_full(int verbose, int sleeptime)
{
    double cycles;
    double rate;

    start_counter();
    sleep(sleeptime);
    cycles = get_counter();

    /* cycles per microsecond == MHz */
    rate = cycles / (1e6*sleeptime);
    if (verbose) {
        printf("Processor clock rate ~= %.1f MHz\n", rate);
    }
    return rate;
}
/* $end mhz */ | |||
/*
 * mhz - Estimate the clock rate with mhz_full() using the default
 *     2-second sleep.
 */
double mhz(int verbose)
{
    const int default_sleeptime = 2;

    return mhz_full(verbose, default_sleeptime);
}
/** Special counters that compensate for timer interrupt overhead */ | |||
static double cyc_per_tick = 0.0; | |||
#define NEVENT 100 | |||
#define THRESHOLD 1000 | |||
#define RECORDTHRESH 3000 | |||
/* Attempt to see how much time is used by timer interrupt */ | |||
static void callibrate(int verbose) | |||
{ | |||
double oldt; | |||
struct tms t; | |||
clock_t oldc; | |||
int e = 0; | |||
times(&t); | |||
oldc = t.tms_utime; | |||
start_counter(); | |||
oldt = get_counter(); | |||
while (e <NEVENT) { | |||
double newt = get_counter(); | |||
if (newt-oldt >= THRESHOLD) { | |||
clock_t newc; | |||
times(&t); | |||
newc = t.tms_utime; | |||
if (newc > oldc) { | |||
double cpt = (newt-oldt)/(newc-oldc); | |||
if ((cyc_per_tick == 0.0 || cyc_per_tick > cpt) && cpt > RECORDTHRESH) | |||
cyc_per_tick = cpt; | |||
/* | |||
if (verbose) | |||
printf("Saw event lasting %.0f cycles and %d ticks. Ratio = %f\n", | |||
newt-oldt, (int) (newc-oldc), cpt); | |||
*/ | |||
e++; | |||
oldc = newc; | |||
} | |||
oldt = newt; | |||
} | |||
} | |||
if (verbose) | |||
printf("Setting cyc_per_tick to %f\n", cyc_per_tick); | |||
} | |||
static clock_t start_tick = 0; | |||
void start_comp_counter() | |||
{ | |||
struct tms t; | |||
if (cyc_per_tick == 0.0) | |||
callibrate(0); | |||
times(&t); | |||
start_tick = t.tms_utime; | |||
start_counter(); | |||
} | |||
double get_comp_counter() | |||
{ | |||
double time = get_counter(); | |||
double ctime; | |||
struct tms t; | |||
clock_t ticks; | |||
times(&t); | |||
ticks = t.tms_utime - start_tick; | |||
ctime = time - ticks*cyc_per_tick; | |||
/* | |||
printf("Measured %.0f cycles. Ticks = %d. Corrected %.0f cycles\n", | |||
time, (int) ticks, ctime); | |||
*/ | |||
return ctime; | |||
} | |||
@ -0,0 +1,22 @@ | |||
/* Routines for using cycle counter */
#ifndef CLOCK_H
#define CLOCK_H

/* Start the counter */
void start_counter(void);

/* Get # cycles since counter started */
double get_counter(void);

/* Measure overhead for counter */
double ovhd(void);

/* Determine clock rate of processor (using a default sleeptime) */
double mhz(int verbose);

/* Determine clock rate of processor, having more control over accuracy */
double mhz_full(int verbose, int sleeptime);

/** Special counters that compensate for timer interrupt overhead */
void start_comp_counter(void);
double get_comp_counter(void);

#endif /* CLOCK_H */
@ -0,0 +1,72 @@ | |||
#ifndef CONFIG_H
#define CONFIG_H

/*
 * config.h - malloc lab configuration file
 *
 * Copyright (c) 2002, R. Bryant and D. O'Hallaron, All rights reserved.
 * May not be used, modified, or copied without permission.
 */

/*
 * This is the default path where the driver will look for the
 * default tracefiles. You can override it at runtime with the -t flag.
 */
#define TRACEDIR "/afs/cs/project/ics2/im/labs/malloclab/traces/"

/*
 * This is the list of default tracefiles in TRACEDIR that the driver
 * will use for testing. Modify this if you want to add or delete
 * traces from the driver's test suite. For example, if you don't want
 * your students to implement realloc, you can delete the last two
 * traces.
 */
#define DEFAULT_TRACEFILES \
  "amptjp-bal.rep",\
  "cccp-bal.rep",\
  "cp-decl-bal.rep",\
  "expr-bal.rep",\
  "coalescing-bal.rep",\
  "random-bal.rep",\
  "random2-bal.rep",\
  "binary-bal.rep",\
  "binary2-bal.rep",\
  "realloc-bal.rep",\
  "realloc2-bal.rep"

/*
 * This constant gives the estimated performance of the libc malloc
 * package using our traces on some reference system, typically the
 * same kind of system the students use. Its purpose is to cap the
 * contribution of throughput to the performance index. Once the
 * students surpass the AVG_LIBC_THRUPUT, they get no further benefit
 * to their score. This deters students from building extremely fast,
 * but extremely stupid malloc packages.
 */
#define AVG_LIBC_THRUPUT      600E3  /* 600 Kops/sec */

/*
 * This constant determines the contributions of space utilization
 * (UTIL_WEIGHT) and throughput (1 - UTIL_WEIGHT) to the performance
 * index.
 */
#define UTIL_WEIGHT .60

/*
 * Alignment requirement in bytes (either 4 or 8)
 */
#define ALIGNMENT 8

/*
 * Maximum heap size in bytes
 */
#define MAX_HEAP (20*(1<<20))  /* 20 MB */

/*****************************************************************************
 * Set exactly one of these USE_xxx constants to "1" to select a timing method
 *****************************************************************************/
#define USE_FCYC   0   /* cycle counter w/K-best scheme (x86 & Alpha only) */
#define USE_ITIMER 0   /* interval timer (any Unix box) */
#define USE_GETTOD 1   /* gettimeofday (any Unix box) */

/* The timing code needs a method to be enabled; catch an all-zero
   configuration at compile time instead of at run time. */
#if !(USE_FCYC || USE_ITIMER || USE_GETTOD)
#error "config.h: set one of USE_FCYC, USE_ITIMER or USE_GETTOD to 1"
#endif

#endif /* CONFIG_H */
@ -0,0 +1,251 @@ | |||
/* | |||
* fcyc.c - Estimate the time (in CPU cycles) used by a function f | |||
* | |||
* Copyright (c) 2002, R. Bryant and D. O'Hallaron, All rights reserved. | |||
* May not be used, modified, or copied without permission. | |||
* | |||
* Uses the cycle timer routines in clock.c to estimate the | |||
* the time in CPU cycles for a function f. | |||
*/ | |||
#include <stdlib.h> | |||
#include <sys/times.h> | |||
#include <stdio.h> | |||
#include "fcyc.h" | |||
#include "clock.h" | |||
/* Default values */ | |||
#define K 3 /* Value of K in K-best scheme */ | |||
#define MAXSAMPLES 20 /* Give up after MAXSAMPLES */ | |||
#define EPSILON 0.01 /* K samples should be EPSILON of each other*/ | |||
#define COMPENSATE 0 /* 1-> try to compensate for clock ticks */ | |||
#define CLEAR_CACHE 0 /* Clear cache before running test function */ | |||
#define CACHE_BYTES (1<<19) /* Max cache size in bytes */ | |||
#define CACHE_BLOCK 32 /* Cache block size in bytes */ | |||
static int kbest = K; | |||
static int maxsamples = MAXSAMPLES; | |||
static double epsilon = EPSILON; | |||
static int compensate = COMPENSATE; | |||
static int clear_cache = CLEAR_CACHE; | |||
static int cache_bytes = CACHE_BYTES; | |||
static int cache_block = CACHE_BLOCK; | |||
static int *cache_buf = NULL; | |||
static double *values = NULL; | |||
static int samplecount = 0; | |||
/* for debugging only */ | |||
#define KEEP_VALS 0 | |||
#define KEEP_SAMPLES 0 | |||
#if KEEP_SAMPLES | |||
static double *samples = NULL; | |||
#endif | |||
/* | |||
* init_sampler - Start new sampling process | |||
*/ | |||
static void init_sampler() | |||
{ | |||
if (values) | |||
free(values); | |||
values = calloc(kbest, sizeof(double)); | |||
#if KEEP_SAMPLES | |||
if (samples) | |||
free(samples); | |||
/* Allocate extra for wraparound analysis */ | |||
samples = calloc(maxsamples+kbest, sizeof(double)); | |||
#endif | |||
samplecount = 0; | |||
} | |||
/* | |||
* add_sample - Add new sample | |||
*/ | |||
static void add_sample(double val) | |||
{ | |||
int pos = 0; | |||
if (samplecount < kbest) { | |||
pos = samplecount; | |||
values[pos] = val; | |||
} else if (val < values[kbest-1]) { | |||
pos = kbest-1; | |||
values[pos] = val; | |||
} | |||
#if KEEP_SAMPLES | |||
samples[samplecount] = val; | |||
#endif | |||
samplecount++; | |||
/* Insertion sort */ | |||
while (pos > 0 && values[pos-1] > values[pos]) { | |||
double temp = values[pos-1]; | |||
values[pos-1] = values[pos]; | |||
values[pos] = temp; | |||
pos--; | |||
} | |||
} | |||
/* | |||
* has_converged- Have kbest minimum measurements converged within epsilon? | |||
*/ | |||
static int has_converged() | |||
{ | |||
return | |||
(samplecount >= kbest) && | |||
((1 + epsilon)*values[0] >= values[kbest-1]); | |||
} | |||
/* | |||
* clear - Code to clear cache | |||
*/ | |||
static volatile int sink = 0; | |||
static void clear() | |||
{ | |||
int x = sink; | |||
int *cptr, *cend; | |||
int incr = cache_block/sizeof(int); | |||
if (!cache_buf) { | |||
cache_buf = malloc(cache_bytes); | |||
if (!cache_buf) { | |||
fprintf(stderr, "Fatal error. Malloc returned null when trying to clear cache\n"); | |||
exit(1); | |||
} | |||
} | |||
cptr = (int *) cache_buf; | |||
cend = cptr + cache_bytes/sizeof(int); | |||
while (cptr < cend) { | |||
x += *cptr; | |||
cptr += incr; | |||
} | |||
sink = x; | |||
} | |||
/* | |||
* fcyc - Use K-best scheme to estimate the running time of function f | |||
*/ | |||
double fcyc(test_funct f, void *argp) | |||
{ | |||
double result; | |||
init_sampler(); | |||
if (compensate) { | |||
do { | |||
double cyc; | |||
if (clear_cache) | |||
clear(); | |||
start_comp_counter(); | |||
f(argp); | |||
cyc = get_comp_counter(); | |||
add_sample(cyc); | |||
} while (!has_converged() && samplecount < maxsamples); | |||
} else { | |||
do { | |||
double cyc; | |||
if (clear_cache) | |||
clear(); | |||
start_counter(); | |||
f(argp); | |||
cyc = get_counter(); | |||
add_sample(cyc); | |||
} while (!has_converged() && samplecount < maxsamples); | |||
} | |||
#ifdef DEBUG | |||
{ | |||
int i; | |||
printf(" %d smallest values: [", kbest); | |||
for (i = 0; i < kbest; i++) | |||
printf("%.0f%s", values[i], i==kbest-1 ? "]\n" : ", "); | |||
} | |||
#endif | |||
result = values[0]; | |||
#if !KEEP_VALS | |||
free(values); | |||
values = NULL; | |||
#endif | |||
return result; | |||
} | |||
/************************************************************* | |||
* Set the various parameters used by the measurement routines | |||
************************************************************/ | |||
/* | |||
* set_fcyc_clear_cache - When set, will run code to clear cache | |||
* before each measurement. | |||
* Default = 0 | |||
*/ | |||
void set_fcyc_clear_cache(int clear) | |||
{ | |||
clear_cache = clear; | |||
} | |||
/* | |||
* set_fcyc_cache_size - Set size of cache to use when clearing cache | |||
* Default = 1<<19 (512KB) | |||
*/ | |||
void set_fcyc_cache_size(int bytes) | |||
{ | |||
if (bytes != cache_bytes) { | |||
cache_bytes = bytes; | |||
if (cache_buf) { | |||
free(cache_buf); | |||
cache_buf = NULL; | |||
} | |||
} | |||
} | |||
/* | |||
* set_fcyc_cache_block - Set size of cache block | |||
* Default = 32 | |||
*/ | |||
void set_fcyc_cache_block(int bytes) { | |||
cache_block = bytes; | |||
} | |||
/* | |||
* set_fcyc_compensate- When set, will attempt to compensate for | |||
* timer interrupt overhead | |||
* Default = 0 | |||
*/ | |||
void set_fcyc_compensate(int compensate_arg) | |||
{ | |||
compensate = compensate_arg; | |||
} | |||
/* | |||
* set_fcyc_k - Value of K in K-best measurement scheme | |||
* Default = 3 | |||
*/ | |||
void set_fcyc_k(int k) | |||
{ | |||
kbest = k; | |||
} | |||
/* | |||
* set_fcyc_maxsamples - Maximum number of samples attempting to find | |||
* K-best within some tolerance. | |||
* When exceeded, just return best sample found. | |||
* Default = 20 | |||
*/ | |||
void set_fcyc_maxsamples(int maxsamples_arg) | |||
{ | |||
maxsamples = maxsamples_arg; | |||
} | |||
/* | |||
* set_fcyc_epsilon - Tolerance required for K-best | |||
* Default = 0.01 | |||
*/ | |||
void set_fcyc_epsilon(double epsilon_arg) | |||
{ | |||
epsilon = epsilon_arg; | |||
} | |||
@ -0,0 +1,68 @@ | |||
/*
 * fcyc.h - prototypes for the routines in fcyc.c that estimate the
 *     time in CPU cycles used by a test function f
 *
 * Copyright (c) 2002, R. Bryant and D. O'Hallaron, All rights reserved.
 * May not be used, modified, or copied without permission.
 *
 */
#ifndef FCYC_H
#define FCYC_H

/* The test function takes a generic pointer as input */
typedef void (*test_funct)(void *);

/* Compute number of cycles used by test function f */
double fcyc(test_funct f, void* argp);

/*********************************************************
 * Set the various parameters used by measurement routines
 *********************************************************/

/*
 * set_fcyc_clear_cache - When set, will run code to clear cache
 *     before each measurement.
 *     Default = 0
 */
void set_fcyc_clear_cache(int clear);

/*
 * set_fcyc_cache_size - Set size of cache to use when clearing cache
 *     Default = 1<<19 (512KB)
 */
void set_fcyc_cache_size(int bytes);

/*
 * set_fcyc_cache_block - Set size of cache block
 *     Default = 32
 */
void set_fcyc_cache_block(int bytes);

/*
 * set_fcyc_compensate- When set, will attempt to compensate for
 *     timer interrupt overhead
 *     Default = 0
 */
void set_fcyc_compensate(int compensate_arg);

/*
 * set_fcyc_k - Value of K in K-best measurement scheme
 *     Default = 3
 */
void set_fcyc_k(int k);

/*
 * set_fcyc_maxsamples - Maximum number of samples attempting to find
 *     K-best within some tolerance.
 *     When exceeded, just return best sample found.
 *     Default = 20
 */
void set_fcyc_maxsamples(int maxsamples_arg);

/*
 * set_fcyc_epsilon - Tolerance required for K-best
 *     Default = 0.01
 */
void set_fcyc_epsilon(double epsilon_arg);

#endif /* FCYC_H */
@ -0,0 +1,57 @@ | |||
/**************************** | |||
* High-level timing wrappers | |||
****************************/ | |||
#include <stdio.h> | |||
#include "fsecs.h" | |||
#include "fcyc.h" | |||
#include "clock.h" | |||
#include "ftimer.h" | |||
#include "config.h" | |||
static double Mhz; /* estimated CPU clock frequency */ | |||
extern int verbose; /* -v option in mdriver.c */ | |||
/* | |||
* init_fsecs - initialize the timing package | |||
*/ | |||
void init_fsecs(void) | |||
{ | |||
Mhz = 0; /* keep gcc -Wall happy */ | |||
#if USE_FCYC | |||
if (verbose) | |||
printf("Measuring performance with a cycle counter.\n"); | |||
/* set key parameters for the fcyc package */ | |||
set_fcyc_maxsamples(20); | |||
set_fcyc_clear_cache(1); | |||
set_fcyc_compensate(1); | |||
set_fcyc_epsilon(0.01); | |||
set_fcyc_k(3); | |||
Mhz = mhz(verbose > 0); | |||
#elif USE_ITIMER | |||
if (verbose) | |||
printf("Measuring performance with the interval timer.\n"); | |||
#elif USE_GETTOD | |||
if (verbose) | |||
printf("Measuring performance with gettimeofday().\n"); | |||
#endif | |||
} | |||
/* | |||
* fsecs - Return the running time of a function f (in seconds) | |||
*/ | |||
double fsecs(fsecs_test_funct f, void *argp) | |||
{ | |||
#if USE_FCYC | |||
double cycles = fcyc(f, argp); | |||
return cycles/(Mhz*1e6); | |||
#elif USE_ITIMER | |||
return ftimer_itimer(f, argp, 10); | |||
#elif USE_GETTOD | |||
return ftimer_gettod(f, argp, 10); | |||
#endif | |||
} | |||
@ -0,0 +1,4 @@ | |||
/*
 * fsecs.h - High-level timing wrappers
 */
#ifndef FSECS_H
#define FSECS_H

/* The function being timed takes a generic pointer as input */
typedef void (*fsecs_test_funct)(void *);

/* Initialize the timing package */
void init_fsecs(void);

/* Return the running time of f(argp) in seconds */
double fsecs(fsecs_test_funct f, void *argp);

#endif /* FSECS_H */
@ -0,0 +1,106 @@ | |||
/* | |||
* ftimer.c - Estimate the time (in seconds) used by a function f | |||
* | |||
* Copyright (c) 2002, R. Bryant and D. O'Hallaron, All rights reserved. | |||
* May not be used, modified, or copied without permission. | |||
* | |||
* Function timers that estimate the running time (in seconds) of a function f. | |||
* ftimer_itimer: version that uses the interval timer | |||
* ftimer_gettod: version that uses gettimeofday | |||
*/ | |||
#include <stdio.h> | |||
#include <sys/time.h> | |||
#include "ftimer.h" | |||
/* function prototypes */ | |||
static void init_etime(void); | |||
static double get_etime(void); | |||
/* | |||
* ftimer_itimer - Use the interval timer to estimate the running time | |||
* of f(argp). Return the average of n runs. | |||
*/ | |||
double ftimer_itimer(ftimer_test_funct f, void *argp, int n) | |||
{ | |||
double start, tmeas; | |||
int i; | |||
init_etime(); | |||
start = get_etime(); | |||
for (i = 0; i < n; i++) | |||
f(argp); | |||
tmeas = get_etime() - start; | |||
return tmeas / n; | |||
} | |||
/* | |||
* ftimer_gettod - Use gettimeofday to estimate the running time of | |||
* f(argp). Return the average of n runs. | |||
*/ | |||
double ftimer_gettod(ftimer_test_funct f, void *argp, int n) | |||
{ | |||
int i; | |||
struct timeval stv, etv; | |||
double diff; | |||
gettimeofday(&stv, NULL); | |||
for (i = 0; i < n; i++) | |||
f(argp); | |||
gettimeofday(&etv,NULL); | |||
diff = 1E3*(etv.tv_sec - stv.tv_sec) + 1E-3*(etv.tv_usec-stv.tv_usec); | |||
diff /= n; | |||
return (1E-3*diff); | |||
} | |||
/* | |||
* Routines for manipulating the Unix interval timer | |||
*/ | |||
/* The initial value of the interval timer */ | |||
#define MAX_ETIME 86400 | |||
/* static variables that hold the initial value of the interval timer */ | |||
static struct itimerval first_u; /* user time */ | |||
static struct itimerval first_r; /* real time */ | |||
static struct itimerval first_p; /* prof time*/ | |||
/* init the timer */ | |||
static void init_etime(void) | |||
{ | |||
first_u.it_interval.tv_sec = 0; | |||
first_u.it_interval.tv_usec = 0; | |||
first_u.it_value.tv_sec = MAX_ETIME; | |||
first_u.it_value.tv_usec = 0; | |||
setitimer(ITIMER_VIRTUAL, &first_u, NULL); | |||
first_r.it_interval.tv_sec = 0; | |||
first_r.it_interval.tv_usec = 0; | |||
first_r.it_value.tv_sec = MAX_ETIME; | |||
first_r.it_value.tv_usec = 0; | |||
setitimer(ITIMER_REAL, &first_r, NULL); | |||
first_p.it_interval.tv_sec = 0; | |||
first_p.it_interval.tv_usec = 0; | |||
first_p.it_value.tv_sec = MAX_ETIME; | |||
first_p.it_value.tv_usec = 0; | |||
setitimer(ITIMER_PROF, &first_p, NULL); | |||
} | |||
/* return elapsed real seconds since call to init_etime */ | |||
static double get_etime(void) { | |||
struct itimerval v_curr; | |||
struct itimerval r_curr; | |||
struct itimerval p_curr; | |||
getitimer(ITIMER_VIRTUAL, &v_curr); | |||
getitimer(ITIMER_REAL,&r_curr); | |||
getitimer(ITIMER_PROF,&p_curr); | |||
return (double) ((first_p.it_value.tv_sec - r_curr.it_value.tv_sec) + | |||
(first_p.it_value.tv_usec - r_curr.it_value.tv_usec)*1e-6); | |||
} | |||
@ -0,0 +1,14 @@ | |||
/*
 * Function timers
 */
#ifndef FTIMER_H
#define FTIMER_H

/* The function being timed takes a generic pointer as input */
typedef void (*ftimer_test_funct)(void *);

/* Estimate the running time of f(argp) using the Unix interval timer.
   Return the average of n runs */
double ftimer_itimer(ftimer_test_funct f, void *argp, int n);

/* Estimate the running time of f(argp) using gettimeofday
   Return the average of n runs */
double ftimer_gettod(ftimer_test_funct f, void *argp, int n);

#endif /* FTIMER_H */
@ -0,0 +1,101 @@ | |||
/* | |||
* memlib.c - a module that simulates the memory system. Needed because it | |||
* allows us to interleave calls from the student's malloc package | |||
* with the system's malloc package in libc. | |||
*/ | |||
#include <stdio.h> | |||
#include <stdlib.h> | |||
#include <assert.h> | |||
#include <unistd.h> | |||
#include <sys/mman.h> | |||
#include <string.h> | |||
#include <errno.h> | |||
#include "memlib.h" | |||
#include "config.h" | |||
/* private variables */
static char *mem_start_brk;  /* points to first byte of heap */
static char *mem_brk;        /* points one byte past the last byte of heap */
static char *mem_max_addr;   /* end of the modeled memory: mem_brk may not exceed it */
/* | |||
* mem_init - initialize the memory system model | |||
*/ | |||
void mem_init(void) | |||
{ | |||
/* allocate the storage we will use to model the available VM */ | |||
if ((mem_start_brk = (char *)malloc(MAX_HEAP)) == NULL) { | |||
fprintf(stderr, "mem_init_vm: malloc error\n"); | |||
exit(1); | |||
} | |||
mem_max_addr = mem_start_brk + MAX_HEAP; /* max legal heap address */ | |||
mem_brk = mem_start_brk; /* heap is empty initially */ | |||
} | |||
/* | |||
* mem_deinit - free the storage used by the memory system model | |||
*/ | |||
void mem_deinit(void) | |||
{ | |||
free(mem_start_brk); | |||
} | |||
/* | |||
* mem_reset_brk - reset the simulated brk pointer to make an empty heap | |||
*/ | |||
void mem_reset_brk() | |||
{ | |||
mem_brk = mem_start_brk; | |||
} | |||
/* | |||
* mem_sbrk - simple model of the sbrk function. Extends the heap | |||
* by incr bytes and returns the start address of the new area. In | |||
* this model, the heap cannot be shrunk. | |||
*/ | |||
void *mem_sbrk(int incr) | |||
{ | |||
char *old_brk = mem_brk; | |||
if ( (incr < 0) || ((mem_brk + incr) > mem_max_addr)) { | |||
errno = ENOMEM; | |||
fprintf(stderr, "ERROR: mem_sbrk failed. Ran out of memory...\n"); | |||
return (void *)-1; | |||
} | |||
mem_brk += incr; | |||
return (void *)old_brk; | |||
} | |||
/* | |||
* mem_heap_lo - return address of the first heap byte | |||
*/ | |||
void *mem_heap_lo() | |||
{ | |||
return (void *)mem_start_brk; | |||
} | |||
/* | |||
* mem_heap_hi - return address of last heap byte | |||
*/ | |||
void *mem_heap_hi() | |||
{ | |||
return (void *)(mem_brk - 1); | |||
} | |||
/* | |||
* mem_heapsize() - returns the heap size in bytes | |||
*/ | |||
size_t mem_heapsize() | |||
{ | |||
return (size_t)(mem_brk - mem_start_brk); | |||
} | |||
/*
 * mem_pagesize() - returns the page size of the system
 *
 * Uses sysconf(_SC_PAGESIZE), the POSIX interface, instead of the
 * legacy getpagesize() call.
 */
size_t mem_pagesize()
{
    return (size_t)sysconf(_SC_PAGESIZE);
}
@ -0,0 +1,11 @@ | |||
/*
 * memlib.h - interface to the memory system model in memlib.c
 */
#ifndef MEMLIB_H
#define MEMLIB_H

#include <unistd.h>

void mem_init(void);
void mem_deinit(void);
void *mem_sbrk(int incr);
void mem_reset_brk(void);
void *mem_heap_lo(void);
void *mem_heap_hi(void);
size_t mem_heapsize(void);
size_t mem_pagesize(void);

#endif /* MEMLIB_H */
@ -0,0 +1,110 @@ | |||
/* | |||
* mm-naive.c - The fastest, least memory-efficient malloc package. | |||
* | |||
* In this naive approach, a block is allocated by simply incrementing | |||
* the brk pointer. A block is pure payload. There are no headers or | |||
* footers. Blocks are never coalesced or reused. Realloc is | |||
* implemented directly using mm_malloc and mm_free. | |||
* | |||
* NOTE TO STUDENTS: Replace this header comment with your own header | |||
* comment that gives a high level description of your solution. | |||
*/ | |||
#include <assert.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include "mm.h"
#include "memlib.h"
/********************************************************* | |||
* NOTE TO STUDENTS: Before you do anything else, please | |||
* provide your team information in the following struct. | |||
********************************************************/ | |||
team_t team = { | |||
/* Team name */ | |||
"ateam", | |||
/* First member's full name */ | |||
"Harry Bovik", | |||
/* First member's email address */ | |||
"bovik@cs.cmu.edu", | |||
/* Second member's full name (leave blank if none) */ | |||
"", | |||
/* Second member's email address (leave blank if none) */ | |||
"" | |||
}; | |||
/* single word (4) or double word (8) alignment */
#define ALIGNMENT 8

/* rounds up to the nearest multiple of ALIGNMENT; the mask is derived
   from ALIGNMENT (a power of two) so the two cannot get out of step */
#define ALIGN(size) (((size) + (ALIGNMENT-1)) & ~(ALIGNMENT-1))

/* size of a size_t header, rounded up to the alignment */
#define SIZE_T_SIZE (ALIGN(sizeof(size_t)))
/*
 * mm_init - Initialize the malloc package. The naive allocator keeps
 *     no state, so there is nothing to set up; always returns 0.
 */
int mm_init(void)
{
    const int ok = 0;

    return ok;
}
/* | |||
* mm_malloc - Allocate a block by incrementing the brk pointer. | |||
* Always allocate a block whose size is a multiple of the alignment. | |||
*/ | |||
void *mm_malloc(size_t size) | |||
{ | |||
int newsize = ALIGN(size + SIZE_T_SIZE); | |||
void *p = mem_sbrk(newsize); | |||
if (p == (void *)-1) | |||
return NULL; | |||
else { | |||
*(size_t *)p = size; | |||
return (void *)((char *)p + SIZE_T_SIZE); | |||
} | |||
} | |||
/*
 * mm_free - Release a block.
 *     The naive allocator never reuses memory, so this is a deliberate no-op.
 */
void mm_free(void *ptr)
{
    (void)ptr; /* intentionally unused */
}
/* | |||
* mm_realloc - Implemented simply in terms of mm_malloc and mm_free | |||
*/ | |||
void *mm_realloc(void *ptr, size_t size) | |||
{ | |||
void *oldptr = ptr; | |||
void *newptr; | |||
size_t copySize; | |||
newptr = mm_malloc(size); | |||
if (newptr == NULL) | |||
return NULL; | |||
copySize = *(size_t *)((char *)oldptr - SIZE_T_SIZE); | |||
if (size < copySize) | |||
copySize = size; | |||
memcpy(newptr, oldptr, copySize); | |||
mm_free(oldptr); | |||
return newptr; | |||
} | |||
@ -0,0 +1,23 @@ | |||
#include <stdio.h> | |||
extern int mm_init (void); | |||
extern void *mm_malloc (size_t size); | |||
extern void mm_free (void *ptr); | |||
extern void *mm_realloc(void *ptr, size_t size); | |||
/* | |||
* Students work in teams of one or two. Teams enter their team name, | |||
* personal names and login IDs in a struct of this | |||
* type in their mm.c file. | |||
*/ | |||
typedef struct { | |||
char *teamname; /* team name (ID1+ID2 or ID1) */ | |||
char *name1; /* full name of first member */ | |||
char *id1; /* login ID of first member (mm.c supplies an email address here) */ | |||
char *name2; /* full name of second member, empty string if none */ | |||
char *id2; /* login ID of second member, empty string if none */ | |||
} team_t; | |||
extern team_t team; | |||
@ -0,0 +1,16 @@ | |||
20000 | |||
6 | |||
12 | |||
1 | |||
a 0 2040 | |||
a 1 2040 | |||
f 1 | |||
a 2 48 | |||
a 3 4072 | |||
f 3 | |||
a 4 4072 | |||
f 0 | |||
f 2 | |||
a 5 4072 | |||
f 4 | |||
f 5 |
@ -0,0 +1,16 @@ | |||
20000 | |||
6 | |||
12 | |||
1 | |||
a 0 2040 | |||
a 1 4010 | |||
a 2 48 | |||
a 3 4072 | |||
a 4 4072 | |||
a 5 4072 | |||
f 0 | |||
f 1 | |||
f 2 | |||
f 3 | |||
f 4 | |||
f 5 |
@ -0,0 +1,188 @@ | |||
#include <stdio.h> | |||
#include <stdlib.h> | |||
#include <unistd.h> | |||
#include <sys/times.h> | |||
#include <string.h> | |||
#include "clock.h" | |||
/* Keep track of most recent reading of cycle counter */
static unsigned cyc_hi = 0;
static unsigned cyc_lo = 0;

/*
 * access_counter - Read the x86 time-stamp counter.
 *     hi: receives the upper 32 bits of the counter
 *     lo: receives the lower 32 bits of the counter
 *
 * RDTSC leaves its result in EDX:EAX, so the outputs are bound to those
 * registers directly instead of being copied with extra moves.
 * The statement is volatile because it has outputs but no inputs: a
 * non-volatile asm of that shape may be merged or hoisted by the optimizer,
 * which would make two back-to-back readings identical.
 * The __asm__ spelling also builds in strict ISO modes such as -std=c11,
 * where the plain asm keyword is not available.
 */
void access_counter(unsigned *hi, unsigned *lo)
{
    unsigned high, low;

    __asm__ __volatile__("rdtsc" : "=d" (high), "=a" (low));
    *hi = high;
    *lo = low;
}
void start_counter() | |||
{ | |||
access_counter(&cyc_hi, &cyc_lo); | |||
} | |||
double get_counter() | |||
{ | |||
unsigned ncyc_hi, ncyc_lo; | |||
unsigned hi, lo, borrow; | |||
double result; | |||
/* Get cycle counter */ | |||
access_counter(&ncyc_hi, &ncyc_lo); | |||
/* Do double precision subtraction */ | |||
lo = ncyc_lo - cyc_lo; | |||
borrow = lo > ncyc_lo; | |||
hi = ncyc_hi - cyc_hi - borrow; | |||
result = (double) hi * (1 << 30) * 4 + lo; | |||
if (result < 0) { | |||
fprintf(stderr, "Error: Cycle counter returning negative value: %.0f\n", result); | |||
} | |||
return result; | |||
} | |||
/*
 * ovhd - Measure the overhead of a start_counter()/get_counter() pair.
 * The measurement is taken twice and the second value returned, to
 * eliminate cache effects.
 */
double ovhd()
{
    double result;
    int pass = 0;

    do {
        start_counter();
        result = get_counter();
    } while (++pass < 2);

    return result;
}
/* Keep track of clock speed */
double cpu_ghz = 0.0;

/* Longest /proc/cpuinfo line read in one piece */
#define MAXBUF 512

/*
 * core_mhz - Get the clock rate in MHz from /proc/cpuinfo.
 *     verbose: when non-zero, print the extracted rate to stdout.
 *
 * Scans the file for the first "cpu MHz" line that parses to a positive
 * number, stores the rate in the global cpu_ghz and returns it in MHz.
 * If the file cannot be opened or holds no usable entry, a diagnostic is
 * written to stderr and 1 GHz (1000.0 MHz) is assumed.
 */
double core_mhz(int verbose) {
    char buf[MAXBUF];
    FILE *fp = fopen("/proc/cpuinfo", "r");

    cpu_ghz = 0.0;
    if (!fp) {
        fprintf(stderr, "Can't open /proc/cpuinfo to get clock information\n");
        cpu_ghz = 1.0;
        return cpu_ghz * 1000.0;
    }

    while (fgets(buf, MAXBUF, fp)) {
        double cpu_mhz = 0.0;

        /* Only accept a line whose value actually parsed */
        if (strstr(buf, "cpu MHz") &&
            sscanf(buf, "cpu MHz\t: %lf", &cpu_mhz) == 1 &&
            cpu_mhz > 0.0) {
            cpu_ghz = cpu_mhz / 1000.0;
            break;
        }
    }
    fclose(fp);

    if (cpu_ghz == 0.0) {
        /* The file opened fine; what is missing is the entry itself */
        fprintf(stderr, "Can't find a usable \"cpu MHz\" entry in /proc/cpuinfo; assuming 1 GHz\n");
        cpu_ghz = 1.0;
        return cpu_ghz * 1000.0;
    }
    if (verbose) {
        printf("Processor Clock Rate ~= %.4f GHz (extracted from file)\n", cpu_ghz);
    }
    return cpu_ghz * 1000;
}
/* mhz - Return the processor clock rate in MHz as reported by core_mhz(). */
double mhz(int verbose) {
    return core_mhz(verbose);
}
/*
 * mhz_full - Determine clock rate by measuring cycles elapsed while
 * sleeping for sleeptime seconds.
 *     verbose:   when non-zero, print the measured rate to stdout.
 *     sleeptime: seconds to sleep; values below 1 are treated as 1, since
 *                0 would divide by zero and a negative value would turn
 *                into an enormous unsigned sleep.
 *     Returns the measured rate in MHz.
 */
double mhz_full(int verbose, int sleeptime)
{
    double rate;
    unsigned remaining;

    if (sleeptime < 1)
        sleeptime = 1;

    start_counter();
    /* sleep() returns the unslept seconds if a signal cut it short */
    remaining = (unsigned)sleeptime;
    while (remaining > 0)
        remaining = sleep(remaining);

    rate = get_counter()/(1e6*sleeptime);
    if (verbose)
        printf("Processor Clock Rate ~= %.1f MHz\n", rate);
    return rate;
}
///* Version using a default sleeptime */ | |||
//double mhz(int verbose) | |||
//{ | |||
// return mhz_full(verbose, 2); | |||
//} | |||
/** Special counters that compensate for timer interrupt overhead */
static double cyc_per_tick = 0.0;

#define NEVENT 100
#define THRESHOLD 1000
#define RECORDTHRESH 3000

/*
 * callibrate - Estimate how many cycles correspond to one user-time tick.
 *
 * Polls the cycle counter.  Whenever at least THRESHOLD cycles have passed
 * since the reference reading, the user-time tick count from times() is
 * checked; if it advanced, the cycles-per-tick ratio for that interval is
 * computed.  The smallest ratio above RECORDTHRESH is kept in cyc_per_tick.
 * Stops after NEVENT tick advances.  If verbose, prints the final value.
 */
static void callibrate(int verbose)
{
    struct tms tbuf;
    clock_t prev_ticks;
    double prev_cycles;
    int events = 0;

    times(&tbuf);
    prev_ticks = tbuf.tms_utime;
    start_counter();
    prev_cycles = get_counter();

    while (events < NEVENT) {
        double now_cycles = get_counter();
        clock_t now_ticks;

        if (!(now_cycles - prev_cycles >= THRESHOLD))
            continue;

        times(&tbuf);
        now_ticks = tbuf.tms_utime;
        if (now_ticks > prev_ticks) {
            double ratio = (now_cycles - prev_cycles) / (now_ticks - prev_ticks);

            if (ratio > RECORDTHRESH && (cyc_per_tick == 0.0 || cyc_per_tick > ratio))
                cyc_per_tick = ratio;
            events++;
            prev_ticks = now_ticks;
        }
        prev_cycles = now_cycles;
    }

    if (verbose)
        printf("Setting cyc_per_tick to %f\n", cyc_per_tick);
}
static clock_t start_tick = 0; | |||
void start_comp_counter() { | |||
struct tms t; | |||
if (cyc_per_tick == 0.0) | |||
callibrate(1); | |||
times(&t); | |||
start_tick = t.tms_utime; | |||
start_counter(); | |||
} | |||
double get_comp_counter() { | |||
double time = get_counter(); | |||
double ctime; | |||
struct tms t; | |||
clock_t ticks; | |||
times(&t); | |||
ticks = t.tms_utime - start_tick; | |||
ctime = time - ticks*cyc_per_tick; | |||
/* | |||
printf("Measured %.0f cycles. Ticks = %d. Corrected %.0f cycles\n", | |||
time, (int) ticks, ctime); | |||
*/ | |||
return ctime; | |||
} |
@ -0,0 +1,23 @@ | |||
/* Routines for using cycle counter */ | |||
/* Start the counter */ | |||
void start_counter(); | |||
/* Get # cycles since counter started */ | |||
double get_counter(); | |||
/* Measure overhead for counter */ | |||
double ovhd(); | |||
/* Determine clock rate of processor */ | |||
double mhz(int verbose); | |||
/* Determine clock rate of processor, having more control over accuracy */ | |||
double mhz_full(int verbose, int sleeptime); | |||
/** Special counters that compensate for timer interrupt overhead */ | |||
void start_comp_counter(); | |||
double get_comp_counter(); |
@ -0,0 +1,299 @@ | |||
/* Compute time used by a function f that takes two integer args */ | |||
#include <stdlib.h> | |||
#include <sys/times.h> | |||
#include <stdio.h> | |||
#include "clock.h" | |||
#include "fcyc2.h" | |||
/* The k smallest samples seen so far, kept in ascending order */
static double *values = NULL;
/* Number of samples added since the last init_sampler() */
int samplecount = 0;

#define KEEP_VALS 1
#define KEEP_SAMPLES 1

#if KEEP_SAMPLES
/* Every sample in arrival order */
double *samples = NULL;
#endif

/*
 * init_sampler - Start new sampling process.
 *     k:          number of smallest samples to track
 *     maxsamples: upper bound on the number of samples that will be added
 *
 * Releases buffers from a previous run and allocates zeroed ones.  The
 * other sampler routines index these buffers without checking them, so an
 * allocation failure is reported on stderr and the program exits here
 * instead of crashing later on a NULL pointer.
 */
static void init_sampler(int k, int maxsamples)
{
    free(values); /* free(NULL) is a no-op */
    values = calloc(k, sizeof(double));
    if (values == NULL) {
        fprintf(stderr, "init_sampler: cannot allocate %d values\n", k);
        exit(1);
    }
#if KEEP_SAMPLES
    free(samples);
    /* Allocate extra for wraparound analysis */
    samples = calloc(maxsamples+k, sizeof(double));
    if (samples == NULL) {
        fprintf(stderr, "init_sampler: cannot allocate %d samples\n", maxsamples+k);
        exit(1);
    }
#endif
    samplecount = 0;
}
/* Add new sample. */ | |||
void add_sample(double val, int k) | |||
{ | |||
int pos = 0; | |||
if (samplecount < k) { | |||
pos = samplecount; | |||
values[pos] = val; | |||
} else if (val < values[k-1]) { | |||
pos = k-1; | |||
values[pos] = val; | |||
} | |||
#if KEEP_SAMPLES | |||
samples[samplecount] = val; | |||
#endif | |||
samplecount++; | |||
/* Insertion sort */ | |||
while (pos > 0 && values[pos-1] > values[pos]) { | |||
double temp = values[pos-1]; | |||
values[pos-1] = values[pos]; | |||
values[pos] = temp; | |||
pos--; | |||
} | |||
} | |||
/* Get current minimum */ | |||
double get_min() | |||
{ | |||
return values[0]; | |||
} | |||
/* What is relative error for kth smallest sample */ | |||
double err(int k) | |||
{ | |||
if (samplecount < k) | |||
return 1000.0; | |||
return (values[k-1] - values[0])/values[0]; | |||
} | |||
/* Have k minimum measurements converged within epsilon? */ | |||
int has_converged(int k_arg, double epsilon_arg, int maxsamples) | |||
{ | |||
if ((samplecount >= k_arg) && | |||
((1 + epsilon_arg)*values[0] >= values[k_arg-1])) | |||
return samplecount; | |||
if ((samplecount >= maxsamples)) | |||
return -1; | |||
return 0; | |||
} | |||
/* Code to clear cache */
/* Pentium III has 512K L2 cache, which is 128K ints */
#define ASIZE (1 << 17)
/* Cache block size is 32 bytes */
#define STRIDE 8

/*
 * volatile: the array is never written, so without it an optimizing
 * compiler is free to treat every element as a known zero and drop the
 * loads, leaving the cache untouched.
 */
static volatile int stuff[ASIZE];
static int sink;

/*
 * clear - Sweep through stuff[], touching one int every STRIDE elements,
 * and fold what was read into sink.
 */
static void clear()
{
    int x = sink;
    int i;

    for (i = 0; i < ASIZE; i += STRIDE)
        x += stuff[i];
    sink = x;
}
/*
 * fcyc2_full - Measure the cycles used by f(param1, param2).
 *     clear_cache: when non-zero, call clear() before each measurement
 *     k:           how many samples must be within epsilon for convergence
 *     epsilon:     tolerance
 *     maxsamples:  how many samples until giving up
 *     compensate:  when non-zero, use the compensated counter pair
 *
 * Each round calls f once untimed (warm cache) and once timed, and records
 * the timed result.  Rounds repeat until has_converged() is non-zero or
 * maxsamples samples exist.  Returns the smallest sample.
 */
double fcyc2_full(test_funct f, int param1, int param2, int clear_cache,
                  int k, double epsilon, int maxsamples, int compensate)
{
    double result;

    init_sampler(k, maxsamples);

    if (compensate) {
        for (;;) {
            double cyc;

            if (clear_cache)
                clear();
            f(param1, param2);   /* warm cache */
            start_comp_counter();
            f(param1, param2);
            cyc = get_comp_counter();
            add_sample(cyc, k);
            if (has_converged(k, epsilon, maxsamples) || samplecount >= maxsamples)
                break;
        }
    } else {
        for (;;) {
            double cyc;

            if (clear_cache)
                clear();
            f(param1, param2);   /* warm cache */
            start_counter();
            f(param1, param2);
            cyc = get_counter();
            add_sample(cyc, k);
            if (has_converged(k, epsilon, maxsamples) || samplecount >= maxsamples)
                break;
        }
    }
#ifdef DEBUG
    {
        int i;

        printf(" %d smallest values: [", k);
        for (i = 0; i < k; i++)
            printf("%.0f%s", values[i], i==k-1 ? "]\n" : ", ");
    }
#endif
    result = values[0];
#if !KEEP_VALS
    free(values);
    values = NULL;
#endif
    return result;
}
/*
 * fcyc2 - Measure f(param1, param2) with the default settings: the 3
 * smallest samples within 1%, at most 500 samples, no compensation.
 */
double fcyc2(test_funct f, int param1, int param2, int clear_cache)
{
    const int k = 3;
    const double epsilon = 0.01;
    const int maxsamples = 500;
    const int compensate = 0;

    return fcyc2_full(f, param1, param2, clear_cache, k, epsilon, maxsamples, compensate);
}
/******************* Version that uses gettimeofday *************/
#include <sys/time.h>

/* Clock rate in MHz; 0.0 until the first start_counter_tod() measures it */
static double Mhz = 0.0;
/* Wall-clock time recorded by start_counter_tod() */
static struct timeval tstart;

/*
 * start_counter_tod - Record current time.
 * On first use the clock rate is measured with mhz_full(0, 10).
 */
void start_counter_tod()
{
    if (Mhz == 0) {
        Mhz = mhz_full(0, 10);
    }
    gettimeofday(&tstart, NULL);
}
/* Get number of seconds since last call to start_timer */ | |||
double get_counter_tod() | |||
{ | |||
struct timeval tfinish; | |||
long sec, usec; | |||
gettimeofday(&tfinish, NULL); | |||
sec = tfinish.tv_sec - tstart.tv_sec; | |||
usec = tfinish.tv_usec - tstart.tv_usec; | |||
return (1e6 * sec + usec)*Mhz; | |||
} | |||
/** Special counters that compensate for timer interrupt overhead */
static double cyc_per_tick = 0.0;

#define NEVENT 100
#define THRESHOLD 1000
#define RECORDTHRESH 3000

/*
 * callibrate - Estimate how many (time-of-day based) cycles correspond to
 * one user-time tick.
 *
 * Polls get_counter_tod().  Whenever at least THRESHOLD cycles have passed
 * since the reference reading, the user-time tick count from times() is
 * checked; if it advanced, the cycles-per-tick ratio for that interval is
 * computed.  The smallest ratio above RECORDTHRESH is kept in cyc_per_tick.
 * Stops after NEVENT tick advances.  If verbose, prints the final value.
 */
static void callibrate(int verbose)
{
    struct tms tbuf;
    clock_t prev_ticks;
    double prev_cycles;
    int events = 0;

    times(&tbuf);
    prev_ticks = tbuf.tms_utime;
    start_counter_tod();
    prev_cycles = get_counter_tod();

    while (events < NEVENT) {
        double now_cycles = get_counter_tod();
        clock_t now_ticks;

        if (!(now_cycles - prev_cycles >= THRESHOLD))
            continue;

        times(&tbuf);
        now_ticks = tbuf.tms_utime;
        if (now_ticks > prev_ticks) {
            double ratio = (now_cycles - prev_cycles) / (now_ticks - prev_ticks);

            if (ratio > RECORDTHRESH && (cyc_per_tick == 0.0 || cyc_per_tick > ratio))
                cyc_per_tick = ratio;
            events++;
            prev_ticks = now_ticks;
        }
        prev_cycles = now_cycles;
    }

    if (verbose)
        printf("Setting cyc_per_tick to %f\n", cyc_per_tick);
}
static clock_t start_tick = 0; | |||
void start_comp_counter_tod() { | |||
struct tms t; | |||
if (cyc_per_tick == 0.0) | |||
callibrate(0); | |||
times(&t); | |||
start_tick = t.tms_utime; | |||
start_counter_tod(); | |||
} | |||
double get_comp_counter_tod() { | |||
double time = get_counter_tod(); | |||
double ctime; | |||
struct tms t; | |||
clock_t ticks; | |||
times(&t); | |||
ticks = t.tms_utime - start_tick; | |||
ctime = time - ticks*cyc_per_tick; | |||
/* | |||
printf("Measured %.0f cycles. Ticks = %d. Corrected %.0f cycles\n", | |||
time, (int) ticks, ctime); | |||
*/ | |||
return ctime; | |||
} | |||
/*
 * fcyc2_full_tod - Measure f(param1, param2) using the time-of-day clock.
 *     clear_cache: when non-zero, call clear() before each measurement
 *     k:           how many samples must be within epsilon for convergence
 *     epsilon:     tolerance
 *     maxsamples:  how many samples until giving up
 *     compensate:  when non-zero, use the compensated counter pair
 *
 * Each round times a single call of f (no warm-up call) and records the
 * result.  Rounds repeat until has_converged() is non-zero or maxsamples
 * samples exist.  Returns the smallest sample.
 */
double fcyc2_full_tod(test_funct f, int param1, int param2, int clear_cache,
                      int k, double epsilon, int maxsamples, int compensate)
{
    double result;

    init_sampler(k, maxsamples);

    if (compensate) {
        for (;;) {
            double cyc;

            if (clear_cache)
                clear();
            start_comp_counter_tod();
            f(param1, param2);
            cyc = get_comp_counter_tod();
            add_sample(cyc, k);
            if (has_converged(k, epsilon, maxsamples) || samplecount >= maxsamples)
                break;
        }
    } else {
        for (;;) {
            double cyc;

            if (clear_cache)
                clear();
            start_counter_tod();
            f(param1, param2);
            cyc = get_counter_tod();
            add_sample(cyc, k);
            if (has_converged(k, epsilon, maxsamples) || samplecount >= maxsamples)
                break;
        }
    }
#ifdef DEBUG
    {
        int i;

        printf(" %d smallest values: [", k);
        for (i = 0; i < k; i++)
            printf("%.0f%s", values[i], i==k-1 ? "]\n" : ", ");
    }
#endif
    result = values[0];
#if !KEEP_VALS
    free(values);
    values = NULL;
#endif
    return result;
}
/*
 * fcyc2_tod - Time-of-day measurement of f(param1, param2) with the default
 * settings: the 3 smallest samples within 1%, at most 20 samples, no
 * compensation.
 */
double fcyc2_tod(test_funct f, int param1, int param2, int clear_cache)
{
    const int k = 3;
    const double epsilon = 0.01;
    const int maxsamples = 20;
    const int compensate = 0;

    return fcyc2_full_tod(f, param1, param2, clear_cache, k, epsilon, maxsamples, compensate);
}
@ -0,0 +1,41 @@ | |||
/* Find number of cycles used by function that takes 2 arguments */ | |||
/* Function to be tested takes two integer arguments */ | |||
typedef int (*test_funct)(int, int); | |||
/* Compute time used by function f */ | |||
double fcyc2(test_funct f, int param1, int param2, int clear_cache); | |||
/********* These routines are used to help with the analysis *********/ | |||
/* | |||
Parameters: | |||
k: How many samples must be within epsilon for convergence | |||
epsilon: What is tolerance | |||
maxsamples: How many samples until give up? | |||
*/ | |||
/* Full version of fcyc with control over parameters */ | |||
double fcyc2_full(test_funct f, int param1, int param2, int clear_cache, | |||
int k, double epsilon, int maxsamples, int compensate); | |||
/* Get current minimum */ | |||
double get_min(); | |||
/* What is convergence status for k minimum measurements within epsilon | |||
Returns 0 if not converged, #samples if converged, and -1 if can't | |||
reach convergence | |||
*/ | |||
int has_converged(int k, double epsilon, int maxsamples); | |||
/* What is error of current measurement */ | |||
double err(int k); | |||
/************* Try other clocking methods *****************/ | |||
/* Full version that uses the time of day clock */ | |||
double fcyc2_full_tod(test_funct f, int param1, int param2, int clear_cache, | |||
int k, double epsilon, int maxsamples, int compensate); | |||
double fcyc2_tod(test_funct f, int param1, int param2, int clear_cache); |
@ -0,0 +1,116 @@ | |||
/* mountain.c - Generate the memory mountain. */ | |||
/* $begin mountainmain */ | |||
#include <stdlib.h> | |||
#include <stdio.h> | |||
#include "fcyc2.h" /* measurement routines */ | |||
#include "clock.h" /* routines to access the cycle counter */ | |||
#define MINBYTES (1 << 14)  /* First working set size */
#define MAXBYTES (1 << 27)  /* Last working set size */
#define MAXSTRIDE 15        /* Stride x8 bytes */
/* Parenthesized so the division cannot regroup with neighbouring operators */
#define MAXELEMS (MAXBYTES / sizeof(long))

/* $begin mountainfuns */
long data[MAXELEMS];      /* The global array we'll be traversing */
/* $end mountainfuns */ | |||
/* $end mountainmain */ | |||
void init_data(long *data, int n); | |||
int test(int elems, int stride); | |||
double run(int size, int stride, double Mhz); | |||
/* $begin mountainmain */ | |||
int main() | |||
{ | |||
int size; /* Working set size (in bytes) */ | |||
int stride; /* Stride (in array elements) */ | |||
double Mhz; /* Clock frequency */ | |||
FILE *fp = NULL; | |||
fp = fopen("mountain.txt", "w+"); | |||
init_data(data, MAXELEMS); /* Initialize each element in data */ | |||
Mhz = mhz(0); /* Estimate the clock frequency */ | |||
/* $end mountainmain */ | |||
/* Not shown in the text */ | |||
fprintf(fp, "Clock frequency is approx. %.1f MHz\n", Mhz); | |||
fprintf(fp, "Memory mountain (MB/sec)\n"); | |||
fprintf(fp, "\t"); | |||
for (stride = 1; stride <= MAXSTRIDE; stride++) | |||
fprintf(fp, "s%d\t", stride); | |||
fprintf(fp, "\n"); | |||
/* $begin mountainmain */ | |||
for (size = MAXBYTES; size >= MINBYTES; size >>= 1) { | |||
/* $end mountainmain */ | |||
/* Not shown in the text */ | |||
if (size > (1 << 20)) | |||
fprintf(fp, "%dm\t", size / (1 << 20)); | |||
else | |||
fprintf(fp, "%dk\t", size / 1024); | |||
/* $begin mountainmain */ | |||
for (stride = 1; stride <= MAXSTRIDE; stride++) { | |||
fprintf(fp, "%.0f\t", run(size, stride, Mhz)); | |||
} | |||
fprintf(fp, "\n"); | |||
} | |||
fclose(fp); | |||
exit(0); | |||
} | |||
/* $end mountainmain */ | |||
/* init_data - store each index in its own slot: data[i] = i for 0 <= i < n */
void init_data(long *data, int n)
{
    int idx = 0;

    while (idx < n) {
        data[idx] = idx;
        idx++;
    }
}
/* $begin mountainfuns */ | |||
/* test - Iterate over first "elems" elements of array "data" with | |||
* stride of "stride", using 4x4 loop unrolling. | |||
*/ | |||
int test(int elems, int stride) | |||
{ | |||
long i, sx2 = stride*2, sx3 = stride*3, sx4 = stride*4; | |||
long acc0 = 0, acc1 = 0, acc2 = 0, acc3 = 0; | |||
long length = elems; | |||
long limit = length - sx4; | |||
/* Combine 4 elements at a time */ | |||
for (i = 0; i < limit; i += sx4) { | |||
acc0 = acc0 + data[i]; | |||
acc1 = acc1 + data[i+stride]; | |||
acc2 = acc2 + data[i+sx2]; | |||
acc3 = acc3 + data[i+sx3]; | |||
} | |||
/* Finish any remaining elements */ | |||
for (; i < length; i += stride) { | |||
acc0 = acc0 + data[i]; | |||
} | |||
return ((acc0 + acc1) + (acc2 + acc3)); | |||
} | |||
/* run - Run test(elems, stride) and return read throughput (MB/s). | |||
* "size" is in bytes, "stride" is in array elements, and Mhz is | |||
* CPU clock frequency in Mhz. | |||
*/ | |||
double run(int size, int stride, double Mhz) | |||
{ | |||
double cycles; | |||
int elems = size / sizeof(double); | |||
test(elems, stride); /* Warm up the cache */ //line:mem:warmup | |||
cycles = fcyc2(test, elems, stride, 0); /* Call test(elems,stride) */ //line:mem:fcyc | |||
return (size / stride) / (cycles / Mhz); /* Convert cycles to MB/s */ //line:mem:bwcompute | |||
} | |||
/* $end mountainfuns */ | |||
@ -0,0 +1,45 @@ | |||
// | |||
// Created by GentleCold on 2022/11/7. | |||
// | |||
#ifndef CSAPPLEARNING_MOUNTAIN_H | |||
#define CSAPPLEARNING_MOUNTAIN_H | |||
#include <stdio.h> | |||
#define MAXELEMS 10000
/* Array traversed by read() */
long data[MAXELEMS];

/*
 * read - Sum the first elems elements of data, visiting every stride-th
 * element, with the loop unrolled four ways into four accumulators.
 * Returns the combined sum as an int.
 */
int read(int elems, int stride) {
    const long step2 = stride * 2, step3 = stride * 3, step4 = stride * 4;
    long sum0 = 0, sum1 = 0, sum2 = 0, sum3 = 0;
    const long length = elems;
    const long unrolled_end = length - step4;
    long idx = 0;

    /* Four strided elements per iteration */
    while (idx < unrolled_end) {
        sum0 = sum0 + data[idx];
        sum1 = sum1 + data[idx + stride];
        sum2 = sum2 + data[idx + step2];
        sum3 = sum3 + data[idx + step3];
        idx += step4;
    }

    /* Whatever is left over */
    while (idx < length) {
        sum0 = sum0 + data[idx];
        idx += stride;
    }
    return ((sum0 + sum1) + (sum2 + sum3));
}
/*
 * run - Run read(elems, stride) and return the read throughput.
 *     size:   working set size in bytes
 *     stride: stride in array elements
 *     Mhz:    CPU clock frequency in MHz
 *
 * Completed to mirror run() in mountain.c: the stub called fcyc2() without
 * arguments and fell off the end without returning a value.
 */
double run(int size, int stride, double Mhz) {
    /* Declared here because this header does not include fcyc2.h */
    double fcyc2(int (*f)(int, int), int param1, int param2, int clear_cache);
    double cycles;
    int elems = size / sizeof(double);

    read(elems, stride);                     /* warm up the cache */
    cycles = fcyc2(read, elems, stride, 0);  /* time read(elems, stride) */
    return (size / stride) / (cycles / Mhz); /* bytes per microsecond = MB/s */
}
/*
 * mountain - Placeholder entry point; no work is implemented yet.
 * Returns 0 so callers that use the result do not read an indeterminate
 * value from a non-void function without a return statement.
 */
int mountain() {
    /* TODO: drive run() over the size/stride grid */
    return 0;
}
#endif //CSAPPLEARNING_MOUNTAIN_H |