diff --git a/.gitignore b/.gitignore index 259148f..40f2379 100644 --- a/.gitignore +++ b/.gitignore @@ -30,3 +30,7 @@ *.exe *.out *.app + +stats.txt +build/ +output/ diff --git a/LESMEG b/LESMEG new file mode 100755 index 0000000..8ea84f7 --- /dev/null +++ b/LESMEG @@ -0,0 +1,52 @@ + +sudo apt-get install g++-4.8 python-dev scons swig zlib1g-dev m4 + tar xvzf prefetcher.tgz + cd prefetcher/prefetcher + make + +This should produce the following output: + + OVERALL PERFORMANCE +---------------------------------------- + PREFETCHER SPEEDUP +---------------------------------------- + adaptive_sequential 1.01 + dcpt 1.05 + dcpt-p 1.08 + none 1.00 + rpt 1.06 + sequential_on_access 1.01 + sequential_on_miss 1.00 + tagged 1.01 + user 1.01 +---------------------------------------- + + + +Installing M5 on Linux +First, download the modified M5 simulator and SPEC CPU2000 benchmark suite from https://goo.gl/Jg7oFm (NB! The file is 564 MiB). It is hosted on OneDrive and requires an NTNU login. +Software requirements (specific Debian/Ubuntu packages mentioned in parentheses): + 3.4.6 <= g++ <= 4.8 + Python and libpython >= 2.4 (python and python-dev) Scons > 0.98.1 (scons) + SWIG >= 1.3.31 (swig) + zlib (zlib1g-dev) + m4 (m4) +To install all required packages in one go, issue instructions to apt-get: + sudo apt-get install g++-4.8 python-dev scons swig zlib1g-dev m4 +The simulator framework comes packaged as a gzipped tarball. Start the adventure by unpacking with + tar xvzf prefetcher.tgz +This will create a directory named prefetcher. +Build +M5 uses the scons build system: + cd prefetcher/m5/ + scons -j2 ./build/ALPHA_SE/m5.opt +builds the optimized version of the M5 binaries. +-j2 specifies that the build process should build two targets in parallel. This is a useful option to cut down on compile time if your machine has several processors or cores. +The included build script compile.sh encapsulates the necessary build commands and options. 
+CPU2000 benchmark tests +The test_prefetcher.py script can be used to evaluate the performance of your prefetcher against the SPEC CPU2000 benchmarks. It runs a selected suite of CPU2000 tests with your prefetcher, and compares the results to some reference prefetchers. + cd ../prefetcher/ + make test +The per-test statistics that M5 generates are written to output/<test>/stats.txt. The statistics most relevant for hardware prefetching are then filtered and aggregated to a stats.txt file in the framework base directory. + cat output/*/stats.txt + diff --git a/Makefile b/Makefile new file mode 100755 index 0000000..6853cd5 --- /dev/null +++ b/Makefile @@ -0,0 +1,20 @@ +include ./scripts/FRAMEWORK + +CXX=g++-4.8 +CC=gcc + +export + +all: compile test + +compile: + ./scripts/compile.sh + +test: + ./scripts/test_prefetcher.py + + +clean: + rm -Rf build + rm -Rf output + rm -Rf stats.txt diff --git a/scripts/FRAMEWORK b/scripts/FRAMEWORK new file mode 100644 index 0000000..d3de8ae --- /dev/null +++ b/scripts/FRAMEWORK @@ -0,0 +1 @@ +PREFETCHER_FRAMEWORK=$(realpath ../) diff --git a/scripts/compile.sh b/scripts/compile.sh new file mode 100755 index 0000000..73e735b --- /dev/null +++ b/scripts/compile.sh @@ -0,0 +1,11 @@ +#!/bin/sh +# Build ../build/ALPHA_SE/m5.opt with scons, passing ../src as EXTRAS. +SCRIPT_DIR=$(dirname "$(readlink -f "$0")") +# PREFETCHER_FRAMEWORK is set in scripts/FRAMEWORK and exported by the Makefile. +[ ! -d "${PREFETCHER_FRAMEWORK}/m5" ] && { + echo "Cannot locate m5 framework" >&2 + exit 1 +} +# scons is run from inside the m5 tree; stop if the directory change fails. +cd "${PREFETCHER_FRAMEWORK}/m5" || exit 1 +scons -j2 NO_FAST_ALLOC=False EXTRAS="${SCRIPT_DIR}/../src" "${SCRIPT_DIR}/../build/ALPHA_SE/m5.opt" diff --git a/scripts/test_prefetcher.py b/scripts/test_prefetcher.py new file mode 100755 index 0000000..815f8bc --- /dev/null +++ b/scripts/test_prefetcher.py @@ -0,0 +1,81 @@ +#!/usr/bin/env python2 +""" +Run the simulator and record statistics. 
+""" + +import sys, os, os.path, glob + +frameDir = os.environ['PREFETCHER_FRAMEWORK'] +homeDir = os.path.realpath(os.path.dirname(os.path.realpath(__file__))+ '/..') + +sys.path.append(frameDir) + +from lib.run_util import * +import lib.stats as stats + +# Uncomment this to print commands instead of executing them. +#dry_run() + + +# Set paths: the M5 binary is expected under homeDir (the parent of this script's directory), se.py under frameDir ($PREFETCHER_FRAMEWORK). +m5_path(homeDir + '/build/ALPHA_SE/m5.opt') +se_path(frameDir + '/m5/configs/example/se.py') + +# Check that M5 is compiled; exit with status 1 if the binary is missing. +if not os.path.exists(homeDir + '/build/ALPHA_SE/m5.opt'): + print >>sys.stderr, "Could not find the M5 binary, run compile.sh to compile with your prefetcher." + sys.exit(1) +print "Remember to recompile after making changes." + +# Set output directory +global_prefix(homeDir + '/output/') + +# Configure the simulator arguments (presumably applied to every run - confirm in lib.run_util). +global_args( + '--checkpoint-dir=' + frameDir + '/lib/cp', + '--checkpoint-restore=%d' % 1e9, '--at-instruction', + '--caches', '--l2cache', + '--standard-switch', '--warmup-insts=%d' % 1e7, + '--max-inst=%d' % 1e7, + + '--l2size=1MB', + '--membus-width=8', '--membus-clock=400MHz', '--mem-latency=30ns', +) + +# Prefetchers to run: only the user prefetcher; reference results are read from stored statistics further down. +prefetchers = Config('user', ['--prefetcher=on_access=true:policy=proxy']) + +# Tests to run +tests = spec_configs +#tests = spec_configs[:2] + +configs = cross(tests, prefetchers) + +# Run tests from homeDir. NOTE(review): M5_CPU2000 points below homeDir while checkpoints are read from frameDir - confirm the benchmark data lives in this checkout. +os.chdir(homeDir) +os.environ['M5_CPU2000'] = homeDir + '/data/cpu2000' +run_configs(configs) + +# Read statistics: the stored *_1e7 files under frameDir/lib/stats, updated with the results of this run; 'none' is the baseline prefetcher. +stats.BASELINE_PF = 'none' +pf_stats = stats.read_stats(*glob.glob(frameDir + '/lib/stats/*_1e7')) + +pf_stats.update(stats.build_stats(homeDir + '/output')) + +# Write statistics to homeDir/stats.txt; save_stats formats one table, appends it to the file and prints it when echo is true. +stats_file = open(homeDir + '/stats.txt', 'w') +def save_stats(pf, test, echo): + table = stats.format_stats(pf_stats, pf, test) + stats_file.write(table) + if echo: + print table + +# Prefetcher comparison for each test (written to the file only, not echoed) +for test in sorted(pf_stats['user']): + save_stats('all', test, False) +# User prefetcher results. 
+save_stats('user', 'all', True) +# Summary +save_stats('all', 'all', True) + +stats_file.close() diff --git a/src/SConscript b/src/SConscript new file mode 100755 index 0000000..9ad8b4a --- /dev/null +++ b/src/SConscript @@ -0,0 +1,6 @@ +# -*- mode:python -*- + +# This tells SCons where to find the prefetcher file. + +Import('*') +Source('prefetcher.cc') diff --git a/src/interface.hh b/src/interface.hh new file mode 100755 index 0000000..cb5a96c --- /dev/null +++ b/src/interface.hh @@ -0,0 +1,99 @@ +/* C interface for prefetchers. */ +/* DO NOT MODIFY THIS FILE */ + +#include <stdint.h> + +/* + * This makes the DPRINTF macro and all trace flags available. + * DPRINTF is a print macro that takes a trace flag, a format string and + * a variable number of print parameters (like regular printf), and prints + * them to stdout if the trace flag in question is enabled on the command + * line with --trace-flags=<flags>. + * + * For prefetcher use, the relevant flag is HWPrefetch. + * Example (which prints out the address of a cache access): + * + * DPRINTF(HWPrefetch, "Address %#x was accessed\n", stat.mem_addr) + * + */ +#include "base/trace.hh" + +/* Size of cache blocks (cache lines) in bytes. */ +#define BLOCK_SIZE 64 + +/* Maximum number of pending prefetch requests. */ +#define MAX_QUEUE_SIZE 100 + +/* The largest possible physical memory address. */ +#define MAX_PHYS_MEM_ADDR ((uint64_t)(256*1024*1024) - 1) + +/* M5 note: must match typedefs in base/types.hh */ +typedef uint64_t Addr; +typedef int64_t Tick; + + +/* + * This is the information provided to the prefetcher on each call to + * prefetch_access by the simulator. + */ +struct AccessStat { + Addr pc; /* The address of the instruction that caused the access */ + Addr mem_addr; /* The memory address that was requested */ + Tick time; /* The simulator time cycle when the request was sent */ + int miss; /* Was this demand access a cache hit (0) or miss (1)? 
*/ +}; + + +/* + * Functions that are called by the simulator, with implementation + * provided by the user. The implementation may be an empty function. + */ + +/* + * The simulator calls this before any memory access to let the prefetcher + * initialize itself. + */ +extern "C" void prefetch_init(void); + +/* + * The simulator calls this function to notify the prefetcher about + * a cache access (both hits and misses). + */ +extern "C" void prefetch_access(AccessStat stat); + +/* + * The simulator calls this function to notify the prefetcher that + * a prefetch load to address addr has just completed. + */ +extern "C" void prefetch_complete(Addr addr); + + + +/* Functions callable from the user-defined prefetcher. */ + +/* + * The prefetcher calls this function to notify the simulator that + * a prefetch for address addr should be added to the prefetch queue. + */ +extern "C" void issue_prefetch(Addr addr); + + +/* Is the prefetch bit set for the cache block corresponding to addr? */ +extern "C" int get_prefetch_bit(Addr addr); + +/* Set the prefetch bit for the cache block corresponding to addr. */ +extern "C" void set_prefetch_bit(Addr addr); + +/* Clear the prefetch bit for the cache block corresponding to addr. */ +extern "C" void clear_prefetch_bit(Addr addr); + + + +/* Is this address already in the cache? */ +extern "C" int in_cache(Addr addr); + +/* Is this address already in the MSHR queue? */ +extern "C" int in_mshr_queue(Addr addr); + +/* Number of occupied slots in the prefetch request queue */ +extern "C" int current_queue_size(void); diff --git a/src/prefetcher.cc b/src/prefetcher.cc new file mode 100755 index 0000000..58be4e6 --- /dev/null +++ b/src/prefetcher.cc @@ -0,0 +1,36 @@ +/* + * A sample prefetcher which does sequential one-block lookahead. + * This means that the prefetcher fetches the next block _after_ the one that + * was just accessed. It also ignores requests to blocks already in the cache. 
+ */ + +#include "interface.hh" + + +void prefetch_init(void) +{ + /* Called before any calls to prefetch_access. */ + /* This is the place to initialize data structures. */ + + //DPRINTF(HWPrefetch, "Initialized sequential-on-access prefetcher\n"); +} + +void prefetch_access(AccessStat stat) +{ + /* pf_addr is now an address within the _next_ cache block */ + Addr pf_addr = stat.mem_addr + BLOCK_SIZE; + + /* + * Issue a prefetch request if a demand miss occurred, the next block + * lies within physical memory, and it is not already in cache. + */ + if (stat.miss && pf_addr <= MAX_PHYS_MEM_ADDR && !in_cache(pf_addr)) { + issue_prefetch(pf_addr); + } +} + +void prefetch_complete(Addr addr) { + /* + * Called when a block requested by the prefetcher has been loaded. + */ +}