| @@ -0,0 +1,52 @@ | |||
| sudo apt-get install g++-4.8 python-dev scons swig zlib1g-dev m4 | |||
| tar xvzf prefetcher.tgz | |||
| cd prefetcher/prefetcher | |||
| make | |||
| This should produce the following output: | |||
| OVERALL PERFORMANCE | |||
| ---------------------------------------- | |||
| PREFETCHER SPEEDUP | |||
| ---------------------------------------- | |||
| adaptive_sequential 1.01 | |||
| dcpt 1.05 | |||
| dcpt-p 1.08 | |||
| none 1.00 | |||
| rpt 1.06 | |||
| sequential_on_access 1.01 | |||
| sequential_on_miss 1.00 | |||
| tagged 1.01 | |||
| user 1.01 | |||
| ---------------------------------------- | |||
| Installing M5 on Linux | |||
| First, download the modified M5 simulator and the SPEC CPU2000 benchmark suite from https://goo.gl/Jg7oFm (NB! The file is large: 564 MiB). It is hosted on OneDrive and requires an NTNU login. | |||
| Software requirements (specific Debian/Ubuntu packages mentioned in parentheses): | |||
| 3.4.6 <= g++ <= 4.8 | |||
| Python and libpython >= 2.4 (python and python-dev) | |||
| Scons > 0.98.1 (scons) | |||
| SWIG >= 1.3.31 (swig) | |||
| zlib (zlib1g-dev) | |||
| m4 (m4) | |||
| To install all required packages in one go, issue instructions to apt-get: | |||
| sudo apt-get install g++-4.8 python-dev scons swig zlib1g-dev m4 | |||
| The simulator framework comes packaged as a gzipped tarball. Start the adventure by unpacking with | |||
| tar xvzf prefetcher.tgz | |||
| This will create a directory named prefetcher. | |||
| Build | |||
| M5 uses the scons build system: | |||
| cd prefetcher/m5/ | |||
| scons -j2 ./build/ALPHA_SE/m5.opt | |||
| builds the optimized version of the M5 binaries. | |||
| -j2 specifies that the build process should build two targets in parallel. This is a useful option to cut down on compile time if your machine has several processors or cores. | |||
| The included build script compile.sh encapsulates the necessary build commands and options. | |||
| CPU2000 benchmark tests | |||
| The test_prefetcher.py script can be used to evaluate the performance of your prefetcher against the SPEC CPU2000 benchmarks. It runs a selected suite of CPU2000 tests with your prefetcher, and compares the results to some reference prefetchers. | |||
| cd ../prefetcher/ | |||
| make test | |||
| The per-test statistics that M5 generates are written to output/<testname-prefetcher>/stats.txt. The statistics most relevant for hardware prefetching are then filtered and aggregated to a stats.txt file in the framework base directory. | |||
| cat output/*/stats.txt | |||
| @@ -0,0 +1,20 @@ | |||
# Build and evaluate the user prefetcher.
#
# The included file is expected to define PREFETCHER_FRAMEWORK, which the
# scripts invoked below read from their environment.
include ./scripts/FRAMEWORK

CXX=g++-4.8
CC=gcc

# A bare "export" passes every make variable to the recipes' environment.
export

# These targets are commands, not files: without .PHONY a file or directory
# named e.g. "test" or "clean" would make the target look up to date.
.PHONY: all compile test clean

all: compile test

compile:
	./scripts/compile.sh

test:
	./scripts/test_prefetcher.py

clean:
	rm -Rf build
	rm -Rf output
	rm -Rf stats.txt
| @@ -0,0 +1 @@ | |||
# Root of the prefetcher framework: the canonical path of the parent of the
# directory make is running in.
PREFETCHER_FRAMEWORK = $(realpath ../)
| @@ -0,0 +1,11 @@ | |||
#!/bin/sh
# Build the optimized ALPHA_SE M5 binary together with the user's prefetcher.
#
# Reads PREFETCHER_FRAMEWORK from the environment; it must name the directory
# that contains the m5 source tree. The sources in ../src (relative to this
# script) are handed to scons as EXTRAS, and the binary is written to
# ../build/ALPHA_SE/m5.opt.
#
# Exits with status 1 if the m5 tree cannot be found or entered; otherwise
# the exit status is that of scons.

# Quote every expansion so that paths containing spaces survive word splitting.
SCRIPT_DIR=$(dirname "$(readlink -f "$0")")

if [ ! -d "${PREFETCHER_FRAMEWORK}/m5" ]; then
    echo "Cannot locate m5 framework" >&2
    exit 1
fi

# Abort instead of running scons from the wrong directory if cd fails.
cd "${PREFETCHER_FRAMEWORK}/m5" || exit 1

scons -j2 NO_FAST_ALLOC=False EXTRAS="${SCRIPT_DIR}/../src" "${SCRIPT_DIR}/../build/ALPHA_SE/m5.opt"
| @@ -0,0 +1,81 @@ | |||
| #!/usr/bin/env python2 | |||
| """ | |||
| Run the simulator and record statistics. | |||
| """ | |||
| import sys, os, os.path, glob | |||
| frameDir = os.environ['PREFETCHER_FRAMEWORK'] | |||
| homeDir = os.path.realpath(os.path.dirname(os.path.realpath(__file__))+ '/..') | |||
| sys.path.append(frameDir) | |||
| from lib.run_util import * | |||
| import lib.stats as stats | |||
| # Uncomment this to print commands instead of executing them. | |||
| #dry_run() | |||
| # Set paths | |||
| m5_path(homeDir + '/build/ALPHA_SE/m5.opt') | |||
| se_path(frameDir + '/m5/configs/example/se.py') | |||
| # Check that M5 is compiled | |||
| if not os.path.exists(homeDir + '/build/ALPHA_SE/m5.opt'): | |||
| print >>sys.stderr, "Could not find the M5 binary, run compile.sh to compile with your prefetcher." | |||
| sys.exit(1) | |||
| print "Remember to recompile after making changes." | |||
| # Set output directory | |||
| global_prefix(homeDir + '/output/') | |||
| # Configure | |||
| global_args( | |||
| '--checkpoint-dir=' + frameDir + '/lib/cp', | |||
| '--checkpoint-restore=%d' % 1e9, '--at-instruction', | |||
| '--caches', '--l2cache', | |||
| '--standard-switch', '--warmup-insts=%d' % 1e7, | |||
| '--max-inst=%d' % 1e7, | |||
| '--l2size=1MB', | |||
| '--membus-width=8', '--membus-clock=400MHz', '--mem-latency=30ns', | |||
| ) | |||
| # Prefetchers to run | |||
| prefetchers = Config('user', ['--prefetcher=on_access=true:policy=proxy']) | |||
| # Tests to run | |||
| tests = spec_configs | |||
| #tests = spec_configs[:2] | |||
| configs = cross(tests, prefetchers) | |||
| # Run tests | |||
| os.chdir(homeDir) | |||
| os.environ['M5_CPU2000'] = homeDir + '/data/cpu2000' | |||
| run_configs(configs) | |||
| # Read statistics | |||
| stats.BASELINE_PF = 'none' | |||
| pf_stats = stats.read_stats(*glob.glob(frameDir + '/lib/stats/*_1e7')) | |||
| pf_stats.update(stats.build_stats(homeDir + '/output')) | |||
| # Write statistics | |||
| stats_file = open(homeDir + '/stats.txt', 'w') | |||
| def save_stats(pf, test, echo): | |||
| table = stats.format_stats(pf_stats, pf, test) | |||
| stats_file.write(table) | |||
| if echo: | |||
| print table | |||
| # Prefetcher comparison for each test | |||
| for test in sorted(pf_stats['user']): | |||
| save_stats('all', test, False) | |||
| # User prefetcher results. | |||
| save_stats('user', 'all', True) | |||
| # Summary | |||
| save_stats('all', 'all', True) | |||
| stats_file.close() | |||
| @@ -0,0 +1,6 @@ | |||
# -*- mode:python -*-

# SCons build fragment that tells the build where to find the prefetcher file.

# Bring all exported SCons variables into this script's namespace.
Import('*')

# Register the prefetcher implementation as a source file.
Source('prefetcher.cc')
| @@ -0,0 +1,99 @@ | |||
| /* C interface for prefetchers. */ | |||
| /* DO NOT MODIFY THIS FILE */ | |||
| #include <stdint.h> | |||
| /* | |||
| * This makes the DPRINT macro and all trace flags available. | |||
| * DPRINTF is a print macro that takes a trace flag, a format string and | |||
| * a variable number of print parameters (like regular printf), and prints | |||
| * them to stdout if the trace flag in question is enabled on the command | |||
| * line with --trace-flags=. | |||
| * | |||
| * For prefetcher use, the relevant flag i HWPrefetch. | |||
| * Example (which prints out the address of a cache access): | |||
| * | |||
| * DPRINTF(HWPrefetch, "Address %#x was accessed\n", stat.mem_addr) | |||
| * | |||
| */ | |||
| #include "base/trace.hh" | |||
| /* Size of cache blocks (cache lines) in bytes. */ | |||
| #define BLOCK_SIZE 64 | |||
| /* Maximum number of pending prefetch requests. */ | |||
| #define MAX_QUEUE_SIZE 100 | |||
| /* The largest possible physical memory address. */ | |||
| #define MAX_PHYS_MEM_ADDR ((uint64_t)(256*1024*1024) - 1) | |||
| /* M5 note: must match typedefs in in base/types.hh */ | |||
| typedef uint64_t Addr; | |||
| typedef int64_t Tick; | |||
| /* | |||
| * This is the information provided to the prefetcher on each call to | |||
| * prefetch_access by the simulator. | |||
| */ | |||
| struct AccessStat { | |||
| Addr pc; /* The address of the instruction that caused the access */ | |||
| Addr mem_addr; /* The memory address that was requested */ | |||
| Tick time; /* The simulator time cycle when the request was sent */ | |||
| int miss; /* Was this demand access a cache hit (0) or miss (1)? */ | |||
| }; | |||
| /* | |||
| * Functions that are called by the simulator, with implementation | |||
| * provided by the user. The implementation may be an empty function. | |||
| */ | |||
| /* | |||
| * The simulator calls this before any memory access to let the prefetcher | |||
| * initialize itself. | |||
| */ | |||
| extern "C" void prefetch_init(void); | |||
| /* | |||
| * The simulator calls this function to notify the prefetcher about | |||
| * a cache access (both hits and misses). | |||
| */ | |||
| extern "C" void prefetch_access(AccessStat stat); | |||
| /* | |||
| * The simulator calls this function to notify the prefetcher that | |||
| * a prefetch load to address addr has just completed. | |||
| */ | |||
| extern "C" void prefetch_complete(Addr addr); | |||
| /* Functions callable from the user-defined prefetcher. */ | |||
| /* | |||
| * The prefetcher calls this function to notify the simulator that | |||
| * a prefetch for address addr should be added to the prefetch queue. | |||
| */ | |||
| extern "C" void issue_prefetch(Addr addr); | |||
| /* Is the prefetch bit set for the cache block corresponding to addr? */ | |||
| extern "C" int get_prefetch_bit(Addr addr); | |||
| /* Set the prefetch bit for the cache block corresponding to addr. */ | |||
| extern "C" void set_prefetch_bit(Addr addr); | |||
| /* Clear the prefetch bit for the cache block corresponding to addr. */ | |||
| extern "C" void clear_prefetch_bit(Addr addr); | |||
| /* Is this address already in the cache? */ | |||
| extern "C" int in_cache(Addr addr); | |||
| /* Is this address already in the MSHR queue? */ | |||
| extern "C" int in_mshr_queue(Addr addr); | |||
| /* Number of occupied slots in the prefetch request queue */ | |||
| extern "C" int current_queue_size(void); | |||
| @@ -0,0 +1,36 @@ | |||
| /* | |||
| * A sample prefetcher which does sequential one-block lookahead. | |||
| * This means that, on a demand miss, the prefetcher fetches the next block | |||
| * _after_ the one that was just accessed, unless that block is already in the cache. | |||
| */ | |||
| #include "interface.hh" | |||
void prefetch_init(void)
{
    /*
     * Invoked before the first call to prefetch_access; this is where
     * data structures would be initialized. This sample prefetcher keeps
     * no state, so there is nothing to set up.
     */
    //DPRINTF(HWPrefetch, "Initialized sequential prefetcher\n");
}
| void prefetch_access(AccessStat stat) | |||
| { | |||
| /* pf_addr is now an address within the _next_ cache block */ | |||
| Addr pf_addr = stat.mem_addr + BLOCK_SIZE; | |||
| /* | |||
| * Issue a prefetch request if a demand miss occured, | |||
| * and the block is not already in cache. | |||
| */ | |||
| if (stat.miss && !in_cache(pf_addr)) { | |||
| issue_prefetch(pf_addr); | |||
| } | |||
| } | |||
| void prefetch_complete(Addr addr) { | |||
| /* | |||
| * Called when a block requested by the prefetcher has been loaded. | |||
| */ | |||
| } | |||