Initial commit 3.0, since I forgot to actually add filesmaster
| @@ -30,3 +30,7 @@ | |||||
| *.exe | *.exe | ||||
| *.out | *.out | ||||
| *.app | *.app | ||||
| stats.txt | |||||
| build/ | |||||
| output/ | |||||
| @@ -0,0 +1,52 @@ | |||||
| sudo apt-get install g++-4.8 python-dev scons swig zlib1g-dev m4 | |||||
| tar xvzf prefetcher.tgz | |||||
| cd prefetcher/prefetcher | |||||
| make | |||||
| This should produce the following output: | |||||
| OVERALL PERFORMANCE | |||||
| ---------------------------------------- | |||||
| PREFETCHER SPEEDUP | |||||
| ---------------------------------------- | |||||
| adaptive_sequential 1.01 | |||||
| dcpt 1.05 | |||||
| dcpt-p 1.08 | |||||
| none 1.00 | |||||
| rpt 1.06 | |||||
| sequential_on_access 1.01 | |||||
| sequential_on_miss 1.00 | |||||
| tagged 1.01 | |||||
| user 1.01 | |||||
| ---------------------------------------- | |||||
| Installing M5 on Linux | |||||
| First, download the modified M5 simulator and SPEC CPU2000 benchmark suite from https://goo.gl/Jg7oFm (NB: large file, 564 MiB). The file is hosted on OneDrive and requires an NTNU login. | |||||
| Software requirements (specific Debian/Ubuntu packages mentioned in parentheses): | |||||
| 3.4.6 <= g++ <= 4.8 | |||||
| Python and libpython >= 2.4 (python and python-dev) | |||||
| SCons > 0.98.1 (scons) | |||||
| SWIG >= 1.3.31 (swig) | |||||
| zlib (zlib1g-dev) | |||||
| m4 (m4) | |||||
| To install all required packages in one go, issue instructions to apt-get: | |||||
| sudo apt-get install g++-4.8 python-dev scons swig zlib1g-dev m4 | |||||
| The simulator framework comes packaged as a gzipped tarball. Start the adventure by unpacking with | |||||
| tar xvzf prefetcher.tgz | |||||
| This will create a directory named prefetcher. | |||||
| Build | |||||
| M5 uses the scons build system: | |||||
| cd prefetcher/m5/ | |||||
| scons -j2 ./build/ALPHA_SE/m5.opt | |||||
| builds the optimized version of the M5 binaries. | |||||
| -j2 specifies that the build process should build two targets in parallel. This is a useful option to cut down on compile time if your machine has several processors or cores. | |||||
| The included build script compile.sh encapsulates the necessary build commands and options. | |||||
| CPU2000 benchmark tests | |||||
| The test_prefetcher.py script can be used to evaluate the performance of your prefetcher against the SPEC CPU2000 benchmarks. It runs a selected suite of CPU2000 tests with your prefetcher, and compares the results to some reference prefetchers. | |||||
| cd ../prefetcher/ | |||||
| make test | |||||
| The per-test statistics that M5 generates are written to output/<testname-prefetcher>/stats.txt. The statistics most relevant for hardware prefetching are then filtered and aggregated to a stats.txt file in the framework base directory. | |||||
| cat output/*/stats.txt | |||||
| @@ -0,0 +1,20 @@ | |||||
# Build and evaluate the user prefetcher.
#
# scripts/FRAMEWORK defines PREFETCHER_FRAMEWORK. The bare `export` below
# passes it, together with CXX and CC, into the environment of every recipe;
# both scripts/compile.sh and scripts/test_prefetcher.py read it from there.
include ./scripts/FRAMEWORK

CXX=g++-4.8
CC=gcc

export

# None of these targets produces a file named after the target. Declaring
# them phony keeps a stray file or directory called e.g. `test` from making
# the target look up to date and silently skipping its recipe.
.PHONY: all compile test clean

all: compile test

# Build the simulator binary with the user prefetcher linked in.
compile:
	./scripts/compile.sh

# Run the benchmark suite and write the statistics.
test:
	./scripts/test_prefetcher.py

# Remove everything the compile and test steps generate.
clean:
	rm -Rf build
	rm -Rf output
	rm -Rf stats.txt
| @@ -0,0 +1 @@ | |||||
# Absolute path of the directory one level above the current working
# directory. The Makefile pulls this in with `include ./scripts/FRAMEWORK`.
# NOTE(review): presumably the framework checkout that holds m5/ and lib/ --
# confirm against the directory layout.
PREFETCHER_FRAMEWORK=$(realpath ../)
| @@ -0,0 +1,11 @@ | |||||
#!/bin/sh
# Build the optimized ALPHA_SE M5 binary with the user prefetcher sources
# (../src relative to this script) passed to scons through EXTRAS. The binary
# is placed in ../build/ALPHA_SE/m5.opt relative to this script.
#
# Requires PREFETCHER_FRAMEWORK to name the directory that contains m5/.

# Quote every expansion so paths containing whitespace survive word splitting.
SCRIPT_DIR=$(dirname "$(readlink -f "$0")")

# An empty variable would otherwise make the check test "/m5" by accident.
if [ -z "${PREFETCHER_FRAMEWORK}" ] || [ ! -d "${PREFETCHER_FRAMEWORK}/m5" ]; then
    echo "Cannot locate m5 framework" >&2
    exit 1
fi

# Abort instead of running scons in the wrong directory when cd fails.
cd "${PREFETCHER_FRAMEWORK}/m5" || exit 1

scons -j2 NO_FAST_ALLOC=False EXTRAS="${SCRIPT_DIR}/../src" "${SCRIPT_DIR}/../build/ALPHA_SE/m5.opt"
| @@ -0,0 +1,81 @@ | |||||
| #!/usr/bin/env python2 | |||||
| """ | |||||
| Run the simulator and record statistics. | |||||
| """ | |||||
import glob
import os
import os.path
import sys

# The framework location comes from the environment. The prefetcher project
# root is the parent of the directory that holds this script.
frameDir = os.environ['PREFETCHER_FRAMEWORK']
homeDir = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))

# The helper library is imported from inside the framework directory, so that
# directory has to be on the module search path before the imports below.
sys.path.append(frameDir)
from lib.run_util import *
import lib.stats as stats

# Uncomment this to print commands instead of executing them.
#dry_run()

# Tell the run helpers where the simulator binary and the se.py config live.
m5_opt_path = homeDir + '/build/ALPHA_SE/m5.opt'
m5_path(m5_opt_path)
se_path(frameDir + '/m5/configs/example/se.py')

# Stop early when the simulator binary has not been built yet.
if not os.path.exists(m5_opt_path):
    sys.stderr.write("Could not find the M5 binary, run compile.sh to compile with your prefetcher.\n")
    sys.exit(1)
print("Remember to recompile after making changes.")

# All run output goes below <homeDir>/output/.
global_prefix(homeDir + '/output/')

# Simulator options shared by every run.
shared_args = [
    '--checkpoint-dir=' + frameDir + '/lib/cp',
    '--checkpoint-restore=%d' % 1e9,
    '--at-instruction',
    '--caches',
    '--l2cache',
    '--standard-switch',
    '--warmup-insts=%d' % 1e7,
    '--max-inst=%d' % 1e7,
    '--l2size=1MB',
    '--membus-width=8',
    '--membus-clock=400MHz',
    '--mem-latency=30ns',
]
global_args(*shared_args)

# The single prefetcher configuration under test, labelled 'user'.
prefetchers = Config('user', ['--prefetcher=on_access=true:policy=proxy'])

# Benchmarks to run; switch to the commented line for a quick two-test run.
tests = spec_configs
#tests = spec_configs[:2]

configs = cross(tests, prefetchers)

# Run from the project root with the benchmark data location exported.
os.chdir(homeDir)
os.environ['M5_CPU2000'] = homeDir + '/data/cpu2000'
run_configs(configs)

# Load the statistics files shipped under lib/stats (presumably the reference
# prefetcher results -- confirm), then merge in what this run produced.
stats.BASELINE_PF = 'none'
pf_stats = stats.read_stats(*glob.glob(frameDir + '/lib/stats/*_1e7'))
pf_stats.update(stats.build_stats(homeDir + '/output'))
# Write statistics
stats_file = open(homeDir + '/stats.txt', 'w')


def save_stats(pf, test, echo):
    """Format one statistics table and append it to the stats file.

    pf and test are passed straight through to stats.format_stats together
    with the collected pf_stats; the call sites use either a specific name
    or the string 'all' for each. When echo is true the table is also
    printed to stdout.
    """
    table = stats.format_stats(pf_stats, pf, test)
    stats_file.write(table)
    if echo:
        print(table)


# Close the file even when formatting or writing raises, so a partially
# written stats.txt is at least flushed. try/finally is used instead of
# `with` because the README lists Python >= 2.4 as the minimum version.
try:
    # Prefetcher comparison for each test
    for test in sorted(pf_stats['user']):
        save_stats('all', test, False)

    # User prefetcher results.
    save_stats('user', 'all', True)

    # Summary
    save_stats('all', 'all', True)
finally:
    stats_file.close()
| @@ -0,0 +1,6 @@ | |||||
# -*- mode:python -*-

# SCons build script for the user prefetcher sources.
# This tells SCons where to find the prefetcher file.

# Import every variable exported by the calling SCons script.
Import('*')

# NOTE(review): Source() is not defined in this file; presumably it is one of
# the names made available by Import('*') above -- confirm against the M5
# build scripts.
Source('prefetcher.cc')
| @@ -0,0 +1,99 @@ | |||||
| /* C interface for prefetchers. */ | |||||
| /* DO NOT MODIFY THIS FILE */ | |||||
| #include <stdint.h> | |||||
| /* | |||||
| * This makes the DPRINTF macro and all trace flags available. | |||||
| * DPRINTF is a print macro that takes a trace flag, a format string and | |||||
| * a variable number of print parameters (like regular printf), and prints | |||||
| * them to stdout if the trace flag in question is enabled on the command | |||||
| * line with --trace-flags=. | |||||
| * | |||||
| * For prefetcher use, the relevant flag is HWPrefetch. | |||||
| * Example (which prints out the address of a cache access): | |||||
| * | |||||
| * DPRINTF(HWPrefetch, "Address %#x was accessed\n", stat.mem_addr) | |||||
| * | |||||
| */ | |||||
| #include "base/trace.hh" | |||||
| /* Size of cache blocks (cache lines) in bytes. */ | |||||
| #define BLOCK_SIZE 64 | |||||
| /* Maximum number of pending prefetch requests. */ | |||||
| #define MAX_QUEUE_SIZE 100 | |||||
| /* The largest possible physical memory address. */ | |||||
| #define MAX_PHYS_MEM_ADDR ((uint64_t)(256*1024*1024) - 1) | |||||
| /* M5 note: must match typedefs in base/types.hh */ | |||||
| typedef uint64_t Addr; | |||||
| typedef int64_t Tick; | |||||
/*
 * This is the information provided to the prefetcher on each call to
 * prefetch_access by the simulator.
 *
 * The struct is passed to prefetch_access by value, so the prefetcher
 * works on its own copy of the four fields.
 */
struct AccessStat {
    Addr pc;        /* The address of the instruction that caused the access */
    Addr mem_addr;  /* The memory address that was requested */
    Tick time;      /* The simulator time cycle when the request was sent */
    int miss;       /* Was this demand access a cache hit (0) or miss (1)? */
};
| /* | |||||
| * Functions that are called by the simulator, with implementation | |||||
| * provided by the user. The implementation may be an empty function. | |||||
| */ | |||||
| /* | |||||
| * The simulator calls this before any memory access to let the prefetcher | |||||
| * initialize itself. | |||||
| */ | |||||
| extern "C" void prefetch_init(void); | |||||
| /* | |||||
| * The simulator calls this function to notify the prefetcher about | |||||
| * a cache access (both hits and misses). | |||||
| */ | |||||
| extern "C" void prefetch_access(AccessStat stat); | |||||
| /* | |||||
| * The simulator calls this function to notify the prefetcher that | |||||
| * a prefetch load to address addr has just completed. | |||||
| */ | |||||
| extern "C" void prefetch_complete(Addr addr); | |||||
| /* Functions callable from the user-defined prefetcher. */ | |||||
| /* | |||||
| * The prefetcher calls this function to notify the simulator that | |||||
| * a prefetch for address addr should be added to the prefetch queue. | |||||
| */ | |||||
| extern "C" void issue_prefetch(Addr addr); | |||||
| /* Is the prefetch bit set for the cache block corresponding to addr? */ | |||||
| extern "C" int get_prefetch_bit(Addr addr); | |||||
| /* Set the prefetch bit for the cache block corresponding to addr. */ | |||||
| extern "C" void set_prefetch_bit(Addr addr); | |||||
| /* Clear the prefetch bit for the cache block corresponding to addr. */ | |||||
| extern "C" void clear_prefetch_bit(Addr addr); | |||||
| /* Is this address already in the cache? */ | |||||
| extern "C" int in_cache(Addr addr); | |||||
| /* Is this address already in the MSHR queue? */ | |||||
| extern "C" int in_mshr_queue(Addr addr); | |||||
| /* Number of occupied slots in the prefetch request queue */ | |||||
| extern "C" int current_queue_size(void); | |||||
| @@ -0,0 +1,36 @@ | |||||
| /* | |||||
| * A sample prefetcher which does sequential one-block lookahead. | |||||
| * This means that the prefetcher fetches the next block _after_ the one that | |||||
| * was just accessed. It also ignores requests to blocks already in the cache. | |||||
| */ | |||||
| #include "interface.hh" | |||||
/*
 * Initialization hook. This sample prefetcher has nothing to set up,
 * so the body is intentionally empty.
 */
void prefetch_init(void)
{
    /* Called before any calls to prefetch_access. */
    /* This is the place to initialize data structures. */

    //DPRINTF(HWPrefetch, "Initialized sequential-on-access prefetcher\n");
}
| void prefetch_access(AccessStat stat) | |||||
| { | |||||
| /* pf_addr is now an address within the _next_ cache block */ | |||||
| Addr pf_addr = stat.mem_addr + BLOCK_SIZE; | |||||
| /* | |||||
| * Issue a prefetch request if a demand miss occured, | |||||
| * and the block is not already in cache. | |||||
| */ | |||||
| if (stat.miss && !in_cache(pf_addr)) { | |||||
| issue_prefetch(pf_addr); | |||||
| } | |||||
| } | |||||
/*
 * Prefetch-completion hook. This sample prefetcher does not react to
 * completed prefetches, so the body is intentionally empty.
 *
 * addr: the address whose prefetch load has completed.
 */
void prefetch_complete(Addr addr) {
    /*
     * Called when a block requested by the prefetcher has been loaded.
     */
}