| @@ -39,7 +39,8 @@ object Data { | |||
| // Program-counter update events. For the JALR, JAL and Branch variants addr is the target address. | |||
| case class PcUpdateJALR(addr: Addr) extends ExecutionEvent | |||
| case class PcUpdateJAL(addr: Addr) extends ExecutionEvent | |||
| // NOTE(review): PcUpdateB appears to be the pre-rename form of PcUpdateBranch in this diff view — confirm. | |||
| case class PcUpdateB(addr: Addr) extends ExecutionEvent | |||
| case class PcUpdateBranch(addr: Addr) extends ExecutionEvent | |||
| // Recorded when a branch is not taken. The VM passes pc + 4 here (the fall-through address), | |||
| // not the branch target. | |||
| case class PcUpdateNoBranch(addr: Addr) extends ExecutionEvent | |||
| case class PcUpdate(addr: Addr) extends ExecutionEvent | |||
| // One trace entry: a pc plus the events recorded with it. toString renders "<pc>: <event>, <event>, ...". | |||
| case class ExecutionTraceEvent(pc: Addr, event: ExecutionEvent*){ override def toString(): String = s"$pc: " + event.toList.mkString(", ") } | |||
| @@ -110,25 +110,10 @@ object Ops { | |||
| // Ops mixing in UType that carry a destination register and an immediate. | |||
| case class LUI(rd: Reg, imm: Imm) extends Op with UType | |||
| case class AUIPC(rd: Reg, imm: Imm) extends Op with UType | |||
| // Store with an explicit width; the constructors below use 4, 2 and 1 (presumably bytes — confirm). | |||
| case class Store(rs2: Reg, rs1: Reg, offset: Imm, width: Int) extends Op with SType | |||
| // Convenience constructors that take raw Ints and wrap them in Reg/Imm. | |||
| object Store { | |||
| def sw(rs2: Int, rs1: Int, offset: Int) = Store(Reg(rs2), Reg(rs1), Imm(offset), 4) | |||
| def sh(rs2: Int, rs1: Int, offset: Int) = Store(Reg(rs2), Reg(rs1), Imm(offset), 2) | |||
| def sb(rs2: Int, rs1: Int, offset: Int) = Store(Reg(rs2), Reg(rs1), Imm(offset), 1) | |||
| } | |||
| // Load with an explicit width and a signed flag. | |||
| case class Load(rd: Reg, rs1: Reg, offset: Imm, width: Int, signed: Boolean) extends Op with IType | |||
| // Convenience constructors: lw/lh/lb pass signed = true, lhu/lbu pass signed = false. | |||
| object Load { | |||
| def lw (rd: Int, rs1: Int, offset: Int) = Load(Reg(rd), Reg(rs1), Imm(offset), 4, true) | |||
| def lh (rd: Int, rs1: Int, offset: Int) = Load(Reg(rd), Reg(rs1), Imm(offset), 2, true) | |||
| def lb (rd: Int, rs1: Int, offset: Int) = Load(Reg(rd), Reg(rs1), Imm(offset), 1, true) | |||
| def lhu(rd: Int, rs1: Int, offset: Int) = Load(Reg(rd), Reg(rs1), Imm(offset), 2, false) | |||
| def lbu(rd: Int, rs1: Int, offset: Int) = Load(Reg(rd), Reg(rs1), Imm(offset), 1, false) | |||
| } | |||
| // Jump ops; dst is a String (presumably a label that is resolved to an address elsewhere — confirm). | |||
| case class JALR(rd: Reg, rs1: Reg, dst: String) extends Op with IType | |||
| case class JAL(rd: Reg, dst: String) extends Op with UType | |||
| // Store/load ops that carry no width or signedness field. | |||
| case class SW(rs2: Reg, rs1: Reg, offset: Imm) extends Op with SType | |||
| case class LW(rd: Reg, rs1: Reg, offset: Imm) extends Op with IType | |||
| // Int-based constructor: wraps the raw values in Reg/Imm. | |||
| object LUI { def apply(rd: Int, imm: Int): LUI = LUI(Reg(rd), Imm(imm)) } | |||
| @@ -136,6 +121,8 @@ object Ops { | |||
| // Companion apply overloads that take raw Ints and wrap them in Reg/Imm before building the op. | |||
| object JAL{ def apply(rd: Int, dst: String): JAL = JAL(Reg(rd), dst) } | |||
| object JALR{ def apply(rd: Int, rs1: Int, dst: String): JALR = JALR(Reg(rd), Reg(rs1), dst) } | |||
| object SW { def apply(rs2: Int, rs1: Int, offset: Int): SW = SW(Reg(rs2), Reg(rs1), Imm(offset)) } | |||
| object LW { def apply(rd: Int, rs1: Int, offset: Int): LW = LW(Reg(rd), Reg(rs1), Imm(offset)) } | |||
| // This op should not be assembled, but will for the sake of simplicity be rendered as a NOP. | |||
| // Both register fields are fixed to Reg(0). | |||
| case object DONE extends Op with IType { val rd = Reg(0); val rs1 = Reg(0) } | |||
| @@ -38,21 +38,19 @@ case class VM( | |||
| } | |||
| // Executes a branch op: looks up the address for op.dst, compares rs1 and rs2 using op.comp.run, | |||
| // and either continues from the branch target or steps on, recording a PC-update event. | |||
| // NOTE(review): in this diff view the PcUpdateB jump and the bare step(this) appear to be the | |||
| // pre-change lines, each directly followed by its replacement — confirm against the real file. | |||
| private def executeBranch(op: Branch) = { | |||
| getAddr(op.dst).map{ addr => | |||
| val takeBranch = regs.compare(op.rs1, op.rs2, op.comp.run) | |||
| if(takeBranch){ | |||
| // Taken: build a VM whose pc is the branch target and record that target. | |||
| val nextVM = copy(pc = addr) | |||
| jump(nextVM, PcUpdateB(nextVM.pc)) | |||
| jump(nextVM, PcUpdateBranch(nextVM.pc)) | |||
| } | |||
| else { | |||
| // Not taken: the recorded address is pc + 4, not the branch target. | |||
| step(this) | |||
| step(this, PcUpdateNoBranch(this.pc + Addr(4))) | |||
| } | |||
| } | |||
| } | |||
| /** | |||
| * The weird :_* syntax is simply a way to pass a list to a varArgs function. | |||
| * | |||
| @@ -42,7 +42,8 @@ object PrintUtils { | |||
| // addr is the target address | |||
| case PcUpdateJALR(addr) => fansi.Color.Green(s"PC updated to ${addr.show} via JALR") | |||
| case PcUpdateJAL(addr) => fansi.Color.Magenta(s"PC updated to ${addr.show} via JAL") | |||
| case PcUpdateB(addr) => fansi.Color.Yellow(s"PC updated to ${addr.show} via Branch") | |||
| case PcUpdateBranch(addr) => fansi.Color.Yellow(s"PC updated to ${addr.show} via Branch") | |||
| case PcUpdateNoBranch(addr) => fansi.Color.Yellow(s"PC updated to ${addr.show}, skipping a Branch") | |||
| } | |||
| } | |||
| @@ -100,4 +100,94 @@ object TestRunner { | |||
| successful | |||
| }.toOption.getOrElse(false) | |||
| } | |||
| /** | |||
|  * Runs the test program, collects its branch events and reports (via `say`) how many mispredictions | |||
|  * a 1-bit branch predictor with one slot per address would incur. | |||
|  * | |||
|  * @param testOptions options selecting the test program to read, parse and run | |||
|  * @return always `true`; the outcome of reading/parsing/running is not inspected | |||
|  */ | |||
| def profileBranching(testOptions: TestOptions): Boolean = { | |||
|   val testResults = for { | |||
|     lines <- fileUtils.readTest(testOptions) | |||
|     program <- FiveStage.Parser.parseProgram(lines, testOptions) | |||
|     (binary, (trace, finalVM)) <- program.validate.map(x => (x._1, x._2.run)) | |||
|   } yield { | |||
|     sealed trait BranchEvent | |||
|     case class Taken(addr: Int) extends BranchEvent | |||
|     case class NotTaken(addr: Int) extends BranchEvent | |||
|     // NOTE(review): the address used as key is the one carried by the event. The VM appears to emit the | |||
|     // branch target for PcUpdateBranch and pc + 4 for PcUpdateNoBranch, so the same branch instruction | |||
|     // may end up under two different keys — confirm, and consider keying by the trace entry's pc instead. | |||
|     val events: List[BranchEvent] = trace.flatMap(_.event).collect{ | |||
|       case PcUpdateBranch(x) => Taken(x.value) | |||
|       case PcUpdateNoBranch(x) => NotTaken(x.value) | |||
|     } | |||
|     /** | |||
|      * This is a sample profiler for a rather unrealistic branch predictor which has an unlimited amount | |||
|      * of slots. Returns the number of mispredicted branch events. | |||
|      */ | |||
|     def OneBitInfiniteSlots(events: List[BranchEvent]): Int = { | |||
|       // Helper inspects the next element of the event list. If the event is a mispredict the miss counter is | |||
|       // incremented and the prediction table is updated to reflect the actual outcome. | |||
|       // As long as there are remaining events the helper calls itself recursively on the remainder. | |||
|       // The running count is passed along as an argument so that the recursion is a tail call and long | |||
|       // traces cannot overflow the stack. | |||
|       @scala.annotation.tailrec | |||
|       def helper(events: List[BranchEvent], predictionTable: Map[Int, Boolean], misses: Int): Int = { | |||
|         events match { | |||
|           // Scala syntax for matching a list with a head element of some type and a tail | |||
|           // `case h :: t =>` | |||
|           // means we want to match a list with at least a head and a tail (tail can be Nil, so we | |||
|           // essentially want to match a list with at least one element) | |||
|           // h is the first element of the list, t is the remainder (which can be Nil, aka empty) | |||
|           // `case Constructor(arg1, arg2) :: t => ` | |||
|           // means we want to match a list whose first element is of type Constructor, giving us access to its internal | |||
|           // values. | |||
|           // `case Constructor(arg1, arg2) :: t if(p(arg1, arg2)) =>` | |||
|           // means we want to match a list whose first element is of type Constructor while satisfying some predicate p, | |||
|           // called an if guard. | |||
|           // Predicted taken, was taken: hit. | |||
|           case Taken(addr) :: t if( predictionTable(addr)) => helper(t, predictionTable, misses) | |||
|           // Predicted not taken, was taken: miss, flip the prediction to taken. | |||
|           case Taken(addr) :: t if(!predictionTable(addr)) => helper(t, predictionTable.updated(addr, true), misses + 1) | |||
|           // Predicted taken, was not taken: miss, flip the prediction to not taken. | |||
|           case NotTaken(addr) :: t if( predictionTable(addr)) => helper(t, predictionTable.updated(addr, false), misses + 1) | |||
|           // Predicted not taken, was not taken: hit. | |||
|           case NotTaken(addr) :: t if(!predictionTable(addr)) => helper(t, predictionTable, misses) | |||
|           // No events left. | |||
|           case _ => misses | |||
|         } | |||
|       } | |||
|       // Initially every possible branch is set to false since the initial state of the predictor is to assume branch not taken | |||
|       val initState = events.map{ | |||
|         case Taken(addr) => (addr, false) | |||
|         case NotTaken(addr) => (addr, false) | |||
|       }.toMap | |||
|       helper(events, initState, 0) | |||
|     } | |||
|     say(OneBitInfiniteSlots(events)) | |||
|   } | |||
|   true | |||
| } | |||
| // Skeleton for the cache profiling exercise: reads, parses and runs the test program, collects its | |||
| // memory events as Write/Read values, and leaves the cache model itself to be filled in. | |||
| // Always returns true; the result of the for-comprehension is not inspected. | |||
| def profileCache(testOptions: TestOptions): Boolean = { | |||
| val testResults = for { | |||
| lines <- fileUtils.readTest(testOptions) | |||
| program <- FiveStage.Parser.parseProgram(lines, testOptions) | |||
| (binary, (trace, finalVM)) <- program.validate.map(x => (x._1, x._2.run)) | |||
| } yield { | |||
| sealed trait MemoryEvent | |||
| case class Write(addr: Int) extends MemoryEvent | |||
| case class Read(addr: Int) extends MemoryEvent | |||
| // Keep only MemWrite/MemRead events, in trace order; their second field is ignored here. | |||
| val events: List[MemoryEvent] = trace.flatMap(_.event).collect{ | |||
| case MemWrite(x,_) => Write(x.value) | |||
| case MemRead(x,_) => Read(x.value) | |||
| } | |||
| // Your cache here | |||
| } | |||
| true | |||
| } | |||
| } | |||
| @@ -0,0 +1,101 @@ | |||
| * Question 1 - Benchmarking | |||
| In order to gauge the performance increase from adding branch predictors it is necessary to do some testing. | |||
| Rather than writing a test from scratch it is better to use the tester already in use in the test harness. | |||
| When running a program the VM outputs a log of all events, including which branches have been taken and which | |||
| haven't, which as it turns out is the only information we actually need to gauge the effectiveness of a branch | |||
| predictor! | |||
| For this exercise you will write a program that parses a log of branch events. | |||
| #+BEGIN_SRC scala | |||
| sealed trait BranchEvent | |||
| case class Taken(addr: Int) extends BranchEvent | |||
| case class NotTaken(addr: Int) extends BranchEvent | |||
| def profile(events: List[BranchEvent]): Int = ??? | |||
| #+END_SRC | |||
| To help you get started, I have provided you with much of the necessary code. | |||
| In order to get an idea of how you should profile branch misses, consider the following profiler, which calculates | |||
| misses for a processor with a 1-bit branch predictor with infinite memory: | |||
| #+BEGIN_SRC scala | |||
| // Counts the mispredictions of a 1-bit predictor that has one slot per branch address. | |||
| def OneBitInfiniteSlots(events: List[BranchEvent]): Int = { | |||
|   // Helper inspects the next element of the event list. If the event is a mispredict the miss counter is | |||
|   // incremented and the prediction table is updated to reflect the actual outcome. | |||
|   // As long as there are remaining events the helper calls itself recursively on the remainder. | |||
|   // The running count is passed along as an argument so that the recursion is a tail call and long | |||
|   // traces cannot overflow the stack. | |||
|   @scala.annotation.tailrec | |||
|   def helper(events: List[BranchEvent], predictionTable: Map[Int, Boolean], misses: Int): Int = { | |||
|     events match { | |||
|       // Scala syntax for matching a list with a head element of some type and a tail | |||
|       // `case h :: t =>` | |||
|       // means we want to match a list with at least a head and a tail (tail can be Nil, so we | |||
|       // essentially want to match a list with at least one element) | |||
|       // h is the first element of the list, t is the remainder (which can be Nil, aka empty) | |||
|       // `case Constructor(arg1, arg2) :: t => ` | |||
|       // means we want to match a list whose first element is of type Constructor, giving us access to its internal | |||
|       // values. | |||
|       // `case Constructor(arg1, arg2) :: t if(p(arg1, arg2)) =>` | |||
|       // means we want to match a list whose first element is of type Constructor while satisfying some predicate p, | |||
|       // called an if guard. | |||
|       // Predicted taken, was taken: hit. | |||
|       case Taken(addr) :: t if( predictionTable(addr)) => helper(t, predictionTable, misses) | |||
|       // Predicted not taken, was taken: miss, flip the prediction to taken. | |||
|       case Taken(addr) :: t if(!predictionTable(addr)) => helper(t, predictionTable.updated(addr, true), misses + 1) | |||
|       // Predicted taken, was not taken: miss, flip the prediction to not taken. | |||
|       case NotTaken(addr) :: t if( predictionTable(addr)) => helper(t, predictionTable.updated(addr, false), misses + 1) | |||
|       // Predicted not taken, was not taken: hit. | |||
|       case NotTaken(addr) :: t if(!predictionTable(addr)) => helper(t, predictionTable, misses) | |||
|       // No events left. | |||
|       case _ => misses | |||
|     } | |||
|   } | |||
|   // Initially every possible branch is set to false since the initial state of the predictor is to assume branch not taken | |||
|   val initState = events.map{ | |||
|     case Taken(addr) => (addr, false) | |||
|     case NotTaken(addr) => (addr, false) | |||
|   }.toMap | |||
|   helper(events, initState, 0) | |||
| } | |||
| #+END_SRC | |||
| ** Your task | |||
| Your job is to implement a test that checks how many misses occur for a 2 bit branch predictor with 4 slots. | |||
| For this task it is probably smart to use something other than a ~Map[Int, Boolean]~. | |||
| The skeleton code is located in ~testRunner.scala~ and can be run using ~testOnly FiveStage.ProfileTest~. | |||
| If you do so now you will see that the unrealistic prediction model yields 1449 misses. | |||
| With a 2 bit 4 slot scheme, how many misses will you incur? | |||
| Answer with a number. | |||
| * Question 2 - Cache profiling | |||
| Unlike our design, which has a very limited memory pool, real designs have access to vast amounts of memory, offset | |||
| by a steep cost in access latency. | |||
| To mitigate this, a modern processor features several caches, where even the smallest, fastest cache has more memory than | |||
| your entire design. | |||
| In order to investigate how caches can alter performance it is therefore necessary to make some rather | |||
| unrealistic assumptions to see how different cache schemes impact performance. | |||
| We will therefore assume the following: | |||
| + Reads from main memory take 5 cycles | |||
| + The cache has a total storage of 32 words (1024 bits) | |||
| + Cache reads work as they do now (i.e. no additional latency) | |||
| For this exercise you will write a program that parses a log of memory events, similar to the previous task: | |||
| #+BEGIN_SRC scala | |||
| sealed trait MemoryEvent | |||
| case class Write(addr: Int) extends MemoryEvent | |||
| case class Read(addr: Int) extends MemoryEvent | |||
| def profile(events: List[MemoryEvent]): Int = ??? | |||
| #+END_SRC | |||
| ** Your task | |||
| Your job is to implement a test that checks how many delay cycles will occur for a cache which: | |||
| + Follows a 2-way associative scheme | |||
| + Block size is 4 words (128 bits) | |||
| + Is write-through with no write-allocate | |||
| + Eviction policy is LRU (least recently used) | |||