| @@ -39,7 +39,8 @@ object Data { | |||||
| // addr is the target address | // addr is the target address | ||||
| case class PcUpdateJALR(addr: Addr) extends ExecutionEvent | case class PcUpdateJALR(addr: Addr) extends ExecutionEvent | ||||
| case class PcUpdateJAL(addr: Addr) extends ExecutionEvent | case class PcUpdateJAL(addr: Addr) extends ExecutionEvent | ||||
| case class PcUpdateB(addr: Addr) extends ExecutionEvent | |||||
| case class PcUpdateBranch(addr: Addr) extends ExecutionEvent | |||||
| case class PcUpdateNoBranch(addr: Addr) extends ExecutionEvent | |||||
| case class PcUpdate(addr: Addr) extends ExecutionEvent | case class PcUpdate(addr: Addr) extends ExecutionEvent | ||||
| case class ExecutionTraceEvent(pc: Addr, event: ExecutionEvent*){ override def toString(): String = s"$pc: " + event.toList.mkString(", ") } | case class ExecutionTraceEvent(pc: Addr, event: ExecutionEvent*){ override def toString(): String = s"$pc: " + event.toList.mkString(", ") } | ||||
| @@ -110,25 +110,10 @@ object Ops { | |||||
| case class LUI(rd: Reg, imm: Imm) extends Op with UType | case class LUI(rd: Reg, imm: Imm) extends Op with UType | ||||
| case class AUIPC(rd: Reg, imm: Imm) extends Op with UType | case class AUIPC(rd: Reg, imm: Imm) extends Op with UType | ||||
| case class Store(rs2: Reg, rs1: Reg, offset: Imm, width: Int) extends Op with SType | |||||
| object Store { | |||||
| def sw(rs2: Int, rs1: Int, offset: Int) = Store(Reg(rs2), Reg(rs1), Imm(offset), 4) | |||||
| def sh(rs2: Int, rs1: Int, offset: Int) = Store(Reg(rs2), Reg(rs1), Imm(offset), 2) | |||||
| def sb(rs2: Int, rs1: Int, offset: Int) = Store(Reg(rs2), Reg(rs1), Imm(offset), 1) | |||||
| } | |||||
| case class Load(rd: Reg, rs1: Reg, offset: Imm, width: Int, signed: Boolean) extends Op with IType | |||||
| object Load { | |||||
| def lw (rd: Int, rs1: Int, offset: Int) = Load(Reg(rd), Reg(rs1), Imm(offset), 4, true) | |||||
| def lh (rd: Int, rs1: Int, offset: Int) = Load(Reg(rd), Reg(rs1), Imm(offset), 2, true) | |||||
| def lb (rd: Int, rs1: Int, offset: Int) = Load(Reg(rd), Reg(rs1), Imm(offset), 1, true) | |||||
| def lhu(rd: Int, rs1: Int, offset: Int) = Load(Reg(rd), Reg(rs1), Imm(offset), 2, false) | |||||
| def lbu(rd: Int, rs1: Int, offset: Int) = Load(Reg(rd), Reg(rs1), Imm(offset), 1, false) | |||||
| } | |||||
| case class JALR(rd: Reg, rs1: Reg, dst: String) extends Op with IType | case class JALR(rd: Reg, rs1: Reg, dst: String) extends Op with IType | ||||
| case class JAL(rd: Reg, dst: String) extends Op with UType | case class JAL(rd: Reg, dst: String) extends Op with UType | ||||
| case class SW(rs2: Reg, rs1: Reg, offset: Imm) extends Op with SType | |||||
| case class LW(rd: Reg, rs1: Reg, offset: Imm) extends Op with IType | |||||
| object LUI { def apply(rd: Int, imm: Int): LUI = LUI(Reg(rd), Imm(imm)) } | object LUI { def apply(rd: Int, imm: Int): LUI = LUI(Reg(rd), Imm(imm)) } | ||||
| @@ -136,6 +121,8 @@ object Ops { | |||||
| object JAL{ def apply(rd: Int, dst: String): JAL = JAL(Reg(rd), dst) } | object JAL{ def apply(rd: Int, dst: String): JAL = JAL(Reg(rd), dst) } | ||||
| object JALR{ def apply(rd: Int, rs1: Int, dst: String): JALR = JALR(Reg(rd), Reg(rs1), dst) } | object JALR{ def apply(rd: Int, rs1: Int, dst: String): JALR = JALR(Reg(rd), Reg(rs1), dst) } | ||||
| object SW { def apply(rs2: Int, rs1: Int, offset: Int): SW = SW(Reg(rs2), Reg(rs1), Imm(offset)) } | |||||
| object LW { def apply(rd: Int, rs1: Int, offset: Int): LW = LW(Reg(rd), Reg(rs1), Imm(offset)) } | |||||
| // This op should not be assembled, but will for the sake of simplicity be rendered as a NOP | // This op should not be assembled, but will for the sake of simplicity be rendered as a NOP | ||||
| case object DONE extends Op with IType { val rd = Reg(0); val rs1 = Reg(0) } | case object DONE extends Op with IType { val rd = Reg(0); val rs1 = Reg(0) } | ||||
| @@ -38,21 +38,19 @@ case class VM( | |||||
| } | } | ||||
| private def executeBranch(op: Branch) = { | private def executeBranch(op: Branch) = { | ||||
| getAddr(op.dst).map{ addr => | getAddr(op.dst).map{ addr => | ||||
| val takeBranch = regs.compare(op.rs1, op.rs2, op.comp.run) | val takeBranch = regs.compare(op.rs1, op.rs2, op.comp.run) | ||||
| if(takeBranch){ | if(takeBranch){ | ||||
| val nextVM = copy(pc = addr) | val nextVM = copy(pc = addr) | ||||
| jump(nextVM, PcUpdateB(nextVM.pc)) | |||||
| jump(nextVM, PcUpdateBranch(nextVM.pc)) | |||||
| } | } | ||||
| else { | else { | ||||
| step(this) | |||||
| step(this, PcUpdateNoBranch(this.pc + Addr(4))) | |||||
| } | } | ||||
| } | } | ||||
| } | } | ||||
| /** | /** | ||||
| * The weird :_* syntax is simply a way to pass a list to a varArgs function. | * The weird :_* syntax is simply a way to pass a list to a varArgs function. | ||||
| * | * | ||||
| @@ -42,7 +42,8 @@ object PrintUtils { | |||||
| // addr is the target address | // addr is the target address | ||||
| case PcUpdateJALR(addr) => fansi.Color.Green(s"PC updated to ${addr.show} via JALR") | case PcUpdateJALR(addr) => fansi.Color.Green(s"PC updated to ${addr.show} via JALR") | ||||
| case PcUpdateJAL(addr) => fansi.Color.Magenta(s"PC updated to ${addr.show} via JAL") | case PcUpdateJAL(addr) => fansi.Color.Magenta(s"PC updated to ${addr.show} via JAL") | ||||
| case PcUpdateB(addr) => fansi.Color.Yellow(s"PC updated to ${addr.show} via Branch") | |||||
| case PcUpdateBranch(addr) => fansi.Color.Yellow(s"PC updated to ${addr.show} via Branch") | |||||
| case PcUpdateNoBranch(addr) => fansi.Color.Yellow(s"PC updated to ${addr.show}, skipping a Branch") | |||||
| } | } | ||||
| } | } | ||||
| @@ -100,4 +100,94 @@ object TestRunner { | |||||
| successful | successful | ||||
| }.toOption.getOrElse(false) | }.toOption.getOrElse(false) | ||||
| } | } | ||||
| /** | |||||
|   * Runs the test program selected by `testOptions` and reports, via `say`, how many branch | |||||
|   * mispredictions a one-bit predictor with an unlimited number of slots incurs on its trace. | |||||
|   * | |||||
|   * Always returns `true`; `testResults` is never inspected. | |||||
|   * NOTE(review): a failure while reading, parsing or validating therefore also yields `true` | |||||
|   * and prints nothing -- confirm that this is intended. | |||||
|   * NOTE(review): the NotTaken cases used to be inverted, so miss counts measured with the old | |||||
|   * version (including any figure quoted in exercise texts) have to be re-measured. | |||||
|   */ | |||||
| def profileBranching(testOptions: TestOptions): Boolean = { | |||||
|   val testResults = for { | |||||
|     lines <- fileUtils.readTest(testOptions) | |||||
|     program <- FiveStage.Parser.parseProgram(lines, testOptions) | |||||
|     (binary, (trace, finalVM)) <- program.validate.map(x => (x._1, x._2.run)) | |||||
|   } yield { | |||||
|     sealed trait BranchEvent | |||||
|     case class Taken(addr: Int) extends BranchEvent | |||||
|     case class NotTaken(addr: Int) extends BranchEvent | |||||
|     // Keep only the branch outcomes of the execution trace. | |||||
|     // NOTE(review): PcUpdateBranch appears to carry the branch target while PcUpdateNoBranch | |||||
|     // carries pc + 4 (see VM.executeBranch), so the two outcomes of one branch instruction | |||||
|     // would be keyed on different addresses -- confirm which address the predictor should use. | |||||
|     val events: List[BranchEvent] = trace.flatMap(_.event).collect{ | |||||
|       case PcUpdateBranch(x) => Taken(x.value) | |||||
|       case PcUpdateNoBranch(x) => NotTaken(x.value) | |||||
|     } | |||||
|     /** | |||||
|       * This is a sample profiler for a rather unrealistic branch predictor which has an unlimited amount | |||||
|       * of slots. Each address owns one bit: true means "predict taken", false means "predict not taken". | |||||
|       * Returns the number of mispredicted events. | |||||
|       */ | |||||
|     def OneBitInfiniteSlots(events: List[BranchEvent]): Int = { | |||||
|       // Helper inspects the next element of the event list. If the event is a mispredict the miss counter is | |||||
|       // incremented and the prediction table is updated to reflect the actual outcome. | |||||
|       // As long as there are remaining events the helper calls itself recursively on the remainder. | |||||
|       // The running count is passed along as an argument so that the recursive call is in tail position, | |||||
|       // which keeps the stack depth constant no matter how long the trace is. | |||||
|       @scala.annotation.tailrec | |||||
|       def helper(events: List[BranchEvent], predictionTable: Map[Int, Boolean], misses: Int): Int = { | |||||
|         events match { | |||||
|           // Scala syntax for matching a list with a head element of some type and a tail | |||||
|           // `case h :: t =>` | |||||
|           // means we want to match a list with at least a head and a tail (tail can be Nil, so we | |||||
|           // essentially want to match a list with at least one element) | |||||
|           // h is the first element of the list, t is the remainder (which can be Nil, aka empty) | |||||
|           // `case Constructor(arg1, arg2) :: t => ` | |||||
|           // means we want to match a list whose first element is of type Constructor, giving us access to its internal | |||||
|           // values. | |||||
|           // `case Constructor(arg1, arg2) :: t if(p(arg1, arg2)) =>` | |||||
|           // means we want to match a list whose first element is of type Constructor while satisfying some predicate p, | |||||
|           // called an if guard. | |||||
|           // Correct predictions: nothing is counted and the table is left alone | |||||
|           case Taken(addr) :: t if( predictionTable(addr)) => helper(t, predictionTable, misses) | |||||
|           case NotTaken(addr) :: t if(!predictionTable(addr)) => helper(t, predictionTable, misses) | |||||
|           // Mispredictions: count one miss and store the actual outcome | |||||
|           case Taken(addr) :: t => helper(t, predictionTable.updated(addr, true), misses + 1) | |||||
|           case NotTaken(addr) :: t => helper(t, predictionTable.updated(addr, false), misses + 1) | |||||
|           case Nil => misses | |||||
|         } | |||||
|       } | |||||
|       // Initially every address seen in the trace is set to false since the initial state of the predictor is to assume branch not taken | |||||
|       val initState: Map[Int, Boolean] = events.map{ | |||||
|         case Taken(addr) => (addr, false) | |||||
|         case NotTaken(addr) => (addr, false) | |||||
|       }.toMap | |||||
|       helper(events, initState, 0) | |||||
|     } | |||||
|     say(OneBitInfiniteSlots(events)) | |||||
|   } | |||||
|   true | |||||
| } | |||||
| /** | |||||
|   * Skeleton for the cache profiling exercise: runs the test program selected by `testOptions` | |||||
|   * and extracts the memory accesses from its execution trace. | |||||
|   * | |||||
|   * Nothing is computed from the accesses yet and the method returns `true` regardless of what | |||||
|   * the for-comprehension produced. | |||||
|   */ | |||||
| def profileCache(testOptions: TestOptions): Boolean = { | |||||
|   val testResults = for { | |||||
|     lines <- fileUtils.readTest(testOptions) | |||||
|     program <- FiveStage.Parser.parseProgram(lines, testOptions) | |||||
|     (binary, (trace, finalVM)) <- program.validate.map(validated => (validated._1, validated._2.run)) | |||||
|   } yield { | |||||
|     sealed trait MemoryEvent | |||||
|     case class Write(addr: Int) extends MemoryEvent | |||||
|     case class Read(addr: Int) extends MemoryEvent | |||||
|     // Flatten the per-step event lists first, then keep only the loads and stores | |||||
|     val allEvents = trace.flatMap(_.event) | |||||
|     val events: List[MemoryEvent] = allEvents.collect{ | |||||
|       case MemRead(address, _) => Read(address.value) | |||||
|       case MemWrite(address, _) => Write(address.value) | |||||
|     } | |||||
|     // Your cache here | |||||
|   } | |||||
|   true | |||||
| } | |||||
| } | } | ||||
| @@ -0,0 +1,101 @@ | |||||
| * Question 1 - Benchmarking | |||||
| In order to gauge the performance increase from adding branch predictors it is necessary to do some testing. | |||||
| Rather than writing a test from scratch it is better to use the tester already in use in the test harness. | |||||
| When running a program the VM outputs a log of all events, including which branches have been taken and which | |||||
| haven't, which as it turns out is the only information we actually need to gauge the effectiveness of a branch | |||||
| predictor! | |||||
| For this exercise you will write a program that parses a log of branch events. | |||||
| #+BEGIN_SRC scala | |||||
| sealed trait BranchEvent | |||||
| case class Taken(addr: Int) extends BranchEvent | |||||
| case class NotTaken(addr: Int) extends BranchEvent | |||||
| def profile(events: List[BranchEvent]): Int = ??? | |||||
| #+END_SRC | |||||
| To help you get started, I have provided you with much of the necessary code. | |||||
| In order to get an idea for how you should profile branch misses, consider the following profiler which calculates | |||||
| misses for a processor with a 1-bit branch predictor that has an unlimited number of slots: | |||||
| #+BEGIN_SRC scala | |||||
| // Counts the mispredictions of a one-bit predictor with one slot per address. | |||||
| // In the prediction table true means "predict taken" and false means "predict not taken". | |||||
| def OneBitInfiniteSlots(events: List[BranchEvent]): Int = { | |||||
|   // Helper inspects the next element of the event list. If the event is a mispredict the miss counter is | |||||
|   // incremented and the prediction table is updated to reflect the actual outcome. | |||||
|   // As long as there are remaining events the helper calls itself recursively on the remainder. | |||||
|   // The running count is passed along as an argument so that the recursive call is in tail position, | |||||
|   // which keeps the stack depth constant no matter how long the event list is. | |||||
|   @scala.annotation.tailrec | |||||
|   def helper(events: List[BranchEvent], predictionTable: Map[Int, Boolean], misses: Int): Int = { | |||||
|     events match { | |||||
|       // Scala syntax for matching a list with a head element of some type and a tail | |||||
|       // `case h :: t =>` | |||||
|       // means we want to match a list with at least a head and a tail (tail can be Nil, so we | |||||
|       // essentially want to match a list with at least one element) | |||||
|       // h is the first element of the list, t is the remainder (which can be Nil, aka empty) | |||||
|       // `case Constructor(arg1, arg2) :: t => ` | |||||
|       // means we want to match a list whose first element is of type Constructor, giving us access to its internal | |||||
|       // values. | |||||
|       // `case Constructor(arg1, arg2) :: t if(p(arg1, arg2)) =>` | |||||
|       // means we want to match a list whose first element is of type Constructor while satisfying some predicate p, | |||||
|       // called an if guard. | |||||
|       // Correct predictions: nothing is counted and the table is left alone | |||||
|       case Taken(addr) :: t if( predictionTable(addr)) => helper(t, predictionTable, misses) | |||||
|       case NotTaken(addr) :: t if(!predictionTable(addr)) => helper(t, predictionTable, misses) | |||||
|       // Mispredictions: count one miss and store the actual outcome | |||||
|       case Taken(addr) :: t => helper(t, predictionTable.updated(addr, true), misses + 1) | |||||
|       case NotTaken(addr) :: t => helper(t, predictionTable.updated(addr, false), misses + 1) | |||||
|       case Nil => misses | |||||
|     } | |||||
|   } | |||||
|   // Initially every address seen in the event list is set to false since the initial state of the predictor is to assume branch not taken | |||||
|   val initState: Map[Int, Boolean] = events.map{ | |||||
|     case Taken(addr) => (addr, false) | |||||
|     case NotTaken(addr) => (addr, false) | |||||
|   }.toMap | |||||
|   helper(events, initState, 0) | |||||
| } | |||||
| #+END_SRC | |||||
| ** Your task | |||||
| Your job is to implement a test that checks how many misses occur for a 2 bit branch predictor with 4 slots. | |||||
| For this task it is probably smart to use something other than a ~Map[Int, Boolean]~. | |||||
| The skeleton code is located in ~testRunner.scala~ and can be run using ~testOnly FiveStage.ProfileTest~. | |||||
| If you do so now you will see that the unrealistic prediction model yields 1449 misses. | |||||
| With a 2 bit 4 slot scheme, how many misses will you incur? | |||||
| Answer with a number. | |||||
| * Question 2 - Cache profiling | |||||
| Unlike our design which has a very limited memory pool, real designs have access to vast amounts of memory, offset | |||||
| by a steep cost in access latency. | |||||
| To remedy this, a modern processor features several caches, where even the smallest, fastest cache has more memory than | |||||
| your entire design. | |||||
| In order to investigate how caches can alter performance it is therefore necessary to make some rather | |||||
| unrealistic assumptions to see how different cache schemes impact performance. | |||||
| We will therefore assume the following: | |||||
| + Reads from main memory take 5 cycles | |||||
| + The cache has a total storage of 32 words (1024 bits) | |||||
| + Cache reads work as they do now (i.e. no additional latency) | |||||
| For this exercise you will write a program that parses a log of memory events, similar to the previous task. | |||||
| #+BEGIN_SRC scala | |||||
| sealed trait MemoryEvent | |||||
| case class Write(addr: Int) extends MemoryEvent | |||||
| case class Read(addr: Int) extends MemoryEvent | |||||
| def profile(events: List[MemoryEvent]): Int = ??? | |||||
| #+END_SRC | |||||
| ** Your task | |||||
| Your job is to implement a test that checks how many delay cycles will occur for a cache which: | |||||
| + Follows a 2-way associative scheme | |||||
| + Block size is 4 words (128 bits) | |||||
| + Is write-through with no-write-allocate | |||||
| + Eviction policy is LRU (least recently used) | |||||