From a63c0ed4a9818ca984a152930c8aef7063fbe81e Mon Sep 17 00:00:00 2001 From: peteraa Date: Thu, 17 Oct 2019 16:15:56 +0200 Subject: [PATCH 01/22] Fix and add ops --- src/test/scala/RISCV/Ops.scala | 31 +++++++++++++++++++++++-------- 1 file changed, 23 insertions(+), 8 deletions(-) diff --git a/src/test/scala/RISCV/Ops.scala b/src/test/scala/RISCV/Ops.scala index f77a351..9cd7ded 100644 --- a/src/test/scala/RISCV/Ops.scala +++ b/src/test/scala/RISCV/Ops.scala @@ -24,7 +24,6 @@ object Ops { sealed trait JImmediate extends ImmType sealed trait ShiftImmediate extends ImmType - sealed trait Comparison { def run(rs1Val: Int, rs2Val: Int): Boolean } @@ -51,7 +50,10 @@ object Ops { def beqz(rs1: Int, dst: Label) = Branch(Reg(rs1), Reg(0), dst, EQ) def bnez(rs1: Int, dst: Label) = Branch(Reg(rs1), Reg(0), dst, NE) - def blez(rs1: Int, dst: Label) = Branch(Reg(rs1), Reg(0), dst, LT) + def blez(rs1: Int, dst: Label) = Branch(Reg(0), Reg(rs1), dst, GE) + def bgez(rs1: Int, dst: Label) = Branch(Reg(rs1), Reg(0), dst, GE) + def bltz(rs1: Int, dst: Label) = Branch(Reg(rs1), Reg(0), dst, LT) + def bgtz(rs1: Int, dst: Label) = Branch(Reg(0), Reg(rs1), dst, LT) } sealed trait someDecorator @@ -105,10 +107,25 @@ object Ops { def sra( rd: Int, rs1: Int, imm: Int) = ArithImmShift(Reg(rd), Reg(rs1), Imm(imm), SRA) } - case class LUI(rd: Reg, imm: Imm) extends Op with UType - case class AUIPC(rd: Reg, imm: Imm) extends Op with UType - case class SW(rs2: Reg, rs1: Reg, offset: Imm) extends Op with SType - case class LW(rd: Reg, rs1: Reg, offset: Imm) extends Op with IType + case class LUI(rd: Reg, imm: Imm) extends Op with UType + case class AUIPC(rd: Reg, imm: Imm) extends Op with UType + + + case class Store(rs2: Reg, rs1: Reg, offset: Imm, width: Int) extends Op with SType + object Store { + def sw(rs2: Int, rs1: Int, offset: Int) = Store(Reg(rs2), Reg(rs1), Imm(offset), 4) + def sh(rs2: Int, rs1: Int, offset: Int) = Store(Reg(rs2), Reg(rs1), Imm(offset), 2) + def sb(rs2: Int, rs1: 
Int, offset: Int) = Store(Reg(rs2), Reg(rs1), Imm(offset), 1) + } + + case class Load(rd: Reg, rs1: Reg, offset: Imm, width: Int, signed: Boolean) extends Op with IType + object Load { + def lw (rd: Int, rs1: Int, offset: Int) = Load(Reg(rd), Reg(rs1), Imm(offset), 4, true) + def lh (rd: Int, rs1: Int, offset: Int) = Load(Reg(rd), Reg(rs1), Imm(offset), 2, true) + def lb (rd: Int, rs1: Int, offset: Int) = Load(Reg(rd), Reg(rs1), Imm(offset), 1, true) + def lhu(rd: Int, rs1: Int, offset: Int) = Load(Reg(rd), Reg(rs1), Imm(offset), 2, false) + def lbu(rd: Int, rs1: Int, offset: Int) = Load(Reg(rd), Reg(rs1), Imm(offset), 1, false) + } case class JALR(rd: Reg, rs1: Reg, dst: String) extends Op with IType case class JAL(rd: Reg, dst: String) extends Op with UType @@ -116,8 +133,6 @@ object Ops { object LUI { def apply(rd: Int, imm: Int): LUI = LUI(Reg(rd), Imm(imm)) } object AUIPC { def apply(rd: Int, imm: Int): AUIPC = AUIPC(Reg(rd), Imm(imm)) } - object SW { def apply(rs2: Int, rs1: Int, offset: Int): SW = SW(Reg(rs2), Reg(rs1), Imm(offset)) } - object LW { def apply(rd: Int, rs1: Int, offset: Int): LW = LW(Reg(rd), Reg(rs1), Imm(offset)) } object JAL{ def apply(rd: Int, dst: String): JAL = JAL(Reg(rd), dst) } object JALR{ def apply(rd: Int, rs1: Int, dst: String): JALR = JALR(Reg(rd), Reg(rs1), dst) } From ec5089de8e0c9f9e7ff333da55104505315dd632 Mon Sep 17 00:00:00 2001 From: peteraa Date: Thu, 17 Oct 2019 16:28:13 +0200 Subject: [PATCH 02/22] Add theory 2 --- src/test/scala/RISCV/DataTypes.scala | 3 +- src/test/scala/RISCV/Ops.scala | 21 +----- src/test/scala/RISCV/VM.scala | 6 +- src/test/scala/RISCV/printUtils.scala | 3 +- src/test/scala/RISCV/testRunner.scala | 90 +++++++++++++++++++++++ theory2.org | 101 ++++++++++++++++++++++++++ 6 files changed, 201 insertions(+), 23 deletions(-) create mode 100644 theory2.org diff --git a/src/test/scala/RISCV/DataTypes.scala b/src/test/scala/RISCV/DataTypes.scala index 180e854..4cd7c61 100644 --- 
a/src/test/scala/RISCV/DataTypes.scala +++ b/src/test/scala/RISCV/DataTypes.scala @@ -39,7 +39,8 @@ object Data { // addr is the target address case class PcUpdateJALR(addr: Addr) extends ExecutionEvent case class PcUpdateJAL(addr: Addr) extends ExecutionEvent - case class PcUpdateB(addr: Addr) extends ExecutionEvent + case class PcUpdateBranch(addr: Addr) extends ExecutionEvent + case class PcUpdateNoBranch(addr: Addr) extends ExecutionEvent case class PcUpdate(addr: Addr) extends ExecutionEvent case class ExecutionTraceEvent(pc: Addr, event: ExecutionEvent*){ override def toString(): String = s"$pc: " + event.toList.mkString(", ") } diff --git a/src/test/scala/RISCV/Ops.scala b/src/test/scala/RISCV/Ops.scala index 9cd7ded..f129986 100644 --- a/src/test/scala/RISCV/Ops.scala +++ b/src/test/scala/RISCV/Ops.scala @@ -110,25 +110,10 @@ object Ops { case class LUI(rd: Reg, imm: Imm) extends Op with UType case class AUIPC(rd: Reg, imm: Imm) extends Op with UType - - case class Store(rs2: Reg, rs1: Reg, offset: Imm, width: Int) extends Op with SType - object Store { - def sw(rs2: Int, rs1: Int, offset: Int) = Store(Reg(rs2), Reg(rs1), Imm(offset), 4) - def sh(rs2: Int, rs1: Int, offset: Int) = Store(Reg(rs2), Reg(rs1), Imm(offset), 2) - def sb(rs2: Int, rs1: Int, offset: Int) = Store(Reg(rs2), Reg(rs1), Imm(offset), 1) - } - - case class Load(rd: Reg, rs1: Reg, offset: Imm, width: Int, signed: Boolean) extends Op with IType - object Load { - def lw (rd: Int, rs1: Int, offset: Int) = Load(Reg(rd), Reg(rs1), Imm(offset), 4, true) - def lh (rd: Int, rs1: Int, offset: Int) = Load(Reg(rd), Reg(rs1), Imm(offset), 2, true) - def lb (rd: Int, rs1: Int, offset: Int) = Load(Reg(rd), Reg(rs1), Imm(offset), 1, true) - def lhu(rd: Int, rs1: Int, offset: Int) = Load(Reg(rd), Reg(rs1), Imm(offset), 2, false) - def lbu(rd: Int, rs1: Int, offset: Int) = Load(Reg(rd), Reg(rs1), Imm(offset), 1, false) - } - case class JALR(rd: Reg, rs1: Reg, dst: String) extends Op with IType case class 
JAL(rd: Reg, dst: String) extends Op with UType + case class SW(rs2: Reg, rs1: Reg, offset: Imm) extends Op with SType + case class LW(rd: Reg, rs1: Reg, offset: Imm) extends Op with IType object LUI { def apply(rd: Int, imm: Int): LUI = LUI(Reg(rd), Imm(imm)) } @@ -136,6 +121,8 @@ object Ops { object JAL{ def apply(rd: Int, dst: String): JAL = JAL(Reg(rd), dst) } object JALR{ def apply(rd: Int, rs1: Int, dst: String): JALR = JALR(Reg(rd), Reg(rs1), dst) } + object SW { def apply(rs2: Int, rs1: Int, offset: Int): SW = SW(Reg(rs2), Reg(rs1), Imm(offset)) } + object LW { def apply(rd: Int, rs1: Int, offset: Int): LW = LW(Reg(rd), Reg(rs1), Imm(offset)) } // This op should not be assembled, but will for the sake of simplicity be rendered as a NOP case object DONE extends Op with IType { val rd = Reg(0); val rs1 = Reg(0) } diff --git a/src/test/scala/RISCV/VM.scala b/src/test/scala/RISCV/VM.scala index 203161f..effaf6d 100644 --- a/src/test/scala/RISCV/VM.scala +++ b/src/test/scala/RISCV/VM.scala @@ -38,21 +38,19 @@ case class VM( } - private def executeBranch(op: Branch) = { getAddr(op.dst).map{ addr => val takeBranch = regs.compare(op.rs1, op.rs2, op.comp.run) if(takeBranch){ val nextVM = copy(pc = addr) - jump(nextVM, PcUpdateB(nextVM.pc)) + jump(nextVM, PcUpdateBranch(nextVM.pc)) } else { - step(this) + step(this, PcUpdateNoBranch(this.pc + Addr(4))) } } } - /** * The weird :_* syntax is simply a way to pass a list to a varArgs function. 
* diff --git a/src/test/scala/RISCV/printUtils.scala b/src/test/scala/RISCV/printUtils.scala index 58adefa..980e52e 100644 --- a/src/test/scala/RISCV/printUtils.scala +++ b/src/test/scala/RISCV/printUtils.scala @@ -42,7 +42,8 @@ object PrintUtils { // addr is the target address case PcUpdateJALR(addr) => fansi.Color.Green(s"PC updated to ${addr.show} via JALR") case PcUpdateJAL(addr) => fansi.Color.Magenta(s"PC updated to ${addr.show} via JAL") - case PcUpdateB(addr) => fansi.Color.Yellow(s"PC updated to ${addr.show} via Branch") + case PcUpdateBranch(addr) => fansi.Color.Yellow(s"PC updated to ${addr.show} via Branch") + case PcUpdateNoBranch(addr) => fansi.Color.Yellow(s"PC updated to ${addr.show}, skipping a Branch") } } diff --git a/src/test/scala/RISCV/testRunner.scala b/src/test/scala/RISCV/testRunner.scala index 8ef1f1f..d51128f 100644 --- a/src/test/scala/RISCV/testRunner.scala +++ b/src/test/scala/RISCV/testRunner.scala @@ -100,4 +100,94 @@ object TestRunner { successful }.toOption.getOrElse(false) } + + def profileBranching(testOptions: TestOptions): Boolean = { + + val testResults = for { + lines <- fileUtils.readTest(testOptions) + program <- FiveStage.Parser.parseProgram(lines, testOptions) + (binary, (trace, finalVM)) <- program.validate.map(x => (x._1, x._2.run)) + } yield { + + sealed trait BranchEvent + case class Taken(addr: Int) extends BranchEvent + case class NotTaken(addr: Int) extends BranchEvent + + val events: List[BranchEvent] = trace.flatMap(_.event).collect{ + case PcUpdateBranch(x) => Taken(x.value) + case PcUpdateNoBranch(x) => NotTaken(x.value) + } + + + /** + * This is a sample profiler for a rather unrealistic branch predictor which has an unlimited amount + * of slots + */ + def OneBitInfiniteSlots(events: List[BranchEvent]): Int = { + + // Helper inspects the next element of the event list. If the event is a mispredict the prediction table is updated + // to reflect this. 
+ // As long as there are remaining events the helper calls itself recursively on the remainder + def helper(events: List[BranchEvent], predictionTable: Map[Int, Boolean]): Int = { + events match { + + // Scala syntax for matching a list with a head element of some type and a tail + // `case h :: t =>` + // means we want to match a list with at least a head and a tail (tail can be Nil, so we + // essentially want to match a list with at least one element) + // h is the first element of the list, t is the remainder (which can be Nil, aka empty) + + // `case Constructor(arg1, arg2) :: t => ` + // means we want to match a list whose first element is of type Constructor, giving us access to its internal + // values. + + // `case Constructor(arg1, arg2) :: t if(p(arg1, arg2)) =>` + // means we want to match a list whose first element is of type Constructor while satisfying some predicate p, + // called an if guard. + case Taken(addr) :: t if( predictionTable(addr)) => helper(t, predictionTable) + case Taken(addr) :: t if(!predictionTable(addr)) => 1 + helper(t, predictionTable.updated(addr, true)) + case NotTaken(addr) :: t if( predictionTable(addr)) => 1 + helper(t, predictionTable.updated(addr, false)) + case NotTaken(addr) :: t if(!predictionTable(addr)) => helper(t, predictionTable) + case _ => 0 + } + } + + // Initially every possible branch is set to false since the initial state of the predictor is to assume branch not taken + def initState = events.map{ + case Taken(addr) => (addr, false) + case NotTaken(addr) => (addr, false) + }.toMap + + helper(events, initState) + } + + say(OneBitInfiniteSlots(events)) + + } + true + } + + + def profileCache(testOptions: TestOptions): Boolean = { + + val testResults = for { + lines <- fileUtils.readTest(testOptions) + program <- FiveStage.Parser.parseProgram(lines, testOptions) + (binary, (trace, finalVM)) <- program.validate.map(x => (x._1, x._2.run)) + } yield { + + sealed trait MemoryEvent + case class Write(addr: Int) 
extends MemoryEvent + case class Read(addr: Int) extends MemoryEvent + + val events: List[MemoryEvent] = trace.flatMap(_.event).collect{ + case MemWrite(x,_) => Write(x.value) + case MemRead(x,_) => Read(x.value) + } + + // Your cache here + + } + true + } } diff --git a/theory2.org b/theory2.org new file mode 100644 index 0000000..07cf86e --- /dev/null +++ b/theory2.org @@ -0,0 +1,101 @@ +* Question 1 - Benchmarking + In order to gauge the performance increase from adding branch predictors it is necessary to do some testing. + Rather than writing a test from scratch it is better to use the tester already in use in the test harness. + When running a program the VM outputs a log of all events, including which branches have been taken and which + haven't, which as it turns out is the only information we actually need to gauge the effectiveness of a branch + predictor! + + For this exercise you will write a program that parses a log of branch events. + + #+BEGIN_SRC scala + sealed trait BranchEvent + case class Taken(addr: Int) extends BranchEvent + case class NotTaken(addr: Int) extends BranchEvent + + + def profile(events: List[BranchEvent]): Int = ??? + #+END_SRC + + To help you get started, I have provided you with much of the necessary code. + In order to get an idea for how you should profile branch misses, consider the following profiler which calculates + misses for a processor with a branch predictor with a 1 bit predictor with infinite memory: + + #+BEGIN_SRC scala + def OneBitInfiniteSlots(events: List[BranchEvent]): Int = { + + // Helper inspects the next element of the event list. If the event is a mispredict the prediction table is updated + // to reflect this. 
+ // As long as there are remaining events the helper calls itself recursively on the remainder + def helper(events: List[BranchEvent], predictionTable: Map[Int, Boolean]): Int = { + events match { + + // Scala syntax for matching a list with a head element of some type and a tail + // `case h :: t =>` + // means we want to match a list with at least a head and a tail (tail can be Nil, so we + // essentially want to match a list with at least one element) + // h is the first element of the list, t is the remainder (which can be Nil, aka empty) + + // `case Constructor(arg1, arg2) :: t => ` + // means we want to match a list whose first element is of type Constructor, giving us access to its internal + // values. + + // `case Constructor(arg1, arg2) :: t if(p(arg1, arg2)) =>` + // means we want to match a list whose first element is of type Constructor while satisfying some predicate p, + // called an if guard. + case Taken(addr) :: t if( predictionTable(addr)) => helper(t, predictionTable) + case Taken(addr) :: t if(!predictionTable(addr)) => 1 + helper(t, predictionTable.updated(addr, true)) + case NotTaken(addr) :: t if( predictionTable(addr)) => 1 + helper(t, predictionTable.updated(addr, false)) + case NotTaken(addr) :: t if(!predictionTable(addr)) => helper(t, predictionTable) + case _ => 0 + } + } + + // Initially every possible branch is set to false since the initial state of the predictor is to assume branch not taken + def initState = events.map{ + case Taken(addr) => (addr, false) + case NotTaken(addr) => (addr, false) + }.toMap + + helper(events, initState) + } + #+END_SRC + +** Your task + Your job is to implement a test that checks how many misses occur for a 2 bit branch predictor with 4 slots. + For this task it is probably smart to use something other than a ~Map[Int, Boolean]~ + + The skeleton code is located in ~testRunner.scala~ and can be run using testOnly FiveStage.ProfileTest. 
+ If you do so now you will see that the unrealistic prediction model yields 1449 misses. + + With a 2 bit 4 slot scheme, how many misses will you incur? + Answer with a number. + +* Question 2 - Cache profiling + Unlike our design, which has a very limited memory pool, real designs have access to vast amounts of memory, offset + by a steep cost in access latency. + To mitigate this, a modern processor features several caches where even the smallest, fastest cache has more memory than + your entire design. + In order to investigate how caches can alter performance it is therefore necessary to make some rather + unrealistic assumptions to see how different cache schemes impact performance. + + We will therefore assume the following: + + Reads from main memory take 5 cycles + + cache has a total storage of 32 words (1024 bits) + + cache reads work as they do now (i.e. no additional latency) + + For this exercise you will write a program that parses a log of memory events, similar to the previous task + #+BEGIN_SRC scala + sealed trait MemoryEvent + case class Write(addr: Int) extends MemoryEvent + case class Read(addr: Int) extends MemoryEvent + + + def profile(events: List[MemoryEvent]): Int = ??? 
+ #+END_SRC + +** Your task + Your job is to implement a test that checks how many delay cycles will occur for a cache which: + + Follows a 2-way associative scheme + + Block size is 4 words (128 bits) + + Is write-through write no-allocate + + Eviction policy is LRU (least recently used) From 49bfd372d09907a6300226d3d403f6b378c909da Mon Sep 17 00:00:00 2001 From: peteraa Date: Thu, 17 Oct 2019 16:30:59 +0200 Subject: [PATCH 03/22] Add tests --- .../tests/programs/source/convolution.c | 53 + .../resources/tests/theory/branchProfiling.s | 200 ++ src/test/resources/tests/theory/convolution.s | 2510 +++++++++++++++++ src/test/scala/Manifest.scala | 17 + 4 files changed, 2780 insertions(+) create mode 100644 src/test/resources/tests/programs/source/convolution.c create mode 100644 src/test/resources/tests/theory/branchProfiling.s create mode 100644 src/test/resources/tests/theory/convolution.s diff --git a/src/test/resources/tests/programs/source/convolution.c b/src/test/resources/tests/programs/source/convolution.c new file mode 100644 index 0000000..42f9dd5 --- /dev/null +++ b/src/test/resources/tests/programs/source/convolution.c @@ -0,0 +1,53 @@ +// C rmsbolt starter file + +// Local Variables: +// rmsbolt-command: "/opt/riscv/bin/riscv32-unknown-elf-gcc -O0" +// rmsbolt-disassemble: nil +// End: + + +int lookup(int x, int y, int dim){ + int t = 0; + int ii; + for(ii = 0; ii < y; ii++){ + t += dim; + } + return t + x; +} + +void convolutePixel(int x, int y, int* image, int* output, int* kernel){ + int acc = 0; + acc += image[lookup( x - 1 , y - 1 , 32)] << kernel[0]; + acc += image[lookup( x , y - 1 , 32)] << kernel[1]; + acc += image[lookup( x + 1 , y - 1 , 32)] << kernel[2]; + + acc += image[lookup( x - 1 , y , 32)] << kernel[3]; + acc += image[lookup( x , y , 32)] << kernel[4]; + acc += image[lookup( x + 1 , y , 32)] << kernel[5]; + + acc += image[lookup( x - 1 , y + 1 , 32)] << kernel[6]; + acc += image[lookup( x , y + 1 , 32)] << kernel[7]; + acc += 
image[lookup( x + 1 , y + 1 , 32)] << kernel[8]; + + output[lookup(x, y, 30)] = acc; +} + +int run() { + + int* image = (int*)0; + int* output = (int*)(1024); + int* kernel = (int*)(1924); + + int ii; + int kk; + for(ii = 1; ii < 31; ii++){ + for(kk = 1; kk < 31; kk++){ + convolutePixel(ii, kk, image, output, kernel); + } + } + return 0; +} + +int main(){ + run(); +} diff --git a/src/test/resources/tests/theory/branchProfiling.s b/src/test/resources/tests/theory/branchProfiling.s new file mode 100644 index 0000000..acd6a11 --- /dev/null +++ b/src/test/resources/tests/theory/branchProfiling.s @@ -0,0 +1,200 @@ +main: + addi sp,sp,-16 + sw ra,12(sp) + call run + lw ra,12(sp) + addi sp,sp,16 + jr ra +rem: + bge a0,a1,.L7 + ret +.L7: + addi sp,sp,-16 + sw ra,12(sp) + sub a0,a0,a1 + call rem + lw ra,12(sp) + addi sp,sp,16 + jr ra +f1: + addi sp,sp,-16 + sw ra,12(sp) + sw s0,8(sp) + sw s1,4(sp) + sw s2,0(sp) + li s1,0 + li s2,241 + j .L9 +.L11: + mv a0,s0 +.L9: + addi s0,a0,-1 + blez a0,.L8 + beq s0,s2,.L8 + li a1,10 + mv a0,s0 + call rem + bnez a0,.L11 + add s1,s1,s0 + j .L11 +.L8: + mv a0,s1 + lw ra,12(sp) + lw s0,8(sp) + lw s1,4(sp) + lw s2,0(sp) + addi sp,sp,16 + jr ra +f2: + addi sp,sp,-32 + sw ra,28(sp) + sw s0,24(sp) + sw s1,20(sp) + sw s2,16(sp) + sw s3,12(sp) + sw s4,8(sp) + mv s3,a0 + li s2,0 + li s0,0 + li s4,3 +.L15: + sub a0,s3,s0 + call f1 + mv s1,a0 + add a0,s0,s3 + call f1 + add a0,s1,a0 + add s2,s2,a0 + addi s0,s0,1 + bne s0,s4,.L15 + mv a0,s2 + lw ra,28(sp) + lw s0,24(sp) + lw s1,20(sp) + lw s2,16(sp) + lw s3,12(sp) + lw s4,8(sp) + addi sp,sp,32 + jr ra +f3: + addi sp,sp,-16 + sw ra,12(sp) + sw s0,8(sp) + sw s1,4(sp) + mv s0,a0 + li a1,10 + call rem + beqz a0,.L23 + li a1,20 + mv a0,s0 + call rem + beqz a0,.L24 + mv a0,s0 + call f1 + mv s1,a0 + mv a0,s0 + call f2 + add a0,s1,a0 +.L18: + lw ra,12(sp) + lw s0,8(sp) + lw s1,4(sp) + addi sp,sp,16 + jr ra +.L23: + mv a0,s0 + call f2 + j .L18 +.L24: + mv a0,s0 + call f1 + j .L18 +getCall: + addi sp,sp,-16 + 
sw ra,12(sp) + beqz a0,.L30 + li a5,1 + beq a0,a5,.L31 + mv a0,a1 + call f3 +.L25: + lw ra,12(sp) + addi sp,sp,16 + jr ra +.L30: + mv a0,a1 + call f1 + j .L25 +.L31: + mv a0,a1 + call f2 + j .L25 +run: + addi sp,sp,-48 + sw ra,44(sp) + sw s0,40(sp) + sw s1,36(sp) + sw s2,32(sp) + sw s3,28(sp) + sw s4,24(sp) + sw s5,20(sp) + sw s6,16(sp) + sw s7,12(sp) + sw s8,8(sp) + li s1,0 + li s0,0 + li s3,0 + li s7,56 + li s6,2 + li s5,3 + li s4,24 +.L35: + sub a5,s7,s1 + lw s8,0(a5) + sgt a5,s0,s6 + xori a5,a5,1 + add s0,s0,a5 + sub a5,s0,s5 + snez a5,a5 + sub a5,zero,a5 + and s0,s0,a5 + lw a1,0(s1) + mv a0,s0 + call getCall + mv s2,a0 + mv a1,s8 + mv a0,s0 + call getCall + sub a0,s2,a0 + add s3,s3,a0 + addi s1,s1,4 + bne s1,s4,.L35 + mv a0,s3 + lw ra,44(sp) + lw s0,40(sp) + lw s1,36(sp) + lw s2,32(sp) + lw s3,28(sp) + lw s4,24(sp) + lw s5,20(sp) + lw s6,16(sp) + lw s7,12(sp) + lw s8,8(sp) + addi sp,sp,48 + jr ra +#memset 0x0, 0x4 +#memset 0x4, 0x7 +#memset 0x8, 0x3 +#memset 0xc, 0x8 +#memset 0x10, 0x4 +#memset 0x14, 0x22 +#memset 0x18, 0x19 +#memset 0x1c, 0x8 +#memset 0x20, 0x11 +#memset 0x24, 0x10 +#memset 0x28, 0x9 +#memset 0x2c, 0x8 +#memset 0x30, 0x7 +#memset 0x34, 0x6 +#memset 0x38, 0x5 +#memset 0x3c, 0x10 diff --git a/src/test/resources/tests/theory/convolution.s b/src/test/resources/tests/theory/convolution.s new file mode 100644 index 0000000..95fb6c1 --- /dev/null +++ b/src/test/resources/tests/theory/convolution.s @@ -0,0 +1,2510 @@ +main: + addi sp,sp,-16 + sw ra,12(sp) + sw s0,8(sp) + addi s0,sp,16 + call run + li a5,0 + mv a0,a5 + lw ra,12(sp) + lw s0,8(sp) + addi sp,sp,16 + jr ra +lookup: + addi sp,sp,-48 + sw s0,44(sp) + addi s0,sp,48 + sw a0,-36(s0) + sw a1,-40(s0) + sw a2,-44(s0) + sw zero,-20(s0) + sw zero,-24(s0) + j .L2 +.L3: + lw a4,-20(s0) + lw a5,-44(s0) + add a5,a4,a5 + sw a5,-20(s0) + lw a5,-24(s0) + addi a5,a5,1 + sw a5,-24(s0) +.L2: + lw a4,-24(s0) + lw a5,-40(s0) + blt a4,a5,.L3 + lw a4,-20(s0) + lw a5,-36(s0) + add a5,a4,a5 + mv a0,a5 + lw 
s0,44(sp) + addi sp,sp,48 + jr ra +convolutePixel: + addi sp,sp,-64 + sw ra,60(sp) + sw s0,56(sp) + addi s0,sp,64 + sw a0,-36(s0) + sw a1,-40(s0) + sw a2,-44(s0) + sw a3,-48(s0) + sw a4,-52(s0) + sw zero,-20(s0) + lw a5,-36(s0) + addi a4,a5,-1 + lw a5,-40(s0) + addi a5,a5,-1 + li a2,32 + mv a1,a5 + mv a0,a4 + call lookup + mv a5,a0 + slli a5,a5,2 + lw a4,-44(s0) + add a5,a4,a5 + lw a4,0(a5) + lw a5,-52(s0) + lw a5,0(a5) + sll a5,a4,a5 + lw a4,-20(s0) + add a5,a4,a5 + sw a5,-20(s0) + lw a5,-40(s0) + addi a5,a5,-1 + li a2,32 + mv a1,a5 + lw a0,-36(s0) + call lookup + mv a5,a0 + slli a5,a5,2 + lw a4,-44(s0) + add a5,a4,a5 + lw a4,0(a5) + lw a5,-52(s0) + addi a5,a5,4 + lw a5,0(a5) + sll a5,a4,a5 + lw a4,-20(s0) + add a5,a4,a5 + sw a5,-20(s0) + lw a5,-36(s0) + addi a4,a5,1 + lw a5,-40(s0) + addi a5,a5,-1 + li a2,32 + mv a1,a5 + mv a0,a4 + call lookup + mv a5,a0 + slli a5,a5,2 + lw a4,-44(s0) + add a5,a4,a5 + lw a4,0(a5) + lw a5,-52(s0) + addi a5,a5,8 + lw a5,0(a5) + sll a5,a4,a5 + lw a4,-20(s0) + add a5,a4,a5 + sw a5,-20(s0) + lw a5,-36(s0) + addi a5,a5,-1 + li a2,32 + lw a1,-40(s0) + mv a0,a5 + call lookup + mv a5,a0 + slli a5,a5,2 + lw a4,-44(s0) + add a5,a4,a5 + lw a4,0(a5) + lw a5,-52(s0) + addi a5,a5,12 + lw a5,0(a5) + sll a5,a4,a5 + lw a4,-20(s0) + add a5,a4,a5 + sw a5,-20(s0) + li a2,32 + lw a1,-40(s0) + lw a0,-36(s0) + call lookup + mv a5,a0 + slli a5,a5,2 + lw a4,-44(s0) + add a5,a4,a5 + lw a4,0(a5) + lw a5,-52(s0) + addi a5,a5,16 + lw a5,0(a5) + sll a5,a4,a5 + lw a4,-20(s0) + add a5,a4,a5 + sw a5,-20(s0) + lw a5,-36(s0) + addi a5,a5,1 + li a2,32 + lw a1,-40(s0) + mv a0,a5 + call lookup + mv a5,a0 + slli a5,a5,2 + lw a4,-44(s0) + add a5,a4,a5 + lw a4,0(a5) + lw a5,-52(s0) + addi a5,a5,20 + lw a5,0(a5) + sll a5,a4,a5 + lw a4,-20(s0) + add a5,a4,a5 + sw a5,-20(s0) + lw a5,-36(s0) + addi a4,a5,-1 + lw a5,-40(s0) + addi a5,a5,1 + li a2,32 + mv a1,a5 + mv a0,a4 + call lookup + mv a5,a0 + slli a5,a5,2 + lw a4,-44(s0) + add a5,a4,a5 + lw a4,0(a5) + lw a5,-52(s0) + 
addi a5,a5,24 + lw a5,0(a5) + sll a5,a4,a5 + lw a4,-20(s0) + add a5,a4,a5 + sw a5,-20(s0) + lw a5,-40(s0) + addi a5,a5,1 + li a2,32 + mv a1,a5 + lw a0,-36(s0) + call lookup + mv a5,a0 + slli a5,a5,2 + lw a4,-44(s0) + add a5,a4,a5 + lw a4,0(a5) + lw a5,-52(s0) + addi a5,a5,28 + lw a5,0(a5) + sll a5,a4,a5 + lw a4,-20(s0) + add a5,a4,a5 + sw a5,-20(s0) + lw a5,-36(s0) + addi a4,a5,1 + lw a5,-40(s0) + addi a5,a5,1 + li a2,32 + mv a1,a5 + mv a0,a4 + call lookup + mv a5,a0 + slli a5,a5,2 + lw a4,-44(s0) + add a5,a4,a5 + lw a4,0(a5) + lw a5,-52(s0) + addi a5,a5,32 + lw a5,0(a5) + sll a5,a4,a5 + lw a4,-20(s0) + add a5,a4,a5 + sw a5,-20(s0) + li a2,30 + lw a1,-40(s0) + lw a0,-36(s0) + call lookup + mv a5,a0 + slli a5,a5,2 + lw a4,-48(s0) + add a5,a4,a5 + lw a4,-20(s0) + sw a4,0(a5) + nop + lw ra,60(sp) + lw s0,56(sp) + addi sp,sp,64 + jr ra +run: + addi sp,sp,-48 + sw ra,44(sp) + sw s0,40(sp) + addi s0,sp,48 + sw zero,-28(s0) + li a5,1024 + sw a5,-32(s0) + li a5,1924 + sw a5,-36(s0) + li a5,1 + sw a5,-20(s0) + j .L7 +.L10: + li a5,1 + sw a5,-24(s0) + j .L8 +.L9: + lw a4,-36(s0) + lw a3,-32(s0) + lw a2,-28(s0) + lw a1,-24(s0) + lw a0,-20(s0) + call convolutePixel + lw a5,-24(s0) + addi a5,a5,1 + sw a5,-24(s0) +.L8: + lw a4,-24(s0) + li a5,6 + ble a4,a5,.L9 + lw a5,-20(s0) + addi a5,a5,1 + sw a5,-20(s0) +.L7: + lw a4,-20(s0) + li a5,6 + ble a4,a5,.L10 + li a5,0 + mv a0,a5 + lw ra,44(sp) + lw s0,40(sp) + addi sp,sp,48 + jr ra +#memset 0x0, 0x1 +#memset 0x4, 0x5 +#memset 0x8, 0x9 +#memset 0x12, 0x1 +#memset 0x16, 0x1 +#memset 0x20, 0x2 +#memset 0x24, 0x2 +#memset 0x28, 0x2 +#memset 0x32, 0x3 +#memset 0x36, 0x3 +#memset 0x40, 0x4 +#memset 0x44, 0x4 +#memset 0x48, 0x4 +#memset 0x52, 0x5 +#memset 0x56, 0x5 +#memset 0x60, 0x6 +#memset 0x64, 0x6 +#memset 0x68, 0x6 +#memset 0x72, 0x7 +#memset 0x76, 0x7 +#memset 0x80, 0x8 +#memset 0x84, 0x8 +#memset 0x88, 0x8 +#memset 0x92, 0x9 +#memset 0x96, 0x9 +#memset 0x100, 0x1 +#memset 0x104, 0x1 +#memset 0x108, 0x1 +#memset 0x112, 0x1 +#memset 
0x116, 0x1 +#memset 0x120, 0x1 +#memset 0x124, 0x1 +#memset 0x128, 0x1 +#memset 0x132, 0x1 +#memset 0x136, 0x1 +#memset 0x140, 0x1 +#memset 0x144, 0x1 +#memset 0x148, 0x1 +#memset 0x152, 0x1 +#memset 0x156, 0x1 +#memset 0x160, 0x1 +#memset 0x164, 0x1 +#memset 0x168, 0x1 +#memset 0x172, 0x1 +#memset 0x176, 0x1 +#memset 0x180, 0x1 +#memset 0x184, 0x1 +#memset 0x188, 0x1 +#memset 0x192, 0x1 +#memset 0x196, 0x1 +#memset 0x200, 0x2 +#memset 0x204, 0x2 +#memset 0x208, 0x2 +#memset 0x212, 0x2 +#memset 0x216, 0x2 +#memset 0x220, 0x2 +#memset 0x224, 0x2 +#memset 0x228, 0x2 +#memset 0x232, 0x2 +#memset 0x236, 0x2 +#memset 0x240, 0x2 +#memset 0x244, 0x2 +#memset 0x248, 0x2 +#memset 0x252, 0x2 +#memset 0x256, 0x2 +#memset 0x260, 0x2 +#memset 0x264, 0x2 +#memset 0x268, 0x2 +#memset 0x272, 0x2 +#memset 0x276, 0x2 +#memset 0x280, 0x2 +#memset 0x284, 0x2 +#memset 0x288, 0x2 +#memset 0x292, 0x2 +#memset 0x296, 0x2 +#memset 0x300, 0x3 +#memset 0x304, 0x3 +#memset 0x308, 0x3 +#memset 0x312, 0x3 +#memset 0x316, 0x3 +#memset 0x320, 0x3 +#memset 0x324, 0x3 +#memset 0x328, 0x3 +#memset 0x332, 0x3 +#memset 0x336, 0x3 +#memset 0x340, 0x3 +#memset 0x344, 0x3 +#memset 0x348, 0x3 +#memset 0x352, 0x3 +#memset 0x356, 0x3 +#memset 0x360, 0x3 +#memset 0x364, 0x3 +#memset 0x368, 0x3 +#memset 0x372, 0x3 +#memset 0x376, 0x3 +#memset 0x380, 0x3 +#memset 0x384, 0x3 +#memset 0x388, 0x3 +#memset 0x392, 0x3 +#memset 0x396, 0x3 +#memset 0x400, 0x4 +#memset 0x404, 0x4 +#memset 0x408, 0x4 +#memset 0x412, 0x4 +#memset 0x416, 0x4 +#memset 0x420, 0x4 +#memset 0x424, 0x4 +#memset 0x428, 0x4 +#memset 0x432, 0x4 +#memset 0x436, 0x4 +#memset 0x440, 0x4 +#memset 0x444, 0x4 +#memset 0x448, 0x4 +#memset 0x452, 0x4 +#memset 0x456, 0x4 +#memset 0x460, 0x4 +#memset 0x464, 0x4 +#memset 0x468, 0x4 +#memset 0x472, 0x4 +#memset 0x476, 0x4 +#memset 0x480, 0x4 +#memset 0x484, 0x4 +#memset 0x488, 0x4 +#memset 0x492, 0x4 +#memset 0x496, 0x4 +#memset 0x500, 0x5 +#memset 0x504, 0x5 +#memset 0x508, 0x5 +#memset 0x512, 0x5 +#memset 
0x516, 0x5 +#memset 0x520, 0x5 +#memset 0x524, 0x5 +#memset 0x528, 0x5 +#memset 0x532, 0x5 +#memset 0x536, 0x5 +#memset 0x540, 0x5 +#memset 0x544, 0x5 +#memset 0x548, 0x5 +#memset 0x552, 0x5 +#memset 0x556, 0x5 +#memset 0x560, 0x5 +#memset 0x564, 0x5 +#memset 0x568, 0x5 +#memset 0x572, 0x5 +#memset 0x576, 0x5 +#memset 0x580, 0x5 +#memset 0x584, 0x5 +#memset 0x588, 0x5 +#memset 0x592, 0x5 +#memset 0x596, 0x5 +#memset 0x600, 0x6 +#memset 0x604, 0x6 +#memset 0x608, 0x6 +#memset 0x612, 0x6 +#memset 0x616, 0x6 +#memset 0x620, 0x6 +#memset 0x624, 0x6 +#memset 0x628, 0x6 +#memset 0x632, 0x6 +#memset 0x636, 0x6 +#memset 0x640, 0x6 +#memset 0x644, 0x6 +#memset 0x648, 0x6 +#memset 0x652, 0x6 +#memset 0x656, 0x6 +#memset 0x660, 0x6 +#memset 0x664, 0x6 +#memset 0x668, 0x6 +#memset 0x672, 0x6 +#memset 0x676, 0x6 +#memset 0x680, 0x6 +#memset 0x684, 0x6 +#memset 0x688, 0x6 +#memset 0x692, 0x6 +#memset 0x696, 0x6 +#memset 0x700, 0x7 +#memset 0x704, 0x7 +#memset 0x708, 0x7 +#memset 0x712, 0x7 +#memset 0x716, 0x7 +#memset 0x720, 0x7 +#memset 0x724, 0x7 +#memset 0x728, 0x7 +#memset 0x732, 0x7 +#memset 0x736, 0x7 +#memset 0x740, 0x7 +#memset 0x744, 0x7 +#memset 0x748, 0x7 +#memset 0x752, 0x7 +#memset 0x756, 0x7 +#memset 0x760, 0x7 +#memset 0x764, 0x7 +#memset 0x768, 0x7 +#memset 0x772, 0x7 +#memset 0x776, 0x7 +#memset 0x780, 0x7 +#memset 0x784, 0x7 +#memset 0x788, 0x7 +#memset 0x792, 0x7 +#memset 0x796, 0x7 +#memset 0x800, 0x8 +#memset 0x804, 0x8 +#memset 0x808, 0x8 +#memset 0x812, 0x8 +#memset 0x816, 0x8 +#memset 0x820, 0x8 +#memset 0x824, 0x8 +#memset 0x828, 0x8 +#memset 0x832, 0x8 +#memset 0x836, 0x8 +#memset 0x840, 0x8 +#memset 0x844, 0x8 +#memset 0x848, 0x8 +#memset 0x852, 0x8 +#memset 0x856, 0x8 +#memset 0x860, 0x8 +#memset 0x864, 0x8 +#memset 0x868, 0x8 +#memset 0x872, 0x8 +#memset 0x876, 0x8 +#memset 0x880, 0x8 +#memset 0x884, 0x8 +#memset 0x888, 0x8 +#memset 0x892, 0x8 +#memset 0x896, 0x8 +#memset 0x900, 0x9 +#memset 0x904, 0x9 +#memset 0x908, 0x9 +#memset 0x912, 0x9 +#memset 
0x916, 0x9 +#memset 0x920, 0x9 +#memset 0x924, 0x9 +#memset 0x928, 0x9 +#memset 0x932, 0x9 +#memset 0x936, 0x9 +#memset 0x940, 0x9 +#memset 0x944, 0x9 +#memset 0x948, 0x9 +#memset 0x952, 0x9 +#memset 0x956, 0x9 +#memset 0x960, 0x9 +#memset 0x964, 0x9 +#memset 0x968, 0x9 +#memset 0x972, 0x9 +#memset 0x976, 0x9 +#memset 0x980, 0x9 +#memset 0x984, 0x9 +#memset 0x988, 0x9 +#memset 0x992, 0x9 +#memset 0x996, 0x9 +#memset 0x1000, 0x1 +#memset 0x1004, 0x1 +#memset 0x1008, 0x1 +#memset 0x1012, 0x1 +#memset 0x1016, 0x1 +#memset 0x1020, 0x1 +#memset 0x1024, 0x1 +#memset 0x1028, 0x1 +#memset 0x1032, 0x1 +#memset 0x1036, 0x1 +#memset 0x1040, 0x1 +#memset 0x1044, 0x1 +#memset 0x1048, 0x1 +#memset 0x1052, 0x1 +#memset 0x1056, 0x1 +#memset 0x1060, 0x1 +#memset 0x1064, 0x1 +#memset 0x1068, 0x1 +#memset 0x1072, 0x1 +#memset 0x1076, 0x1 +#memset 0x1080, 0x1 +#memset 0x1084, 0x1 +#memset 0x1088, 0x1 +#memset 0x1092, 0x1 +#memset 0x1096, 0x1 +#memset 0x1100, 0x1 +#memset 0x1104, 0x1 +#memset 0x1108, 0x1 +#memset 0x1112, 0x1 +#memset 0x1116, 0x1 +#memset 0x1120, 0x1 +#memset 0x1124, 0x1 +#memset 0x1128, 0x1 +#memset 0x1132, 0x1 +#memset 0x1136, 0x1 +#memset 0x1140, 0x1 +#memset 0x1144, 0x1 +#memset 0x1148, 0x1 +#memset 0x1152, 0x1 +#memset 0x1156, 0x1 +#memset 0x1160, 0x1 +#memset 0x1164, 0x1 +#memset 0x1168, 0x1 +#memset 0x1172, 0x1 +#memset 0x1176, 0x1 +#memset 0x1180, 0x1 +#memset 0x1184, 0x1 +#memset 0x1188, 0x1 +#memset 0x1192, 0x1 +#memset 0x1196, 0x1 +#memset 0x1200, 0x1 +#memset 0x1204, 0x1 +#memset 0x1208, 0x1 +#memset 0x1212, 0x1 +#memset 0x1216, 0x1 +#memset 0x1220, 0x1 +#memset 0x1224, 0x1 +#memset 0x1228, 0x1 +#memset 0x1232, 0x1 +#memset 0x1236, 0x1 +#memset 0x1240, 0x1 +#memset 0x1244, 0x1 +#memset 0x1248, 0x1 +#memset 0x1252, 0x1 +#memset 0x1256, 0x1 +#memset 0x1260, 0x1 +#memset 0x1264, 0x1 +#memset 0x1268, 0x1 +#memset 0x1272, 0x1 +#memset 0x1276, 0x1 +#memset 0x1280, 0x1 +#memset 0x1284, 0x1 +#memset 0x1288, 0x1 +#memset 0x1292, 0x1 +#memset 0x1296, 0x1 +#memset 
0x1300, 0x1 +#memset 0x1304, 0x1 +#memset 0x1308, 0x1 +#memset 0x1312, 0x1 +#memset 0x1316, 0x1 +#memset 0x1320, 0x1 +#memset 0x1324, 0x1 +#memset 0x1328, 0x1 +#memset 0x1332, 0x1 +#memset 0x1336, 0x1 +#memset 0x1340, 0x1 +#memset 0x1344, 0x1 +#memset 0x1348, 0x1 +#memset 0x1352, 0x1 +#memset 0x1356, 0x1 +#memset 0x1360, 0x1 +#memset 0x1364, 0x1 +#memset 0x1368, 0x1 +#memset 0x1372, 0x1 +#memset 0x1376, 0x1 +#memset 0x1380, 0x1 +#memset 0x1384, 0x1 +#memset 0x1388, 0x1 +#memset 0x1392, 0x1 +#memset 0x1396, 0x1 +#memset 0x1400, 0x1 +#memset 0x1404, 0x1 +#memset 0x1408, 0x1 +#memset 0x1412, 0x1 +#memset 0x1416, 0x1 +#memset 0x1420, 0x1 +#memset 0x1424, 0x1 +#memset 0x1428, 0x1 +#memset 0x1432, 0x1 +#memset 0x1436, 0x1 +#memset 0x1440, 0x1 +#memset 0x1444, 0x1 +#memset 0x1448, 0x1 +#memset 0x1452, 0x1 +#memset 0x1456, 0x1 +#memset 0x1460, 0x1 +#memset 0x1464, 0x1 +#memset 0x1468, 0x1 +#memset 0x1472, 0x1 +#memset 0x1476, 0x1 +#memset 0x1480, 0x1 +#memset 0x1484, 0x1 +#memset 0x1488, 0x1 +#memset 0x1492, 0x1 +#memset 0x1496, 0x1 +#memset 0x1500, 0x1 +#memset 0x1504, 0x1 +#memset 0x1508, 0x1 +#memset 0x1512, 0x1 +#memset 0x1516, 0x1 +#memset 0x1520, 0x1 +#memset 0x1524, 0x1 +#memset 0x1528, 0x1 +#memset 0x1532, 0x1 +#memset 0x1536, 0x1 +#memset 0x1540, 0x1 +#memset 0x1544, 0x1 +#memset 0x1548, 0x1 +#memset 0x1552, 0x1 +#memset 0x1556, 0x1 +#memset 0x1560, 0x1 +#memset 0x1564, 0x1 +#memset 0x1568, 0x1 +#memset 0x1572, 0x1 +#memset 0x1576, 0x1 +#memset 0x1580, 0x1 +#memset 0x1584, 0x1 +#memset 0x1588, 0x1 +#memset 0x1592, 0x1 +#memset 0x1596, 0x1 +#memset 0x1600, 0x1 +#memset 0x1604, 0x1 +#memset 0x1608, 0x1 +#memset 0x1612, 0x1 +#memset 0x1616, 0x1 +#memset 0x1620, 0x1 +#memset 0x1624, 0x1 +#memset 0x1628, 0x1 +#memset 0x1632, 0x1 +#memset 0x1636, 0x1 +#memset 0x1640, 0x1 +#memset 0x1644, 0x1 +#memset 0x1648, 0x1 +#memset 0x1652, 0x1 +#memset 0x1656, 0x1 +#memset 0x1660, 0x1 +#memset 0x1664, 0x1 +#memset 0x1668, 0x1 +#memset 0x1672, 0x1 +#memset 0x1676, 0x1 +#memset 
0x1680, 0x1 +#memset 0x1684, 0x1 +#memset 0x1688, 0x1 +#memset 0x1692, 0x1 +#memset 0x1696, 0x1 +#memset 0x1700, 0x1 +#memset 0x1704, 0x1 +#memset 0x1708, 0x1 +#memset 0x1712, 0x1 +#memset 0x1716, 0x1 +#memset 0x1720, 0x1 +#memset 0x1724, 0x1 +#memset 0x1728, 0x1 +#memset 0x1732, 0x1 +#memset 0x1736, 0x1 +#memset 0x1740, 0x1 +#memset 0x1744, 0x1 +#memset 0x1748, 0x1 +#memset 0x1752, 0x1 +#memset 0x1756, 0x1 +#memset 0x1760, 0x1 +#memset 0x1764, 0x1 +#memset 0x1768, 0x1 +#memset 0x1772, 0x1 +#memset 0x1776, 0x1 +#memset 0x1780, 0x1 +#memset 0x1784, 0x1 +#memset 0x1788, 0x1 +#memset 0x1792, 0x1 +#memset 0x1796, 0x1 +#memset 0x1800, 0x1 +#memset 0x1804, 0x1 +#memset 0x1808, 0x1 +#memset 0x1812, 0x1 +#memset 0x1816, 0x1 +#memset 0x1820, 0x1 +#memset 0x1824, 0x1 +#memset 0x1828, 0x1 +#memset 0x1832, 0x1 +#memset 0x1836, 0x1 +#memset 0x1840, 0x1 +#memset 0x1844, 0x1 +#memset 0x1848, 0x1 +#memset 0x1852, 0x1 +#memset 0x1856, 0x1 +#memset 0x1860, 0x1 +#memset 0x1864, 0x1 +#memset 0x1868, 0x1 +#memset 0x1872, 0x1 +#memset 0x1876, 0x1 +#memset 0x1880, 0x1 +#memset 0x1884, 0x1 +#memset 0x1888, 0x1 +#memset 0x1892, 0x1 +#memset 0x1896, 0x1 +#memset 0x1900, 0x1 +#memset 0x1904, 0x1 +#memset 0x1908, 0x1 +#memset 0x1912, 0x1 +#memset 0x1916, 0x1 +#memset 0x1920, 0x1 +#memset 0x1924, 0x1 +#memset 0x1928, 0x1 +#memset 0x1932, 0x1 +#memset 0x1936, 0x1 +#memset 0x1940, 0x1 +#memset 0x1944, 0x1 +#memset 0x1948, 0x1 +#memset 0x1952, 0x1 +#memset 0x1956, 0x1 +#memset 0x1960, 0x1 +#memset 0x1964, 0x1 +#memset 0x1968, 0x1 +#memset 0x1972, 0x1 +#memset 0x1976, 0x1 +#memset 0x1980, 0x1 +#memset 0x1984, 0x1 +#memset 0x1988, 0x1 +#memset 0x1992, 0x1 +#memset 0x1996, 0x1 +#memset 0x2000, 0x2 +#memset 0x2004, 0x2 +#memset 0x2008, 0x2 +#memset 0x2012, 0x2 +#memset 0x2016, 0x2 +#memset 0x2020, 0x2 +#memset 0x2024, 0x2 +#memset 0x2028, 0x2 +#memset 0x2032, 0x2 +#memset 0x2036, 0x2 +#memset 0x2040, 0x2 +#memset 0x2044, 0x2 +#memset 0x2048, 0x2 +#memset 0x2052, 0x2 +#memset 0x2056, 0x2 +#memset 
0x2060, 0x2 +#memset 0x2064, 0x2 +#memset 0x2068, 0x2 +#memset 0x2072, 0x2 +#memset 0x2076, 0x2 +#memset 0x2080, 0x2 +#memset 0x2084, 0x2 +#memset 0x2088, 0x2 +#memset 0x2092, 0x2 +#memset 0x2096, 0x2 +#memset 0x2100, 0x2 +#memset 0x2104, 0x2 +#memset 0x2108, 0x2 +#memset 0x2112, 0x2 +#memset 0x2116, 0x2 +#memset 0x2120, 0x2 +#memset 0x2124, 0x2 +#memset 0x2128, 0x2 +#memset 0x2132, 0x2 +#memset 0x2136, 0x2 +#memset 0x2140, 0x2 +#memset 0x2144, 0x2 +#memset 0x2148, 0x2 +#memset 0x2152, 0x2 +#memset 0x2156, 0x2 +#memset 0x2160, 0x2 +#memset 0x2164, 0x2 +#memset 0x2168, 0x2 +#memset 0x2172, 0x2 +#memset 0x2176, 0x2 +#memset 0x2180, 0x2 +#memset 0x2184, 0x2 +#memset 0x2188, 0x2 +#memset 0x2192, 0x2 +#memset 0x2196, 0x2 +#memset 0x2200, 0x2 +#memset 0x2204, 0x2 +#memset 0x2208, 0x2 +#memset 0x2212, 0x2 +#memset 0x2216, 0x2 +#memset 0x2220, 0x2 +#memset 0x2224, 0x2 +#memset 0x2228, 0x2 +#memset 0x2232, 0x2 +#memset 0x2236, 0x2 +#memset 0x2240, 0x2 +#memset 0x2244, 0x2 +#memset 0x2248, 0x2 +#memset 0x2252, 0x2 +#memset 0x2256, 0x2 +#memset 0x2260, 0x2 +#memset 0x2264, 0x2 +#memset 0x2268, 0x2 +#memset 0x2272, 0x2 +#memset 0x2276, 0x2 +#memset 0x2280, 0x2 +#memset 0x2284, 0x2 +#memset 0x2288, 0x2 +#memset 0x2292, 0x2 +#memset 0x2296, 0x2 +#memset 0x2300, 0x2 +#memset 0x2304, 0x2 +#memset 0x2308, 0x2 +#memset 0x2312, 0x2 +#memset 0x2316, 0x2 +#memset 0x2320, 0x2 +#memset 0x2324, 0x2 +#memset 0x2328, 0x2 +#memset 0x2332, 0x2 +#memset 0x2336, 0x2 +#memset 0x2340, 0x2 +#memset 0x2344, 0x2 +#memset 0x2348, 0x2 +#memset 0x2352, 0x2 +#memset 0x2356, 0x2 +#memset 0x2360, 0x2 +#memset 0x2364, 0x2 +#memset 0x2368, 0x2 +#memset 0x2372, 0x2 +#memset 0x2376, 0x2 +#memset 0x2380, 0x2 +#memset 0x2384, 0x2 +#memset 0x2388, 0x2 +#memset 0x2392, 0x2 +#memset 0x2396, 0x2 +#memset 0x2400, 0x2 +#memset 0x2404, 0x2 +#memset 0x2408, 0x2 +#memset 0x2412, 0x2 +#memset 0x2416, 0x2 +#memset 0x2420, 0x2 +#memset 0x2424, 0x2 +#memset 0x2428, 0x2 +#memset 0x2432, 0x2 +#memset 0x2436, 0x2 +#memset 
0x2440, 0x2 +#memset 0x2444, 0x2 +#memset 0x2448, 0x2 +#memset 0x2452, 0x2 +#memset 0x2456, 0x2 +#memset 0x2460, 0x2 +#memset 0x2464, 0x2 +#memset 0x2468, 0x2 +#memset 0x2472, 0x2 +#memset 0x2476, 0x2 +#memset 0x2480, 0x2 +#memset 0x2484, 0x2 +#memset 0x2488, 0x2 +#memset 0x2492, 0x2 +#memset 0x2496, 0x2 +#memset 0x2500, 0x2 +#memset 0x2504, 0x2 +#memset 0x2508, 0x2 +#memset 0x2512, 0x2 +#memset 0x2516, 0x2 +#memset 0x2520, 0x2 +#memset 0x2524, 0x2 +#memset 0x2528, 0x2 +#memset 0x2532, 0x2 +#memset 0x2536, 0x2 +#memset 0x2540, 0x2 +#memset 0x2544, 0x2 +#memset 0x2548, 0x2 +#memset 0x2552, 0x2 +#memset 0x2556, 0x2 +#memset 0x2560, 0x2 +#memset 0x2564, 0x2 +#memset 0x2568, 0x2 +#memset 0x2572, 0x2 +#memset 0x2576, 0x2 +#memset 0x2580, 0x2 +#memset 0x2584, 0x2 +#memset 0x2588, 0x2 +#memset 0x2592, 0x2 +#memset 0x2596, 0x2 +#memset 0x2600, 0x2 +#memset 0x2604, 0x2 +#memset 0x2608, 0x2 +#memset 0x2612, 0x2 +#memset 0x2616, 0x2 +#memset 0x2620, 0x2 +#memset 0x2624, 0x2 +#memset 0x2628, 0x2 +#memset 0x2632, 0x2 +#memset 0x2636, 0x2 +#memset 0x2640, 0x2 +#memset 0x2644, 0x2 +#memset 0x2648, 0x2 +#memset 0x2652, 0x2 +#memset 0x2656, 0x2 +#memset 0x2660, 0x2 +#memset 0x2664, 0x2 +#memset 0x2668, 0x2 +#memset 0x2672, 0x2 +#memset 0x2676, 0x2 +#memset 0x2680, 0x2 +#memset 0x2684, 0x2 +#memset 0x2688, 0x2 +#memset 0x2692, 0x2 +#memset 0x2696, 0x2 +#memset 0x2700, 0x2 +#memset 0x2704, 0x2 +#memset 0x2708, 0x2 +#memset 0x2712, 0x2 +#memset 0x2716, 0x2 +#memset 0x2720, 0x2 +#memset 0x2724, 0x2 +#memset 0x2728, 0x2 +#memset 0x2732, 0x2 +#memset 0x2736, 0x2 +#memset 0x2740, 0x2 +#memset 0x2744, 0x2 +#memset 0x2748, 0x2 +#memset 0x2752, 0x2 +#memset 0x2756, 0x2 +#memset 0x2760, 0x2 +#memset 0x2764, 0x2 +#memset 0x2768, 0x2 +#memset 0x2772, 0x2 +#memset 0x2776, 0x2 +#memset 0x2780, 0x2 +#memset 0x2784, 0x2 +#memset 0x2788, 0x2 +#memset 0x2792, 0x2 +#memset 0x2796, 0x2 +#memset 0x2800, 0x2 +#memset 0x2804, 0x2 +#memset 0x2808, 0x2 +#memset 0x2812, 0x2 +#memset 0x2816, 0x2 +#memset 
0x2820, 0x2 +#memset 0x2824, 0x2 +#memset 0x2828, 0x2 +#memset 0x2832, 0x2 +#memset 0x2836, 0x2 +#memset 0x2840, 0x2 +#memset 0x2844, 0x2 +#memset 0x2848, 0x2 +#memset 0x2852, 0x2 +#memset 0x2856, 0x2 +#memset 0x2860, 0x2 +#memset 0x2864, 0x2 +#memset 0x2868, 0x2 +#memset 0x2872, 0x2 +#memset 0x2876, 0x2 +#memset 0x2880, 0x2 +#memset 0x2884, 0x2 +#memset 0x2888, 0x2 +#memset 0x2892, 0x2 +#memset 0x2896, 0x2 +#memset 0x2900, 0x2 +#memset 0x2904, 0x2 +#memset 0x2908, 0x2 +#memset 0x2912, 0x2 +#memset 0x2916, 0x2 +#memset 0x2920, 0x2 +#memset 0x2924, 0x2 +#memset 0x2928, 0x2 +#memset 0x2932, 0x2 +#memset 0x2936, 0x2 +#memset 0x2940, 0x2 +#memset 0x2944, 0x2 +#memset 0x2948, 0x2 +#memset 0x2952, 0x2 +#memset 0x2956, 0x2 +#memset 0x2960, 0x2 +#memset 0x2964, 0x2 +#memset 0x2968, 0x2 +#memset 0x2972, 0x2 +#memset 0x2976, 0x2 +#memset 0x2980, 0x2 +#memset 0x2984, 0x2 +#memset 0x2988, 0x2 +#memset 0x2992, 0x2 +#memset 0x2996, 0x2 +#memset 0x3000, 0x3 +#memset 0x3004, 0x3 +#memset 0x3008, 0x3 +#memset 0x3012, 0x3 +#memset 0x3016, 0x3 +#memset 0x3020, 0x3 +#memset 0x3024, 0x3 +#memset 0x3028, 0x3 +#memset 0x3032, 0x3 +#memset 0x3036, 0x3 +#memset 0x3040, 0x3 +#memset 0x3044, 0x3 +#memset 0x3048, 0x3 +#memset 0x3052, 0x3 +#memset 0x3056, 0x3 +#memset 0x3060, 0x3 +#memset 0x3064, 0x3 +#memset 0x3068, 0x3 +#memset 0x3072, 0x3 +#memset 0x3076, 0x3 +#memset 0x3080, 0x3 +#memset 0x3084, 0x3 +#memset 0x3088, 0x3 +#memset 0x3092, 0x3 +#memset 0x3096, 0x3 +#memset 0x3100, 0x3 +#memset 0x3104, 0x3 +#memset 0x3108, 0x3 +#memset 0x3112, 0x3 +#memset 0x3116, 0x3 +#memset 0x3120, 0x3 +#memset 0x3124, 0x3 +#memset 0x3128, 0x3 +#memset 0x3132, 0x3 +#memset 0x3136, 0x3 +#memset 0x3140, 0x3 +#memset 0x3144, 0x3 +#memset 0x3148, 0x3 +#memset 0x3152, 0x3 +#memset 0x3156, 0x3 +#memset 0x3160, 0x3 +#memset 0x3164, 0x3 +#memset 0x3168, 0x3 +#memset 0x3172, 0x3 +#memset 0x3176, 0x3 +#memset 0x3180, 0x3 +#memset 0x3184, 0x3 +#memset 0x3188, 0x3 +#memset 0x3192, 0x3 +#memset 0x3196, 0x3 +#memset 
0x3200, 0x3 +#memset 0x3204, 0x3 +#memset 0x3208, 0x3 +#memset 0x3212, 0x3 +#memset 0x3216, 0x3 +#memset 0x3220, 0x3 +#memset 0x3224, 0x3 +#memset 0x3228, 0x3 +#memset 0x3232, 0x3 +#memset 0x3236, 0x3 +#memset 0x3240, 0x3 +#memset 0x3244, 0x3 +#memset 0x3248, 0x3 +#memset 0x3252, 0x3 +#memset 0x3256, 0x3 +#memset 0x3260, 0x3 +#memset 0x3264, 0x3 +#memset 0x3268, 0x3 +#memset 0x3272, 0x3 +#memset 0x3276, 0x3 +#memset 0x3280, 0x3 +#memset 0x3284, 0x3 +#memset 0x3288, 0x3 +#memset 0x3292, 0x3 +#memset 0x3296, 0x3 +#memset 0x3300, 0x3 +#memset 0x3304, 0x3 +#memset 0x3308, 0x3 +#memset 0x3312, 0x3 +#memset 0x3316, 0x3 +#memset 0x3320, 0x3 +#memset 0x3324, 0x3 +#memset 0x3328, 0x3 +#memset 0x3332, 0x3 +#memset 0x3336, 0x3 +#memset 0x3340, 0x3 +#memset 0x3344, 0x3 +#memset 0x3348, 0x3 +#memset 0x3352, 0x3 +#memset 0x3356, 0x3 +#memset 0x3360, 0x3 +#memset 0x3364, 0x3 +#memset 0x3368, 0x3 +#memset 0x3372, 0x3 +#memset 0x3376, 0x3 +#memset 0x3380, 0x3 +#memset 0x3384, 0x3 +#memset 0x3388, 0x3 +#memset 0x3392, 0x3 +#memset 0x3396, 0x3 +#memset 0x3400, 0x3 +#memset 0x3404, 0x3 +#memset 0x3408, 0x3 +#memset 0x3412, 0x3 +#memset 0x3416, 0x3 +#memset 0x3420, 0x3 +#memset 0x3424, 0x3 +#memset 0x3428, 0x3 +#memset 0x3432, 0x3 +#memset 0x3436, 0x3 +#memset 0x3440, 0x3 +#memset 0x3444, 0x3 +#memset 0x3448, 0x3 +#memset 0x3452, 0x3 +#memset 0x3456, 0x3 +#memset 0x3460, 0x3 +#memset 0x3464, 0x3 +#memset 0x3468, 0x3 +#memset 0x3472, 0x3 +#memset 0x3476, 0x3 +#memset 0x3480, 0x3 +#memset 0x3484, 0x3 +#memset 0x3488, 0x3 +#memset 0x3492, 0x3 +#memset 0x3496, 0x3 +#memset 0x3500, 0x3 +#memset 0x3504, 0x3 +#memset 0x3508, 0x3 +#memset 0x3512, 0x3 +#memset 0x3516, 0x3 +#memset 0x3520, 0x3 +#memset 0x3524, 0x3 +#memset 0x3528, 0x3 +#memset 0x3532, 0x3 +#memset 0x3536, 0x3 +#memset 0x3540, 0x3 +#memset 0x3544, 0x3 +#memset 0x3548, 0x3 +#memset 0x3552, 0x3 +#memset 0x3556, 0x3 +#memset 0x3560, 0x3 +#memset 0x3564, 0x3 +#memset 0x3568, 0x3 +#memset 0x3572, 0x3 +#memset 0x3576, 0x3 +#memset 
0x3580, 0x3 +#memset 0x3584, 0x3 +#memset 0x3588, 0x3 +#memset 0x3592, 0x3 +#memset 0x3596, 0x3 +#memset 0x3600, 0x3 +#memset 0x3604, 0x3 +#memset 0x3608, 0x3 +#memset 0x3612, 0x3 +#memset 0x3616, 0x3 +#memset 0x3620, 0x3 +#memset 0x3624, 0x3 +#memset 0x3628, 0x3 +#memset 0x3632, 0x3 +#memset 0x3636, 0x3 +#memset 0x3640, 0x3 +#memset 0x3644, 0x3 +#memset 0x3648, 0x3 +#memset 0x3652, 0x3 +#memset 0x3656, 0x3 +#memset 0x3660, 0x3 +#memset 0x3664, 0x3 +#memset 0x3668, 0x3 +#memset 0x3672, 0x3 +#memset 0x3676, 0x3 +#memset 0x3680, 0x3 +#memset 0x3684, 0x3 +#memset 0x3688, 0x3 +#memset 0x3692, 0x3 +#memset 0x3696, 0x3 +#memset 0x3700, 0x3 +#memset 0x3704, 0x3 +#memset 0x3708, 0x3 +#memset 0x3712, 0x3 +#memset 0x3716, 0x3 +#memset 0x3720, 0x3 +#memset 0x3724, 0x3 +#memset 0x3728, 0x3 +#memset 0x3732, 0x3 +#memset 0x3736, 0x3 +#memset 0x3740, 0x3 +#memset 0x3744, 0x3 +#memset 0x3748, 0x3 +#memset 0x3752, 0x3 +#memset 0x3756, 0x3 +#memset 0x3760, 0x3 +#memset 0x3764, 0x3 +#memset 0x3768, 0x3 +#memset 0x3772, 0x3 +#memset 0x3776, 0x3 +#memset 0x3780, 0x3 +#memset 0x3784, 0x3 +#memset 0x3788, 0x3 +#memset 0x3792, 0x3 +#memset 0x3796, 0x3 +#memset 0x3800, 0x3 +#memset 0x3804, 0x3 +#memset 0x3808, 0x3 +#memset 0x3812, 0x3 +#memset 0x3816, 0x3 +#memset 0x3820, 0x3 +#memset 0x3824, 0x3 +#memset 0x3828, 0x3 +#memset 0x3832, 0x3 +#memset 0x3836, 0x3 +#memset 0x3840, 0x3 +#memset 0x3844, 0x3 +#memset 0x3848, 0x3 +#memset 0x3852, 0x3 +#memset 0x3856, 0x3 +#memset 0x3860, 0x3 +#memset 0x3864, 0x3 +#memset 0x3868, 0x3 +#memset 0x3872, 0x3 +#memset 0x3876, 0x3 +#memset 0x3880, 0x3 +#memset 0x3884, 0x3 +#memset 0x3888, 0x3 +#memset 0x3892, 0x3 +#memset 0x3896, 0x3 +#memset 0x3900, 0x3 +#memset 0x3904, 0x3 +#memset 0x3908, 0x3 +#memset 0x3912, 0x3 +#memset 0x3916, 0x3 +#memset 0x3920, 0x3 +#memset 0x3924, 0x3 +#memset 0x3928, 0x3 +#memset 0x3932, 0x3 +#memset 0x3936, 0x3 +#memset 0x3940, 0x3 +#memset 0x3944, 0x3 +#memset 0x3948, 0x3 +#memset 0x3952, 0x3 +#memset 0x3956, 0x3 +#memset 
0x3960, 0x3 +#memset 0x3964, 0x3 +#memset 0x3968, 0x3 +#memset 0x3972, 0x3 +#memset 0x3976, 0x3 +#memset 0x3980, 0x3 +#memset 0x3984, 0x3 +#memset 0x3988, 0x3 +#memset 0x3992, 0x3 +#memset 0x3996, 0x3 +#memset 0x4000, 0x4 +#memset 0x4004, 0x4 +#memset 0x4008, 0x4 +#memset 0x4012, 0x4 +#memset 0x4016, 0x4 +#memset 0x4020, 0x4 +#memset 0x4024, 0x4 +#memset 0x4028, 0x4 +#memset 0x4032, 0x4 +#memset 0x4036, 0x4 +#memset 0x4040, 0x4 +#memset 0x4044, 0x4 +#memset 0x4048, 0x4 +#memset 0x4052, 0x4 +#memset 0x4056, 0x4 +#memset 0x4060, 0x4 +#memset 0x4064, 0x4 +#memset 0x4068, 0x4 +#memset 0x4072, 0x4 +#memset 0x4076, 0x4 +#memset 0x4080, 0x4 +#memset 0x4084, 0x4 +#memset 0x4088, 0x4 +#memset 0x4092, 0x4 +#memset 0x4096, 0x4 +#memset 0x4100, 0x4 +#memset 0x4104, 0x4 +#memset 0x4108, 0x4 +#memset 0x4112, 0x4 +#memset 0x4116, 0x4 +#memset 0x4120, 0x4 +#memset 0x4124, 0x4 +#memset 0x4128, 0x4 +#memset 0x4132, 0x4 +#memset 0x4136, 0x4 +#memset 0x4140, 0x4 +#memset 0x4144, 0x4 +#memset 0x4148, 0x4 +#memset 0x4152, 0x4 +#memset 0x4156, 0x4 +#memset 0x4160, 0x4 +#memset 0x4164, 0x4 +#memset 0x4168, 0x4 +#memset 0x4172, 0x4 +#memset 0x4176, 0x4 +#memset 0x4180, 0x4 +#memset 0x4184, 0x4 +#memset 0x4188, 0x4 +#memset 0x4192, 0x4 +#memset 0x4196, 0x4 +#memset 0x4200, 0x4 +#memset 0x4204, 0x4 +#memset 0x4208, 0x4 +#memset 0x4212, 0x4 +#memset 0x4216, 0x4 +#memset 0x4220, 0x4 +#memset 0x4224, 0x4 +#memset 0x4228, 0x4 +#memset 0x4232, 0x4 +#memset 0x4236, 0x4 +#memset 0x4240, 0x4 +#memset 0x4244, 0x4 +#memset 0x4248, 0x4 +#memset 0x4252, 0x4 +#memset 0x4256, 0x4 +#memset 0x4260, 0x4 +#memset 0x4264, 0x4 +#memset 0x4268, 0x4 +#memset 0x4272, 0x4 +#memset 0x4276, 0x4 +#memset 0x4280, 0x4 +#memset 0x4284, 0x4 +#memset 0x4288, 0x4 +#memset 0x4292, 0x4 +#memset 0x4296, 0x4 +#memset 0x4300, 0x4 +#memset 0x4304, 0x4 +#memset 0x4308, 0x4 +#memset 0x4312, 0x4 +#memset 0x4316, 0x4 +#memset 0x4320, 0x4 +#memset 0x4324, 0x4 +#memset 0x4328, 0x4 +#memset 0x4332, 0x4 +#memset 0x4336, 0x4 +#memset 
0x4340, 0x4 +#memset 0x4344, 0x4 +#memset 0x4348, 0x4 +#memset 0x4352, 0x4 +#memset 0x4356, 0x4 +#memset 0x4360, 0x4 +#memset 0x4364, 0x4 +#memset 0x4368, 0x4 +#memset 0x4372, 0x4 +#memset 0x4376, 0x4 +#memset 0x4380, 0x4 +#memset 0x4384, 0x4 +#memset 0x4388, 0x4 +#memset 0x4392, 0x4 +#memset 0x4396, 0x4 +#memset 0x4400, 0x4 +#memset 0x4404, 0x4 +#memset 0x4408, 0x4 +#memset 0x4412, 0x4 +#memset 0x4416, 0x4 +#memset 0x4420, 0x4 +#memset 0x4424, 0x4 +#memset 0x4428, 0x4 +#memset 0x4432, 0x4 +#memset 0x4436, 0x4 +#memset 0x4440, 0x4 +#memset 0x4444, 0x4 +#memset 0x4448, 0x4 +#memset 0x4452, 0x4 +#memset 0x4456, 0x4 +#memset 0x4460, 0x4 +#memset 0x4464, 0x4 +#memset 0x4468, 0x4 +#memset 0x4472, 0x4 +#memset 0x4476, 0x4 +#memset 0x4480, 0x4 +#memset 0x4484, 0x4 +#memset 0x4488, 0x4 +#memset 0x4492, 0x4 +#memset 0x4496, 0x4 +#memset 0x4500, 0x4 +#memset 0x4504, 0x4 +#memset 0x4508, 0x4 +#memset 0x4512, 0x4 +#memset 0x4516, 0x4 +#memset 0x4520, 0x4 +#memset 0x4524, 0x4 +#memset 0x4528, 0x4 +#memset 0x4532, 0x4 +#memset 0x4536, 0x4 +#memset 0x4540, 0x4 +#memset 0x4544, 0x4 +#memset 0x4548, 0x4 +#memset 0x4552, 0x4 +#memset 0x4556, 0x4 +#memset 0x4560, 0x4 +#memset 0x4564, 0x4 +#memset 0x4568, 0x4 +#memset 0x4572, 0x4 +#memset 0x4576, 0x4 +#memset 0x4580, 0x4 +#memset 0x4584, 0x4 +#memset 0x4588, 0x4 +#memset 0x4592, 0x4 +#memset 0x4596, 0x4 +#memset 0x4600, 0x4 +#memset 0x4604, 0x4 +#memset 0x4608, 0x4 +#memset 0x4612, 0x4 +#memset 0x4616, 0x4 +#memset 0x4620, 0x4 +#memset 0x4624, 0x4 +#memset 0x4628, 0x4 +#memset 0x4632, 0x4 +#memset 0x4636, 0x4 +#memset 0x4640, 0x4 +#memset 0x4644, 0x4 +#memset 0x4648, 0x4 +#memset 0x4652, 0x4 +#memset 0x4656, 0x4 +#memset 0x4660, 0x4 +#memset 0x4664, 0x4 +#memset 0x4668, 0x4 +#memset 0x4672, 0x4 +#memset 0x4676, 0x4 +#memset 0x4680, 0x4 +#memset 0x4684, 0x4 +#memset 0x4688, 0x4 +#memset 0x4692, 0x4 +#memset 0x4696, 0x4 +#memset 0x4700, 0x4 +#memset 0x4704, 0x4 +#memset 0x4708, 0x4 +#memset 0x4712, 0x4 +#memset 0x4716, 0x4 +#memset 
0x4720, 0x4 +#memset 0x4724, 0x4 +#memset 0x4728, 0x4 +#memset 0x4732, 0x4 +#memset 0x4736, 0x4 +#memset 0x4740, 0x4 +#memset 0x4744, 0x4 +#memset 0x4748, 0x4 +#memset 0x4752, 0x4 +#memset 0x4756, 0x4 +#memset 0x4760, 0x4 +#memset 0x4764, 0x4 +#memset 0x4768, 0x4 +#memset 0x4772, 0x4 +#memset 0x4776, 0x4 +#memset 0x4780, 0x4 +#memset 0x4784, 0x4 +#memset 0x4788, 0x4 +#memset 0x4792, 0x4 +#memset 0x4796, 0x4 +#memset 0x4800, 0x4 +#memset 0x4804, 0x4 +#memset 0x4808, 0x4 +#memset 0x4812, 0x4 +#memset 0x4816, 0x4 +#memset 0x4820, 0x4 +#memset 0x4824, 0x4 +#memset 0x4828, 0x4 +#memset 0x4832, 0x4 +#memset 0x4836, 0x4 +#memset 0x4840, 0x4 +#memset 0x4844, 0x4 +#memset 0x4848, 0x4 +#memset 0x4852, 0x4 +#memset 0x4856, 0x4 +#memset 0x4860, 0x4 +#memset 0x4864, 0x4 +#memset 0x4868, 0x4 +#memset 0x4872, 0x4 +#memset 0x4876, 0x4 +#memset 0x4880, 0x4 +#memset 0x4884, 0x4 +#memset 0x4888, 0x4 +#memset 0x4892, 0x4 +#memset 0x4896, 0x4 +#memset 0x4900, 0x4 +#memset 0x4904, 0x4 +#memset 0x4908, 0x4 +#memset 0x4912, 0x4 +#memset 0x4916, 0x4 +#memset 0x4920, 0x4 +#memset 0x4924, 0x4 +#memset 0x4928, 0x4 +#memset 0x4932, 0x4 +#memset 0x4936, 0x4 +#memset 0x4940, 0x4 +#memset 0x4944, 0x4 +#memset 0x4948, 0x4 +#memset 0x4952, 0x4 +#memset 0x4956, 0x4 +#memset 0x4960, 0x4 +#memset 0x4964, 0x4 +#memset 0x4968, 0x4 +#memset 0x4972, 0x4 +#memset 0x4976, 0x4 +#memset 0x4980, 0x4 +#memset 0x4984, 0x4 +#memset 0x4988, 0x4 +#memset 0x4992, 0x4 +#memset 0x4996, 0x4 +#memset 0x5000, 0x5 +#memset 0x5004, 0x5 +#memset 0x5008, 0x5 +#memset 0x5012, 0x5 +#memset 0x5016, 0x5 +#memset 0x5020, 0x5 +#memset 0x5024, 0x5 +#memset 0x5028, 0x5 +#memset 0x5032, 0x5 +#memset 0x5036, 0x5 +#memset 0x5040, 0x5 +#memset 0x5044, 0x5 +#memset 0x5048, 0x5 +#memset 0x5052, 0x5 +#memset 0x5056, 0x5 +#memset 0x5060, 0x5 +#memset 0x5064, 0x5 +#memset 0x5068, 0x5 +#memset 0x5072, 0x5 +#memset 0x5076, 0x5 +#memset 0x5080, 0x5 +#memset 0x5084, 0x5 +#memset 0x5088, 0x5 +#memset 0x5092, 0x5 +#memset 0x5096, 0x5 +#memset 
0x5100, 0x5 +#memset 0x5104, 0x5 +#memset 0x5108, 0x5 +#memset 0x5112, 0x5 +#memset 0x5116, 0x5 +#memset 0x5120, 0x5 +#memset 0x5124, 0x5 +#memset 0x5128, 0x5 +#memset 0x5132, 0x5 +#memset 0x5136, 0x5 +#memset 0x5140, 0x5 +#memset 0x5144, 0x5 +#memset 0x5148, 0x5 +#memset 0x5152, 0x5 +#memset 0x5156, 0x5 +#memset 0x5160, 0x5 +#memset 0x5164, 0x5 +#memset 0x5168, 0x5 +#memset 0x5172, 0x5 +#memset 0x5176, 0x5 +#memset 0x5180, 0x5 +#memset 0x5184, 0x5 +#memset 0x5188, 0x5 +#memset 0x5192, 0x5 +#memset 0x5196, 0x5 +#memset 0x5200, 0x5 +#memset 0x5204, 0x5 +#memset 0x5208, 0x5 +#memset 0x5212, 0x5 +#memset 0x5216, 0x5 +#memset 0x5220, 0x5 +#memset 0x5224, 0x5 +#memset 0x5228, 0x5 +#memset 0x5232, 0x5 +#memset 0x5236, 0x5 +#memset 0x5240, 0x5 +#memset 0x5244, 0x5 +#memset 0x5248, 0x5 +#memset 0x5252, 0x5 +#memset 0x5256, 0x5 +#memset 0x5260, 0x5 +#memset 0x5264, 0x5 +#memset 0x5268, 0x5 +#memset 0x5272, 0x5 +#memset 0x5276, 0x5 +#memset 0x5280, 0x5 +#memset 0x5284, 0x5 +#memset 0x5288, 0x5 +#memset 0x5292, 0x5 +#memset 0x5296, 0x5 +#memset 0x5300, 0x5 +#memset 0x5304, 0x5 +#memset 0x5308, 0x5 +#memset 0x5312, 0x5 +#memset 0x5316, 0x5 +#memset 0x5320, 0x5 +#memset 0x5324, 0x5 +#memset 0x5328, 0x5 +#memset 0x5332, 0x5 +#memset 0x5336, 0x5 +#memset 0x5340, 0x5 +#memset 0x5344, 0x5 +#memset 0x5348, 0x5 +#memset 0x5352, 0x5 +#memset 0x5356, 0x5 +#memset 0x5360, 0x5 +#memset 0x5364, 0x5 +#memset 0x5368, 0x5 +#memset 0x5372, 0x5 +#memset 0x5376, 0x5 +#memset 0x5380, 0x5 +#memset 0x5384, 0x5 +#memset 0x5388, 0x5 +#memset 0x5392, 0x5 +#memset 0x5396, 0x5 +#memset 0x5400, 0x5 +#memset 0x5404, 0x5 +#memset 0x5408, 0x5 +#memset 0x5412, 0x5 +#memset 0x5416, 0x5 +#memset 0x5420, 0x5 +#memset 0x5424, 0x5 +#memset 0x5428, 0x5 +#memset 0x5432, 0x5 +#memset 0x5436, 0x5 +#memset 0x5440, 0x5 +#memset 0x5444, 0x5 +#memset 0x5448, 0x5 +#memset 0x5452, 0x5 +#memset 0x5456, 0x5 +#memset 0x5460, 0x5 +#memset 0x5464, 0x5 +#memset 0x5468, 0x5 +#memset 0x5472, 0x5 +#memset 0x5476, 0x5 +#memset 
0x5480, 0x5 +#memset 0x5484, 0x5 +#memset 0x5488, 0x5 +#memset 0x5492, 0x5 +#memset 0x5496, 0x5 +#memset 0x5500, 0x5 +#memset 0x5504, 0x5 +#memset 0x5508, 0x5 +#memset 0x5512, 0x5 +#memset 0x5516, 0x5 +#memset 0x5520, 0x5 +#memset 0x5524, 0x5 +#memset 0x5528, 0x5 +#memset 0x5532, 0x5 +#memset 0x5536, 0x5 +#memset 0x5540, 0x5 +#memset 0x5544, 0x5 +#memset 0x5548, 0x5 +#memset 0x5552, 0x5 +#memset 0x5556, 0x5 +#memset 0x5560, 0x5 +#memset 0x5564, 0x5 +#memset 0x5568, 0x5 +#memset 0x5572, 0x5 +#memset 0x5576, 0x5 +#memset 0x5580, 0x5 +#memset 0x5584, 0x5 +#memset 0x5588, 0x5 +#memset 0x5592, 0x5 +#memset 0x5596, 0x5 +#memset 0x5600, 0x5 +#memset 0x5604, 0x5 +#memset 0x5608, 0x5 +#memset 0x5612, 0x5 +#memset 0x5616, 0x5 +#memset 0x5620, 0x5 +#memset 0x5624, 0x5 +#memset 0x5628, 0x5 +#memset 0x5632, 0x5 +#memset 0x5636, 0x5 +#memset 0x5640, 0x5 +#memset 0x5644, 0x5 +#memset 0x5648, 0x5 +#memset 0x5652, 0x5 +#memset 0x5656, 0x5 +#memset 0x5660, 0x5 +#memset 0x5664, 0x5 +#memset 0x5668, 0x5 +#memset 0x5672, 0x5 +#memset 0x5676, 0x5 +#memset 0x5680, 0x5 +#memset 0x5684, 0x5 +#memset 0x5688, 0x5 +#memset 0x5692, 0x5 +#memset 0x5696, 0x5 +#memset 0x5700, 0x5 +#memset 0x5704, 0x5 +#memset 0x5708, 0x5 +#memset 0x5712, 0x5 +#memset 0x5716, 0x5 +#memset 0x5720, 0x5 +#memset 0x5724, 0x5 +#memset 0x5728, 0x5 +#memset 0x5732, 0x5 +#memset 0x5736, 0x5 +#memset 0x5740, 0x5 +#memset 0x5744, 0x5 +#memset 0x5748, 0x5 +#memset 0x5752, 0x5 +#memset 0x5756, 0x5 +#memset 0x5760, 0x5 +#memset 0x5764, 0x5 +#memset 0x5768, 0x5 +#memset 0x5772, 0x5 +#memset 0x5776, 0x5 +#memset 0x5780, 0x5 +#memset 0x5784, 0x5 +#memset 0x5788, 0x5 +#memset 0x5792, 0x5 +#memset 0x5796, 0x5 +#memset 0x5800, 0x5 +#memset 0x5804, 0x5 +#memset 0x5808, 0x5 +#memset 0x5812, 0x5 +#memset 0x5816, 0x5 +#memset 0x5820, 0x5 +#memset 0x5824, 0x5 +#memset 0x5828, 0x5 +#memset 0x5832, 0x5 +#memset 0x5836, 0x5 +#memset 0x5840, 0x5 +#memset 0x5844, 0x5 +#memset 0x5848, 0x5 +#memset 0x5852, 0x5 +#memset 0x5856, 0x5 +#memset 
0x5860, 0x5 +#memset 0x5864, 0x5 +#memset 0x5868, 0x5 +#memset 0x5872, 0x5 +#memset 0x5876, 0x5 +#memset 0x5880, 0x5 +#memset 0x5884, 0x5 +#memset 0x5888, 0x5 +#memset 0x5892, 0x5 +#memset 0x5896, 0x5 +#memset 0x5900, 0x5 +#memset 0x5904, 0x5 +#memset 0x5908, 0x5 +#memset 0x5912, 0x5 +#memset 0x5916, 0x5 +#memset 0x5920, 0x5 +#memset 0x5924, 0x5 +#memset 0x5928, 0x5 +#memset 0x5932, 0x5 +#memset 0x5936, 0x5 +#memset 0x5940, 0x5 +#memset 0x5944, 0x5 +#memset 0x5948, 0x5 +#memset 0x5952, 0x5 +#memset 0x5956, 0x5 +#memset 0x5960, 0x5 +#memset 0x5964, 0x5 +#memset 0x5968, 0x5 +#memset 0x5972, 0x5 +#memset 0x5976, 0x5 +#memset 0x5980, 0x5 +#memset 0x5984, 0x5 +#memset 0x5988, 0x5 +#memset 0x5992, 0x5 +#memset 0x5996, 0x5 +#memset 0x6000, 0x6 +#memset 0x6004, 0x6 +#memset 0x6008, 0x6 +#memset 0x6012, 0x6 +#memset 0x6016, 0x6 +#memset 0x6020, 0x6 +#memset 0x6024, 0x6 +#memset 0x6028, 0x6 +#memset 0x6032, 0x6 +#memset 0x6036, 0x6 +#memset 0x6040, 0x6 +#memset 0x6044, 0x6 +#memset 0x6048, 0x6 +#memset 0x6052, 0x6 +#memset 0x6056, 0x6 +#memset 0x6060, 0x6 +#memset 0x6064, 0x6 +#memset 0x6068, 0x6 +#memset 0x6072, 0x6 +#memset 0x6076, 0x6 +#memset 0x6080, 0x6 +#memset 0x6084, 0x6 +#memset 0x6088, 0x6 +#memset 0x6092, 0x6 +#memset 0x6096, 0x6 +#memset 0x6100, 0x6 +#memset 0x6104, 0x6 +#memset 0x6108, 0x6 +#memset 0x6112, 0x6 +#memset 0x6116, 0x6 +#memset 0x6120, 0x6 +#memset 0x6124, 0x6 +#memset 0x6128, 0x6 +#memset 0x6132, 0x6 +#memset 0x6136, 0x6 +#memset 0x6140, 0x6 +#memset 0x6144, 0x6 +#memset 0x6148, 0x6 +#memset 0x6152, 0x6 +#memset 0x6156, 0x6 +#memset 0x6160, 0x6 +#memset 0x6164, 0x6 +#memset 0x6168, 0x6 +#memset 0x6172, 0x6 +#memset 0x6176, 0x6 +#memset 0x6180, 0x6 +#memset 0x6184, 0x6 +#memset 0x6188, 0x6 +#memset 0x6192, 0x6 +#memset 0x6196, 0x6 +#memset 0x6200, 0x6 +#memset 0x6204, 0x6 +#memset 0x6208, 0x6 +#memset 0x6212, 0x6 +#memset 0x6216, 0x6 +#memset 0x6220, 0x6 +#memset 0x6224, 0x6 +#memset 0x6228, 0x6 +#memset 0x6232, 0x6 +#memset 0x6236, 0x6 +#memset 
0x6240, 0x6 +#memset 0x6244, 0x6 +#memset 0x6248, 0x6 +#memset 0x6252, 0x6 +#memset 0x6256, 0x6 +#memset 0x6260, 0x6 +#memset 0x6264, 0x6 +#memset 0x6268, 0x6 +#memset 0x6272, 0x6 +#memset 0x6276, 0x6 +#memset 0x6280, 0x6 +#memset 0x6284, 0x6 +#memset 0x6288, 0x6 +#memset 0x6292, 0x6 +#memset 0x6296, 0x6 +#memset 0x6300, 0x6 +#memset 0x6304, 0x6 +#memset 0x6308, 0x6 +#memset 0x6312, 0x6 +#memset 0x6316, 0x6 +#memset 0x6320, 0x6 +#memset 0x6324, 0x6 +#memset 0x6328, 0x6 +#memset 0x6332, 0x6 +#memset 0x6336, 0x6 +#memset 0x6340, 0x6 +#memset 0x6344, 0x6 +#memset 0x6348, 0x6 +#memset 0x6352, 0x6 +#memset 0x6356, 0x6 +#memset 0x6360, 0x6 +#memset 0x6364, 0x6 +#memset 0x6368, 0x6 +#memset 0x6372, 0x6 +#memset 0x6376, 0x6 +#memset 0x6380, 0x6 +#memset 0x6384, 0x6 +#memset 0x6388, 0x6 +#memset 0x6392, 0x6 +#memset 0x6396, 0x6 +#memset 0x6400, 0x6 +#memset 0x6404, 0x6 +#memset 0x6408, 0x6 +#memset 0x6412, 0x6 +#memset 0x6416, 0x6 +#memset 0x6420, 0x6 +#memset 0x6424, 0x6 +#memset 0x6428, 0x6 +#memset 0x6432, 0x6 +#memset 0x6436, 0x6 +#memset 0x6440, 0x6 +#memset 0x6444, 0x6 +#memset 0x6448, 0x6 +#memset 0x6452, 0x6 +#memset 0x6456, 0x6 +#memset 0x6460, 0x6 +#memset 0x6464, 0x6 +#memset 0x6468, 0x6 +#memset 0x6472, 0x6 +#memset 0x6476, 0x6 +#memset 0x6480, 0x6 +#memset 0x6484, 0x6 +#memset 0x6488, 0x6 +#memset 0x6492, 0x6 +#memset 0x6496, 0x6 +#memset 0x6500, 0x6 +#memset 0x6504, 0x6 +#memset 0x6508, 0x6 +#memset 0x6512, 0x6 +#memset 0x6516, 0x6 +#memset 0x6520, 0x6 +#memset 0x6524, 0x6 +#memset 0x6528, 0x6 +#memset 0x6532, 0x6 +#memset 0x6536, 0x6 +#memset 0x6540, 0x6 +#memset 0x6544, 0x6 +#memset 0x6548, 0x6 +#memset 0x6552, 0x6 +#memset 0x6556, 0x6 +#memset 0x6560, 0x6 +#memset 0x6564, 0x6 +#memset 0x6568, 0x6 +#memset 0x6572, 0x6 +#memset 0x6576, 0x6 +#memset 0x6580, 0x6 +#memset 0x6584, 0x6 +#memset 0x6588, 0x6 +#memset 0x6592, 0x6 +#memset 0x6596, 0x6 +#memset 0x6600, 0x6 +#memset 0x6604, 0x6 +#memset 0x6608, 0x6 +#memset 0x6612, 0x6 +#memset 0x6616, 0x6 +#memset 
0x6620, 0x6 +#memset 0x6624, 0x6 +#memset 0x6628, 0x6 +#memset 0x6632, 0x6 +#memset 0x6636, 0x6 +#memset 0x6640, 0x6 +#memset 0x6644, 0x6 +#memset 0x6648, 0x6 +#memset 0x6652, 0x6 +#memset 0x6656, 0x6 +#memset 0x6660, 0x6 +#memset 0x6664, 0x6 +#memset 0x6668, 0x6 +#memset 0x6672, 0x6 +#memset 0x6676, 0x6 +#memset 0x6680, 0x6 +#memset 0x6684, 0x6 +#memset 0x6688, 0x6 +#memset 0x6692, 0x6 +#memset 0x6696, 0x6 +#memset 0x6700, 0x6 +#memset 0x6704, 0x6 +#memset 0x6708, 0x6 +#memset 0x6712, 0x6 +#memset 0x6716, 0x6 +#memset 0x6720, 0x6 +#memset 0x6724, 0x6 +#memset 0x6728, 0x6 +#memset 0x6732, 0x6 +#memset 0x6736, 0x6 +#memset 0x6740, 0x6 +#memset 0x6744, 0x6 +#memset 0x6748, 0x6 +#memset 0x6752, 0x6 +#memset 0x6756, 0x6 +#memset 0x6760, 0x6 +#memset 0x6764, 0x6 +#memset 0x6768, 0x6 +#memset 0x6772, 0x6 +#memset 0x6776, 0x6 +#memset 0x6780, 0x6 +#memset 0x6784, 0x6 +#memset 0x6788, 0x6 +#memset 0x6792, 0x6 +#memset 0x6796, 0x6 +#memset 0x6800, 0x6 +#memset 0x6804, 0x6 +#memset 0x6808, 0x6 +#memset 0x6812, 0x6 +#memset 0x6816, 0x6 +#memset 0x6820, 0x6 +#memset 0x6824, 0x6 +#memset 0x6828, 0x6 +#memset 0x6832, 0x6 +#memset 0x6836, 0x6 +#memset 0x6840, 0x6 +#memset 0x6844, 0x6 +#memset 0x6848, 0x6 +#memset 0x6852, 0x6 +#memset 0x6856, 0x6 +#memset 0x6860, 0x6 +#memset 0x6864, 0x6 +#memset 0x6868, 0x6 +#memset 0x6872, 0x6 +#memset 0x6876, 0x6 +#memset 0x6880, 0x6 +#memset 0x6884, 0x6 +#memset 0x6888, 0x6 +#memset 0x6892, 0x6 +#memset 0x6896, 0x6 +#memset 0x6900, 0x6 +#memset 0x6904, 0x6 +#memset 0x6908, 0x6 +#memset 0x6912, 0x6 +#memset 0x6916, 0x6 +#memset 0x6920, 0x6 +#memset 0x6924, 0x6 +#memset 0x6928, 0x6 +#memset 0x6932, 0x6 +#memset 0x6936, 0x6 +#memset 0x6940, 0x6 +#memset 0x6944, 0x6 +#memset 0x6948, 0x6 +#memset 0x6952, 0x6 +#memset 0x6956, 0x6 +#memset 0x6960, 0x6 +#memset 0x6964, 0x6 +#memset 0x6968, 0x6 +#memset 0x6972, 0x6 +#memset 0x6976, 0x6 +#memset 0x6980, 0x6 +#memset 0x6984, 0x6 +#memset 0x6988, 0x6 +#memset 0x6992, 0x6 +#memset 0x6996, 0x6 +#memset 
0x7000, 0x7 +#memset 0x7004, 0x7 +#memset 0x7008, 0x7 +#memset 0x7012, 0x7 +#memset 0x7016, 0x7 +#memset 0x7020, 0x7 +#memset 0x7024, 0x7 +#memset 0x7028, 0x7 +#memset 0x7032, 0x7 +#memset 0x7036, 0x7 +#memset 0x7040, 0x7 +#memset 0x7044, 0x7 +#memset 0x7048, 0x7 +#memset 0x7052, 0x7 +#memset 0x7056, 0x7 +#memset 0x7060, 0x7 +#memset 0x7064, 0x7 +#memset 0x7068, 0x7 +#memset 0x7072, 0x7 +#memset 0x7076, 0x7 +#memset 0x7080, 0x7 +#memset 0x7084, 0x7 +#memset 0x7088, 0x7 +#memset 0x7092, 0x7 +#memset 0x7096, 0x7 +#memset 0x7100, 0x7 +#memset 0x7104, 0x7 +#memset 0x7108, 0x7 +#memset 0x7112, 0x7 +#memset 0x7116, 0x7 +#memset 0x7120, 0x7 +#memset 0x7124, 0x7 +#memset 0x7128, 0x7 +#memset 0x7132, 0x7 +#memset 0x7136, 0x7 +#memset 0x7140, 0x7 +#memset 0x7144, 0x7 +#memset 0x7148, 0x7 +#memset 0x7152, 0x7 +#memset 0x7156, 0x7 +#memset 0x7160, 0x7 +#memset 0x7164, 0x7 +#memset 0x7168, 0x7 +#memset 0x7172, 0x7 +#memset 0x7176, 0x7 +#memset 0x7180, 0x7 +#memset 0x7184, 0x7 +#memset 0x7188, 0x7 +#memset 0x7192, 0x7 +#memset 0x7196, 0x7 +#memset 0x7200, 0x7 +#memset 0x7204, 0x7 +#memset 0x7208, 0x7 +#memset 0x7212, 0x7 +#memset 0x7216, 0x7 +#memset 0x7220, 0x7 +#memset 0x7224, 0x7 +#memset 0x7228, 0x7 +#memset 0x7232, 0x7 +#memset 0x7236, 0x7 +#memset 0x7240, 0x7 +#memset 0x7244, 0x7 +#memset 0x7248, 0x7 +#memset 0x7252, 0x7 +#memset 0x7256, 0x7 +#memset 0x7260, 0x7 +#memset 0x7264, 0x7 +#memset 0x7268, 0x7 +#memset 0x7272, 0x7 +#memset 0x7276, 0x7 +#memset 0x7280, 0x7 +#memset 0x7284, 0x7 +#memset 0x7288, 0x7 +#memset 0x7292, 0x7 +#memset 0x7296, 0x7 +#memset 0x7300, 0x7 +#memset 0x7304, 0x7 +#memset 0x7308, 0x7 +#memset 0x7312, 0x7 +#memset 0x7316, 0x7 +#memset 0x7320, 0x7 +#memset 0x7324, 0x7 +#memset 0x7328, 0x7 +#memset 0x7332, 0x7 +#memset 0x7336, 0x7 +#memset 0x7340, 0x7 +#memset 0x7344, 0x7 +#memset 0x7348, 0x7 +#memset 0x7352, 0x7 +#memset 0x7356, 0x7 +#memset 0x7360, 0x7 +#memset 0x7364, 0x7 +#memset 0x7368, 0x7 +#memset 0x7372, 0x7 +#memset 0x7376, 0x7 +#memset 
0x7380, 0x7 +#memset 0x7384, 0x7 +#memset 0x7388, 0x7 +#memset 0x7392, 0x7 +#memset 0x7396, 0x7 +#memset 0x7400, 0x7 +#memset 0x7404, 0x7 +#memset 0x7408, 0x7 +#memset 0x7412, 0x7 +#memset 0x7416, 0x7 +#memset 0x7420, 0x7 +#memset 0x7424, 0x7 +#memset 0x7428, 0x7 +#memset 0x7432, 0x7 +#memset 0x7436, 0x7 +#memset 0x7440, 0x7 +#memset 0x7444, 0x7 +#memset 0x7448, 0x7 +#memset 0x7452, 0x7 +#memset 0x7456, 0x7 +#memset 0x7460, 0x7 +#memset 0x7464, 0x7 +#memset 0x7468, 0x7 +#memset 0x7472, 0x7 +#memset 0x7476, 0x7 +#memset 0x7480, 0x7 +#memset 0x7484, 0x7 +#memset 0x7488, 0x7 +#memset 0x7492, 0x7 +#memset 0x7496, 0x7 +#memset 0x7500, 0x7 +#memset 0x7504, 0x7 +#memset 0x7508, 0x7 +#memset 0x7512, 0x7 +#memset 0x7516, 0x7 +#memset 0x7520, 0x7 +#memset 0x7524, 0x7 +#memset 0x7528, 0x7 +#memset 0x7532, 0x7 +#memset 0x7536, 0x7 +#memset 0x7540, 0x7 +#memset 0x7544, 0x7 +#memset 0x7548, 0x7 +#memset 0x7552, 0x7 +#memset 0x7556, 0x7 +#memset 0x7560, 0x7 +#memset 0x7564, 0x7 +#memset 0x7568, 0x7 +#memset 0x7572, 0x7 +#memset 0x7576, 0x7 +#memset 0x7580, 0x7 +#memset 0x7584, 0x7 +#memset 0x7588, 0x7 +#memset 0x7592, 0x7 +#memset 0x7596, 0x7 +#memset 0x7600, 0x7 +#memset 0x7604, 0x7 +#memset 0x7608, 0x7 +#memset 0x7612, 0x7 +#memset 0x7616, 0x7 +#memset 0x7620, 0x7 +#memset 0x7624, 0x7 +#memset 0x7628, 0x7 +#memset 0x7632, 0x7 +#memset 0x7636, 0x7 +#memset 0x7640, 0x7 +#memset 0x7644, 0x7 +#memset 0x7648, 0x7 +#memset 0x7652, 0x7 +#memset 0x7656, 0x7 +#memset 0x7660, 0x7 +#memset 0x7664, 0x7 +#memset 0x7668, 0x7 +#memset 0x7672, 0x7 +#memset 0x7676, 0x7 +#memset 0x7680, 0x7 +#memset 0x7684, 0x7 +#memset 0x7688, 0x7 +#memset 0x7692, 0x7 +#memset 0x7696, 0x7 +#memset 0x7700, 0x7 +#memset 0x7704, 0x7 +#memset 0x7708, 0x7 +#memset 0x7712, 0x7 +#memset 0x7716, 0x7 +#memset 0x7720, 0x7 +#memset 0x7724, 0x7 +#memset 0x7728, 0x7 +#memset 0x7732, 0x7 +#memset 0x7736, 0x7 +#memset 0x7740, 0x7 +#memset 0x7744, 0x7 +#memset 0x7748, 0x7 +#memset 0x7752, 0x7 +#memset 0x7756, 0x7 +#memset 
0x7760, 0x7 +#memset 0x7764, 0x7 +#memset 0x7768, 0x7 +#memset 0x7772, 0x7 +#memset 0x7776, 0x7 +#memset 0x7780, 0x7 +#memset 0x7784, 0x7 +#memset 0x7788, 0x7 +#memset 0x7792, 0x7 +#memset 0x7796, 0x7 +#memset 0x7800, 0x7 +#memset 0x7804, 0x7 +#memset 0x7808, 0x7 +#memset 0x7812, 0x7 +#memset 0x7816, 0x7 +#memset 0x7820, 0x7 +#memset 0x7824, 0x7 +#memset 0x7828, 0x7 +#memset 0x7832, 0x7 +#memset 0x7836, 0x7 +#memset 0x7840, 0x7 +#memset 0x7844, 0x7 +#memset 0x7848, 0x7 +#memset 0x7852, 0x7 +#memset 0x7856, 0x7 +#memset 0x7860, 0x7 +#memset 0x7864, 0x7 +#memset 0x7868, 0x7 +#memset 0x7872, 0x7 +#memset 0x7876, 0x7 +#memset 0x7880, 0x7 +#memset 0x7884, 0x7 +#memset 0x7888, 0x7 +#memset 0x7892, 0x7 +#memset 0x7896, 0x7 +#memset 0x7900, 0x7 +#memset 0x7904, 0x7 +#memset 0x7908, 0x7 +#memset 0x7912, 0x7 +#memset 0x7916, 0x7 +#memset 0x7920, 0x7 +#memset 0x7924, 0x7 +#memset 0x7928, 0x7 +#memset 0x7932, 0x7 +#memset 0x7936, 0x7 +#memset 0x7940, 0x7 +#memset 0x7944, 0x7 +#memset 0x7948, 0x7 +#memset 0x7952, 0x7 +#memset 0x7956, 0x7 +#memset 0x7960, 0x7 +#memset 0x7964, 0x7 +#memset 0x7968, 0x7 +#memset 0x7972, 0x7 +#memset 0x7976, 0x7 +#memset 0x7980, 0x7 +#memset 0x7984, 0x7 +#memset 0x7988, 0x7 +#memset 0x7992, 0x7 +#memset 0x7996, 0x7 +#memset 0x8000, 0x8 +#memset 0x8004, 0x8 +#memset 0x8008, 0x8 +#memset 0x8012, 0x8 +#memset 0x8016, 0x8 +#memset 0x8020, 0x8 +#memset 0x8024, 0x8 +#memset 0x8028, 0x8 +#memset 0x8032, 0x8 +#memset 0x8036, 0x8 +#memset 0x8040, 0x8 +#memset 0x8044, 0x8 +#memset 0x8048, 0x8 +#memset 0x8052, 0x8 +#memset 0x8056, 0x8 +#memset 0x8060, 0x8 +#memset 0x8064, 0x8 +#memset 0x8068, 0x8 +#memset 0x8072, 0x8 +#memset 0x8076, 0x8 +#memset 0x8080, 0x8 +#memset 0x8084, 0x8 +#memset 0x8088, 0x8 +#memset 0x8092, 0x8 +#memset 0x8096, 0x8 +#memset 0x8100, 0x8 +#memset 0x8104, 0x8 +#memset 0x8108, 0x8 +#memset 0x8112, 0x8 +#memset 0x8116, 0x8 +#memset 0x8120, 0x8 +#memset 0x8124, 0x8 +#memset 0x8128, 0x8 +#memset 0x8132, 0x8 +#memset 0x8136, 0x8 +#memset 
0x8140, 0x8 +#memset 0x8144, 0x8 +#memset 0x8148, 0x8 +#memset 0x8152, 0x8 +#memset 0x8156, 0x8 +#memset 0x8160, 0x8 +#memset 0x8164, 0x8 +#memset 0x8168, 0x8 +#memset 0x8172, 0x8 +#memset 0x8176, 0x8 +#memset 0x8180, 0x8 +#memset 0x8184, 0x8 +#memset 0x8188, 0x8 +#memset 0x8192, 0x8 +#memset 0x8196, 0x8 +#memset 0x8200, 0x8 +#memset 0x8204, 0x8 +#memset 0x8208, 0x8 +#memset 0x8212, 0x8 +#memset 0x8216, 0x8 +#memset 0x8220, 0x8 +#memset 0x8224, 0x8 +#memset 0x8228, 0x8 +#memset 0x8232, 0x8 +#memset 0x8236, 0x8 +#memset 0x8240, 0x8 +#memset 0x8244, 0x8 +#memset 0x8248, 0x8 +#memset 0x8252, 0x8 +#memset 0x8256, 0x8 +#memset 0x8260, 0x8 +#memset 0x8264, 0x8 +#memset 0x8268, 0x8 +#memset 0x8272, 0x8 +#memset 0x8276, 0x8 +#memset 0x8280, 0x8 +#memset 0x8284, 0x8 +#memset 0x8288, 0x8 +#memset 0x8292, 0x8 +#memset 0x8296, 0x8 +#memset 0x8300, 0x8 +#memset 0x8304, 0x8 +#memset 0x8308, 0x8 +#memset 0x8312, 0x8 +#memset 0x8316, 0x8 +#memset 0x8320, 0x8 +#memset 0x8324, 0x8 +#memset 0x8328, 0x8 +#memset 0x8332, 0x8 +#memset 0x8336, 0x8 +#memset 0x8340, 0x8 +#memset 0x8344, 0x8 +#memset 0x8348, 0x8 +#memset 0x8352, 0x8 +#memset 0x8356, 0x8 +#memset 0x8360, 0x8 +#memset 0x8364, 0x8 +#memset 0x8368, 0x8 +#memset 0x8372, 0x8 +#memset 0x8376, 0x8 +#memset 0x8380, 0x8 +#memset 0x8384, 0x8 +#memset 0x8388, 0x8 +#memset 0x8392, 0x8 +#memset 0x8396, 0x8 +#memset 0x8400, 0x8 +#memset 0x8404, 0x8 +#memset 0x8408, 0x8 +#memset 0x8412, 0x8 +#memset 0x8416, 0x8 +#memset 0x8420, 0x8 +#memset 0x8424, 0x8 +#memset 0x8428, 0x8 +#memset 0x8432, 0x8 +#memset 0x8436, 0x8 +#memset 0x8440, 0x8 +#memset 0x8444, 0x8 +#memset 0x8448, 0x8 +#memset 0x8452, 0x8 +#memset 0x8456, 0x8 +#memset 0x8460, 0x8 +#memset 0x8464, 0x8 +#memset 0x8468, 0x8 +#memset 0x8472, 0x8 +#memset 0x8476, 0x8 +#memset 0x8480, 0x8 +#memset 0x8484, 0x8 +#memset 0x8488, 0x8 +#memset 0x8492, 0x8 +#memset 0x8496, 0x8 +#memset 0x8500, 0x8 +#memset 0x8504, 0x8 +#memset 0x8508, 0x8 +#memset 0x8512, 0x8 +#memset 0x8516, 0x8 +#memset 
0x8520, 0x8 +#memset 0x8524, 0x8 +#memset 0x8528, 0x8 +#memset 0x8532, 0x8 +#memset 0x8536, 0x8 +#memset 0x8540, 0x8 +#memset 0x8544, 0x8 +#memset 0x8548, 0x8 +#memset 0x8552, 0x8 +#memset 0x8556, 0x8 +#memset 0x8560, 0x8 +#memset 0x8564, 0x8 +#memset 0x8568, 0x8 +#memset 0x8572, 0x8 +#memset 0x8576, 0x8 +#memset 0x8580, 0x8 +#memset 0x8584, 0x8 +#memset 0x8588, 0x8 +#memset 0x8592, 0x8 +#memset 0x8596, 0x8 +#memset 0x8600, 0x8 +#memset 0x8604, 0x8 +#memset 0x8608, 0x8 +#memset 0x8612, 0x8 +#memset 0x8616, 0x8 +#memset 0x8620, 0x8 +#memset 0x8624, 0x8 +#memset 0x8628, 0x8 +#memset 0x8632, 0x8 +#memset 0x8636, 0x8 +#memset 0x8640, 0x8 +#memset 0x8644, 0x8 +#memset 0x8648, 0x8 +#memset 0x8652, 0x8 +#memset 0x8656, 0x8 +#memset 0x8660, 0x8 +#memset 0x8664, 0x8 +#memset 0x8668, 0x8 +#memset 0x8672, 0x8 +#memset 0x8676, 0x8 +#memset 0x8680, 0x8 +#memset 0x8684, 0x8 +#memset 0x8688, 0x8 +#memset 0x8692, 0x8 +#memset 0x8696, 0x8 +#memset 0x8700, 0x8 +#memset 0x8704, 0x8 +#memset 0x8708, 0x8 +#memset 0x8712, 0x8 +#memset 0x8716, 0x8 +#memset 0x8720, 0x8 +#memset 0x8724, 0x8 +#memset 0x8728, 0x8 +#memset 0x8732, 0x8 +#memset 0x8736, 0x8 +#memset 0x8740, 0x8 +#memset 0x8744, 0x8 +#memset 0x8748, 0x8 +#memset 0x8752, 0x8 +#memset 0x8756, 0x8 +#memset 0x8760, 0x8 +#memset 0x8764, 0x8 +#memset 0x8768, 0x8 +#memset 0x8772, 0x8 +#memset 0x8776, 0x8 +#memset 0x8780, 0x8 +#memset 0x8784, 0x8 +#memset 0x8788, 0x8 +#memset 0x8792, 0x8 +#memset 0x8796, 0x8 +#memset 0x8800, 0x8 +#memset 0x8804, 0x8 +#memset 0x8808, 0x8 +#memset 0x8812, 0x8 +#memset 0x8816, 0x8 +#memset 0x8820, 0x8 +#memset 0x8824, 0x8 +#memset 0x8828, 0x8 +#memset 0x8832, 0x8 +#memset 0x8836, 0x8 +#memset 0x8840, 0x8 +#memset 0x8844, 0x8 +#memset 0x8848, 0x8 +#memset 0x8852, 0x8 +#memset 0x8856, 0x8 +#memset 0x8860, 0x8 +#memset 0x8864, 0x8 +#memset 0x8868, 0x8 +#memset 0x8872, 0x8 +#memset 0x8876, 0x8 +#memset 0x8880, 0x8 +#memset 0x8884, 0x8 +#memset 0x8888, 0x8 +#memset 0x8892, 0x8 +#memset 0x8896, 0x8 +#memset 
0x8900, 0x8 +#memset 0x8904, 0x8 +#memset 0x8908, 0x8 +#memset 0x8912, 0x8 +#memset 0x8916, 0x8 +#memset 0x8920, 0x8 +#memset 0x8924, 0x8 diff --git a/src/test/scala/Manifest.scala b/src/test/scala/Manifest.scala index 4ca5d56..4d08552 100644 --- a/src/test/scala/Manifest.scala +++ b/src/test/scala/Manifest.scala @@ -48,6 +48,23 @@ object Manifest { } + +class ProfileBranching extends FlatSpec with Matchers { + it should "profile some branches" in { + TestRunner.profileBranching( + Manifest.singleTestOptions.copy(testName = "branchProfiling.s") + ) should be(true) + } +} + +class ProfileCache extends FlatSpec with Matchers { + it should "profile a cache" in { + TestRunner.profileCache( + Manifest.singleTestOptions.copy(testName = "convolution.s") + ) should be(true) + } +} + class SingleTest extends FlatSpec with Matchers { it should "just werk" in { TestRunner.run(Manifest.singleTestOptions) should be(true) From 4e21e33d68fefa471dcc7a2cf3d605dd68e93a76 Mon Sep 17 00:00:00 2001 From: peteraa Date: Thu, 17 Oct 2019 16:47:21 +0200 Subject: [PATCH 04/22] Some fixes --- src/test/scala/Manifest.scala | 3 ++- src/test/scala/RISCV/DataTypes.scala | 3 +-- src/test/scala/RISCV/Parser.scala | 6 ++---- src/test/scala/RISCV/testRunner.scala | 18 ++++++++++-------- 4 files changed, 15 insertions(+), 15 deletions(-) diff --git a/src/test/scala/Manifest.scala b/src/test/scala/Manifest.scala index 4d08552..f5352fa 100644 --- a/src/test/scala/Manifest.scala +++ b/src/test/scala/Manifest.scala @@ -43,7 +43,8 @@ object Manifest { printMergedTrace = false, nopPadded = nopPadded, breakPoints = Nil, // not implemented - testName = name) + testName = name, + maxSteps = 15000) } diff --git a/src/test/scala/RISCV/DataTypes.scala b/src/test/scala/RISCV/DataTypes.scala index 4cd7c61..5627015 100644 --- a/src/test/scala/RISCV/DataTypes.scala +++ b/src/test/scala/RISCV/DataTypes.scala @@ -236,7 +236,6 @@ object Data { ops : List[SourceInfo[Op]], settings : List[TestSetting], labelMap : 
Map[Label, Addr], - maxSteps : Int = 5000 ){ def imem: Map[Addr, Op] = @@ -272,7 +271,7 @@ object Data { /** * Returns the binary code and the execution trace or an error for convenient error checking. */ - def validate: Either[String, (Map[Addr, Int], ExecutionTrace[VM])] = machineCode.flatMap{ binary => + def validate(maxSteps: Int): Either[String, (Map[Addr, Int], ExecutionTrace[VM])] = machineCode.flatMap{ binary => val uk = "UNKNOWN" val (finish, trace) = VM.run(maxSteps, vm) finish match { diff --git a/src/test/scala/RISCV/Parser.scala b/src/test/scala/RISCV/Parser.scala index 37773d9..d0cb41c 100644 --- a/src/test/scala/RISCV/Parser.scala +++ b/src/test/scala/RISCV/Parser.scala @@ -66,6 +66,7 @@ object Parser { stringWs("sra") ~> arith.mapN{Arith.sra}, stringWs("slt") ~> arith.mapN{Arith.slt}, + stringWs("sgt") ~> arith.mapN{ case(x,y,z) => Arith.slt(x,z,y)}, stringWs("sltu") ~> arith.mapN{Arith.sltu}, // pseudos @@ -99,10 +100,7 @@ object Parser { stringWs("seqz") ~> (reg <~ sep, reg, ok(1)).mapN{ArithImm.sltu}, stringWs("li") ~> (reg ~ sep ~ (hex | int)).collect{ - case((a, b), c) if (c.nBitsS <= 12) => { - say(s"for c: $c, nBitsS was ${c.nBitsS}") - ArithImm.add(a, 0, c) - } + case((a, b), c) if (c.nBitsS <= 12) => { ArithImm.add(a, 0, c) } }, diff --git a/src/test/scala/RISCV/testRunner.scala b/src/test/scala/RISCV/testRunner.scala index d51128f..aa53791 100644 --- a/src/test/scala/RISCV/testRunner.scala +++ b/src/test/scala/RISCV/testRunner.scala @@ -25,7 +25,8 @@ case class TestOptions( printMergedTrace : Boolean, nopPadded : Boolean, breakPoints : List[Int], // Not implemented - testName : String + testName : String, + maxSteps : Int ) case class TestResult( @@ -44,12 +45,12 @@ object TestRunner { val testResults = for { lines <- fileUtils.readTest(testOptions) program <- FiveStage.Parser.parseProgram(lines, testOptions) - (binary, (trace, finalVM)) <- program.validate.map(x => (x._1, x._2.run)) + (binary, (trace, finalVM)) <- 
program.validate(testOptions.maxSteps).map(x => (x._1, x._2.run)) (termitationCause, chiselTrace) <- ChiselTestRunner( - binary.toList.sortBy(_._1.value).map(_._2), - program.settings, - finalVM.pc, - 15000) + binary.toList.sortBy(_._1.value).map(_._2), + program.settings, + finalVM.pc, + testOptions.maxSteps) } yield { val traces = mergeTraces(trace, chiselTrace).map(x => printMergedTraces((x), program)) @@ -106,7 +107,7 @@ object TestRunner { val testResults = for { lines <- fileUtils.readTest(testOptions) program <- FiveStage.Parser.parseProgram(lines, testOptions) - (binary, (trace, finalVM)) <- program.validate.map(x => (x._1, x._2.run)) + (binary, (trace, finalVM)) <- program.validate(testOptions.maxSteps).map(x => (x._1, x._2.run)) } yield { sealed trait BranchEvent @@ -164,6 +165,7 @@ object TestRunner { say(OneBitInfiniteSlots(events)) } + true } @@ -173,7 +175,7 @@ object TestRunner { val testResults = for { lines <- fileUtils.readTest(testOptions) program <- FiveStage.Parser.parseProgram(lines, testOptions) - (binary, (trace, finalVM)) <- program.validate.map(x => (x._1, x._2.run)) + (binary, (trace, finalVM)) <- program.validate(testOptions.maxSteps).map(x => (x._1, x._2.run)) } yield { sealed trait MemoryEvent From 90d3bd946f32cd8e05d8b99f0013c67f41ad0018 Mon Sep 17 00:00:00 2001 From: peteraa Date: Thu, 17 Oct 2019 16:59:16 +0200 Subject: [PATCH 05/22] Add more theory --- theory2.org | 79 +++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 56 insertions(+), 23 deletions(-) diff --git a/theory2.org b/theory2.org index 07cf86e..bfee973 100644 --- a/theory2.org +++ b/theory2.org @@ -1,4 +1,36 @@ -* Question 1 - Benchmarking +* Question 1 - Hazards + For the following program describe each hazard with type (data or control), line number and a + small (max one sentence) description + +** program 1 + #+begin_src asm + addi t0, zero, 10 + addi t1, zero, 20 + sub t1, t1, t0 + beq t1, zero, .L2 + jr ra + #+end_src + +** program 2 + 
#+begin_src asm + addi t0, zero, 10 + lw t0, 10(t0) + beq t0, zero, .L3 + jr ra + #+end_src + +** program 3 + #+begin_src asm + lw t0, 0(t0) + lw t1, 4(t0) + sw t0, 8(t1) + lw t1, 12(t0) + beq t0, t1, .L3 + jr ra + #+end_src + +* Question 2 - ??? +* Question 3 - Benchmarking In order to gauge the performance increase from adding branch predictors it is necessary to do some testing. Rather than writing a test from scratch it is better to use the tester already in use in the test harness. When running a program the VM outputs a log of all events, including which branches have been taken and which @@ -11,7 +43,7 @@ sealed trait BranchEvent case class Taken(addr: Int) extends BranchEvent case class NotTaken(addr: Int) extends BranchEvent - + def profile(events: List[BranchEvent]): Int = ??? #+END_SRC @@ -22,73 +54,74 @@ #+BEGIN_SRC scala def OneBitInfiniteSlots(events: List[BranchEvent]): Int = { - + // Helper inspects the next element of the event list. If the event is a mispredict the prediction table is updated // to reflect this. // As long as there are remaining events the helper calls itself recursively on the remainder def helper(events: List[BranchEvent], predictionTable: Map[Int, Boolean]): Int = { events match { - // Scala syntax for matching a list with a head element of some type and a tail - // `case h :: t =>` + // Scala syntax for matching a list with a head element of some type and a tail + // `case h :: t =>` // means we want to match a list with at least a head and a tail (tail can be Nil, so we // essentially want to match a list with at least one element) // h is the first element of the list, t is the remainder (which can be Nil, aka empty) - // `case Constructor(arg1, arg2) :: t => ` + // `case Constructor(arg1, arg2) :: t => ` // means we want to match a list whose first element is of type Constructor, giving us access to its internal // values. 
- // `case Constructor(arg1, arg2) :: t => if(p(arg1, arg2))` + // `case Constructor(arg1, arg2) :: t => if(p(arg1, arg2))` // means we want to match a list whose first element is of type Constructor while satisfying some predicate p, // called an if guard. - case Taken(addr) :: t if( predictionTable(addr)) => helper(t, predictionTable) - case Taken(addr) :: t if(!predictionTable(addr)) => 1 + helper(t, predictionTable.updated(addr, true)) - case NotTaken(addr) :: t if(!predictionTable(addr)) => 1 + helper(t, predictionTable.updated(addr, false)) - case NotTaken(addr) :: t if( predictionTable(addr)) => helper(t, predictionTable) - case _ => 0 + case Taken(addr) :: t if( predictionTable(addr)) => helper(t, predictionTable) + case Taken(addr) :: t if(!predictionTable(addr)) => 1 + helper(t, predictionTable.updated(addr, true)) + case NotTaken(addr) :: t if(!predictionTable(addr)) => 1 + helper(t, predictionTable.updated(addr, false)) + case NotTaken(addr) :: t if( predictionTable(addr)) => helper(t, predictionTable) + case _ => 0 } } - + // Initially every possible branch is set to false since the initial state of the predictor is to assume branch not taken def initState = events.map{ case Taken(addr) => (addr, false) case NotTaken(addr) => (addr, false) }.toMap - + helper(events, initState) } #+END_SRC - + +** TODO Branch predictor is underspecified, needs to be cleaned up ** Your task Your job is to implement a test that checks how many misses occur for a 2 bit branch predictor with 4 slots. For this task it is probably smart to use something else than a ~Map[(Int, Boolean)]~ - + The skeleton code is located in ~testRunner.scala~ and can be run using testOnly FiveStage.ProfileTest. If you do so now you will see that the unrealistic prediction model yields 1449 misses. - With a 2 bit 4 slot scheme, how many misses will you incur? + With a 2 bit 4 slot scheme, how many misses will you incur? Answer with a number. 
- -* Question 2 - Cache profiling + +* Question 4 - Cache profiling Unlike our design which has a very limited memory pool, real designs have access to vast amounts of memory, offset by a steep cost in access latency. To amend this a modern processor features several caches where even the smallest fastest cache has more memory than your entire design. In order to investigate how caches can alter performance it is therefore necessary to make some rather unrealistic assumptions to see how different cache schemes impacts performance. - - We will therefore assume the following: + + We will therefore assume the following: + Reads from main memory takes 5 cycles + cache has a total storage of 32 words (1024 bits) + cache reads work as they do now (i.e no additional latency) - + For this exercise you will write a program that parses a log of memory events, similar to previous task #+BEGIN_SRC scala sealed trait MemoryEvent case class Write(addr: Int) extends MemoryEvent case class Read(addr: Int) extends MemoryEvent - + def profile(events: List[MemoryEvent]): Int = ??? #+END_SRC From 9da79e38518a07cb590214a2773768c1aa30a83d Mon Sep 17 00:00:00 2001 From: peteraa Date: Thu, 17 Oct 2019 17:39:34 +0200 Subject: [PATCH 06/22] More theory --- theory2.org | 60 ++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 59 insertions(+), 1 deletion(-) diff --git a/theory2.org b/theory2.org index bfee973..84def08 100644 --- a/theory2.org +++ b/theory2.org @@ -6,11 +6,13 @@ #+begin_src asm addi t0, zero, 10 addi t1, zero, 20 + L2: sub t1, t1, t0 beq t1, zero, .L2 jr ra #+end_src + ** program 2 #+begin_src asm addi t0, zero, 10 @@ -19,6 +21,7 @@ jr ra #+end_src + ** program 3 #+begin_src asm lw t0, 0(t0) @@ -29,7 +32,62 @@ jr ra #+end_src -* Question 2 - ??? + +* Question 2 - Handling hazards + For this question, keep in mind that the forwarder does not care if the values it forwards are being used or not! 
+ Even for a JAL instructions which has neither an rs1 or rs2 field, the forwarder must still forward its values. + +** Data hazards 1 + At some cycle the following instructions can be found in a 5 stage design: + + EX: || MEM: || WB: + ---------------------||-------------------------||-------------------------- + rs1: 4 || rs1: 4 || rs1: 1 + rs2: 5 || rs2: 6 || rs2: 2 + rd: 6 || rd: 4 || rd: 5 + memToReg = false || memToReg = false || memToReg = false + regWrite = true || regWrite = false || regWrite = true + memWrite = false || memWrite = false || memWrite = false + branch = false || branch = true || branch = false + jump = false || jump = false || jump = false + + For the operation currently in EX, from where (ID, MEM or WB) should the forwarder get data from for rs1 and rs2? + +** Data hazards 2 + + At some cycle the following instructions can be found in a 5 stage design: + + EX: || MEM: || WB: + ---------------------||-------------------------||-------------------------- + rs1: 1 || rs1: 4 || rs1: 1 + rs2: 5 || rs2: 6 || rs2: 0 + rd: 0 || rd: 1 || rd: 0 + memToReg = false || memToReg = false || memToReg = false + regWrite = true || regWrite = true || regWrite = true + memWrite = false || memWrite = false || memWrite = false + branch = false || branch = true || branch = false + jump = true || jump = true || jump = false + + For the operation currently in EX, from where (ID, MEM or WB) should the forwarder get data from for rs1 and rs2? 
+ +** Data hazards 3 + + At some cycle the following instructions can be found in a 5 stage design: + + EX: || MEM: || WB: + ---------------------||-------------------------||-------------------------- + rs1: 2 || rs1: 4 || rs1: 3 + rs2: 5 || rs2: 6 || rs2: 4 + rd: 1 || rd: 1 || rd: 5 + memToReg = false || memToReg = true || memToReg = false + regWrite = false || regWrite = true || regWrite = true + memWrite = true || memWrite = false || memWrite = false + branch = false || branch = false || branch = false + jump = false || jump = false || jump = false + + Should the forwarding unit issue a load hazard signal? + (Hint: what are the semantics of the instruction currently in EX stage?) + * Question 3 - Benchmarking In order to gauge the performance increase from adding branch predictors it is necessary to do some testing. Rather than writing a test from scratch it is better to use the tester already in use in the test harness. From 61ce60e4593895eead562975e42a32bc403ef74a Mon Sep 17 00:00:00 2001 From: peteraa Date: Thu, 17 Oct 2019 17:39:59 +0200 Subject: [PATCH 07/22] git expert strikes again --- theory2.org | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/theory2.org b/theory2.org index 84def08..872fc96 100644 --- a/theory2.org +++ b/theory2.org @@ -42,14 +42,14 @@ EX: || MEM: || WB: ---------------------||-------------------------||-------------------------- - rs1: 4 || rs1: 4 || rs1: 1 - rs2: 5 || rs2: 6 || rs2: 2 - rd: 6 || rd: 4 || rd: 5 - memToReg = false || memToReg = false || memToReg = false - regWrite = true || regWrite = false || regWrite = true - memWrite = false || memWrite = false || memWrite = false - branch = false || branch = true || branch = false - jump = false || jump = false || jump = false + rs1: 4 || rs1: 4 || rs1: 1 + rs2: 5 || rs2: 6 || rs2: 2 + rd: 6 || rd: 4 || rd: 5 + memToReg = false || memToReg = false || memToReg = false + regWrite = true || regWrite = false || regWrite = true + 
memWrite = false || memWrite = false || memWrite = false + branch = false || branch = true || branch = false + jump = false || jump = false || jump = false For the operation currently in EX, from where (ID, MEM or WB) should the forwarder get data from for rs1 and rs2? @@ -59,12 +59,12 @@ EX: || MEM: || WB: ---------------------||-------------------------||-------------------------- - rs1: 1 || rs1: 4 || rs1: 1 - rs2: 5 || rs2: 6 || rs2: 0 - rd: 0 || rd: 1 || rd: 0 + rs1: 1 || rs1: 4 || rs1: 1 + rs2: 5 || rs2: 6 || rs2: 0 + rd: 0 || rd: 1 || rd: 0 memToReg = false || memToReg = false || memToReg = false regWrite = true || regWrite = true || regWrite = true - memWrite = false || memWrite = false || memWrite = false + memWrite = false || memWrite = false || memWrite = false branch = false || branch = true || branch = false jump = true || jump = true || jump = false @@ -76,12 +76,12 @@ EX: || MEM: || WB: ---------------------||-------------------------||-------------------------- - rs1: 2 || rs1: 4 || rs1: 3 - rs2: 5 || rs2: 6 || rs2: 4 - rd: 1 || rd: 1 || rd: 5 + rs1: 2 || rs1: 4 || rs1: 3 + rs2: 5 || rs2: 6 || rs2: 4 + rd: 1 || rd: 1 || rd: 5 memToReg = false || memToReg = true || memToReg = false regWrite = false || regWrite = true || regWrite = true - memWrite = true || memWrite = false || memWrite = false + memWrite = true || memWrite = false || memWrite = false branch = false || branch = false || branch = false jump = false || jump = false || jump = false From f18b35d53b326c79730198a959e2fc5cbdf21935 Mon Sep 17 00:00:00 2001 From: peteraa Date: Thu, 17 Oct 2019 17:41:04 +0200 Subject: [PATCH 08/22] improve rendering on git --- theory2.org | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/theory2.org b/theory2.org index 872fc96..74b2100 100644 --- a/theory2.org +++ b/theory2.org @@ -40,6 +40,7 @@ ** Data hazards 1 At some cycle the following instructions can be found in a 5 stage design: + #+begin_src text EX: || MEM: || WB: 
---------------------||-------------------------||-------------------------- rs1: 4 || rs1: 4 || rs1: 1 @@ -50,6 +51,7 @@ memWrite = false || memWrite = false || memWrite = false branch = false || branch = true || branch = false jump = false || jump = false || jump = false + #+end_src For the operation currently in EX, from where (ID, MEM or WB) should the forwarder get data from for rs1 and rs2? @@ -57,6 +59,7 @@ At some cycle the following instructions can be found in a 5 stage design: + #+begin_src text EX: || MEM: || WB: ---------------------||-------------------------||-------------------------- rs1: 1 || rs1: 4 || rs1: 1 @@ -67,6 +70,7 @@ memWrite = false || memWrite = false || memWrite = false branch = false || branch = true || branch = false jump = true || jump = true || jump = false + #+end_src For the operation currently in EX, from where (ID, MEM or WB) should the forwarder get data from for rs1 and rs2? @@ -74,6 +78,7 @@ At some cycle the following instructions can be found in a 5 stage design: + #+begin_src text EX: || MEM: || WB: ---------------------||-------------------------||-------------------------- rs1: 2 || rs1: 4 || rs1: 3 @@ -87,6 +92,7 @@ Should the forwarding unit issue a load hazard signal? (Hint: what are the semantics of the instruction currently in EX stage?) + #+end_src * Question 3 - Benchmarking In order to gauge the performance increase from adding branch predictors it is necessary to do some testing. 
From 6bf8612e81f26eb8d083e33e860228a55f11c66b Mon Sep 17 00:00:00 2001 From: peteraa Date: Thu, 17 Oct 2019 18:04:37 +0200 Subject: [PATCH 09/22] Add branch predictor theory question --- theory2.org | 43 ++++++++++++++++++++++++++++++++++++++----- 1 file changed, 38 insertions(+), 5 deletions(-) diff --git a/theory2.org b/theory2.org index 74b2100..83980ad 100644 --- a/theory2.org +++ b/theory2.org @@ -1,5 +1,5 @@ * Question 1 - Hazards - For the following program describe each hazard with type (data or control), line number and a + For the following programs describe each hazard with type (data or control), line number and a small (max one sentence) description ** program 1 @@ -94,7 +94,41 @@ (Hint: what are the semantics of the instruction currently in EX stage?) #+end_src -* Question 3 - Benchmarking +* Question 3 - Branch prediction + Consider a 2 bit branch predictor with only 4 slots where the decision to take a branch or + not is decided in accordance to the following table + + #+begin_src text + state || predict taken || next state if taken || next state if not taken || + =======||=================||=======================||==========================|| + 00 || NO || 01 || 00 || + 01 || NO || 11 || 00 || + 10 || YES || 11 || 00 || + 11 || YES || 11 || 10 || + #+end_src + + At some point during execution the program counter is ~0xc~ and the branch predictor table looks like this: + + #+begin_src text + slot || value + ======||======== + 00 || 01 + 01 || 00 + 10 || 11 + 11 || 01 + #+end_src + + + #+begin_src asm + 0xc addi x1, x3, 10 + 0x10 add x2, x1, x1 + 0x14 beq x1, x2, .L1 + 0x18 j .L2 + #+end_src + + Will the predictor predict taken or not taken for the beq instruction? + +* Question 4 - Benchmarking In order to gauge the performance increase from adding branch predictors it is necessary to do some testing. Rather than writing a test from scratch it is better to use the tester already in use in the test harness. 
When running a program the VM outputs a log of all events, including which branches have been taken and which @@ -162,12 +196,11 @@ For this task it is probably smart to use something else than a ~Map[(Int, Boolean)]~ The skeleton code is located in ~testRunner.scala~ and can be run using testOnly FiveStage.ProfileTest. - If you do so now you will see that the unrealistic prediction model yields 1449 misses. With a 2 bit 4 slot scheme, how many misses will you incur? Answer with a number. -* Question 4 - Cache profiling +* Question 5 - Cache profiling Unlike our design which has a very limited memory pool, real designs have access to vast amounts of memory, offset by a steep cost in access latency. To amend this a modern processor features several caches where even the smallest fastest cache has more memory than @@ -191,7 +224,7 @@ #+END_SRC ** Your task - Your job is to implement a test that checks how many delay cycles will occur for a cache which: + Your job is to implement a model that tests how many delay cycles will occur for a cache which: + Follows a 2-way associative scheme + Block size is 4 words (128 bits) + Is write-through write no-allocate From 2944ee9d4ea56ac223aa5c39532a17c0ec700207 Mon Sep 17 00:00:00 2001 From: peteraa Date: Thu, 17 Oct 2019 19:20:34 +0200 Subject: [PATCH 10/22] More diddling with theory --- theory2.org | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/theory2.org b/theory2.org index 83980ad..5b0d0c9 100644 --- a/theory2.org +++ b/theory2.org @@ -95,9 +95,8 @@ #+end_src * Question 3 - Branch prediction - Consider a 2 bit branch predictor with only 4 slots where the decision to take a branch or - not is decided in accordance to the following table - + Consider a 2 bit branch predictor with only 4 slots for a 32 bit architecture, where the decision to + take a branch or not is decided in accordance to the following table: #+begin_src text state || predict taken || next state if taken || next state 
if not taken || =======||=================||=======================||==========================|| @@ -108,7 +107,6 @@ #+end_src At some point during execution the program counter is ~0xc~ and the branch predictor table looks like this: - #+begin_src text slot || value ======||======== @@ -118,7 +116,7 @@ 11 || 01 #+end_src - + For the following program: #+begin_src asm 0xc addi x1, x3, 10 0x10 add x2, x1, x1 @@ -190,10 +188,13 @@ } #+END_SRC -** TODO Branch predictor is underspecified, needs to be cleaned up ** Your task - Your job is to implement a test that checks how many misses occur for a 2 bit branch predictor with 4 slots. - For this task it is probably smart to use something else than a ~Map[(Int, Boolean)]~ + Your job is to implement a test that checks how many misses occur for a 2 bit branch predictor with 8 slots. + The rule table is the same as in question 3. + For simplicitys sake, assume that every value in the table is initialized to 00. + + For this task it is necessary to use something more sophisticated than ~Map[(Int, Boolean)]~ to represent + your branch predictor model. The skeleton code is located in ~testRunner.scala~ and can be run using testOnly FiveStage.ProfileTest. 
From ba7dfd8d2fb653d96c03022080bd582b8f335fac Mon Sep 17 00:00:00 2001 From: Peter Aaser Date: Fri, 18 Oct 2019 13:40:27 +0200 Subject: [PATCH 11/22] Update Manifest.scala --- src/test/scala/Manifest.scala | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/test/scala/Manifest.scala b/src/test/scala/Manifest.scala index f5352fa..2ab4eed 100644 --- a/src/test/scala/Manifest.scala +++ b/src/test/scala/Manifest.scala @@ -31,7 +31,8 @@ object Manifest { printMergedTrace = true, nopPadded = nopPadded, breakPoints = Nil, // not implemented - testName = singleTest) + testName = singleTest, + maxSteps = 15000) val allTestOptions: String => TestOptions = name => TestOptions( From 7394e7a464094caa91c41d016ee23005ee72d50a Mon Sep 17 00:00:00 2001 From: peteraa Date: Mon, 28 Oct 2019 09:40:16 +0100 Subject: [PATCH 12/22] Fix branch predictor task being wrong in several orthogonal ways. --- src/test/scala/RISCV/DataTypes.scala | 21 +++++++-- src/test/scala/RISCV/VM.scala | 2 +- src/test/scala/RISCV/printUtils.scala | 8 ++-- src/test/scala/RISCV/testRunner.scala | 68 +++++++++++++++++++++++---- theory2.org | 2 + 5 files changed, 81 insertions(+), 20 deletions(-) diff --git a/src/test/scala/RISCV/DataTypes.scala b/src/test/scala/RISCV/DataTypes.scala index 5627015..14d6c61 100644 --- a/src/test/scala/RISCV/DataTypes.scala +++ b/src/test/scala/RISCV/DataTypes.scala @@ -37,11 +37,11 @@ object Data { case class MemRead(addr: Addr, word: Int) extends ExecutionEvent // addr is the target address - case class PcUpdateJALR(addr: Addr) extends ExecutionEvent - case class PcUpdateJAL(addr: Addr) extends ExecutionEvent - case class PcUpdateBranch(addr: Addr) extends ExecutionEvent - case class PcUpdateNoBranch(addr: Addr) extends ExecutionEvent - case class PcUpdate(addr: Addr) extends ExecutionEvent + case class PcUpdateJALR(addr: Addr) extends ExecutionEvent + case class PcUpdateJAL(addr: Addr) extends ExecutionEvent + case class PcUpdateBranch(addr: Addr, target: 
Addr) extends ExecutionEvent + case class PcUpdateNoBranch(addr: Addr) extends ExecutionEvent + case class PcUpdate(addr: Addr) extends ExecutionEvent case class ExecutionTraceEvent(pc: Addr, event: ExecutionEvent*){ override def toString(): String = s"$pc: " + event.toList.mkString(", ") } type ExecutionTrace[A] = Writer[List[ExecutionTraceEvent], A] @@ -169,6 +169,17 @@ object Data { } def log2: Int = math.ceil(math.log(i.toDouble)/math.log(2.0)).toInt + + // Discards two lowest bits + def getTag(slots: Int): Int = { + val bitsLeft = 32 - (slots.log2 + 2) + val bitsRight = 32 - slots.log2 + val leftShifted = i << bitsLeft + val rightShifted = leftShifted >>> bitsRight + // say(i) + // say(rightShifted) + rightShifted + } } implicit class StringOps(s: String) { diff --git a/src/test/scala/RISCV/VM.scala b/src/test/scala/RISCV/VM.scala index effaf6d..cf597ba 100644 --- a/src/test/scala/RISCV/VM.scala +++ b/src/test/scala/RISCV/VM.scala @@ -43,7 +43,7 @@ case class VM( val takeBranch = regs.compare(op.rs1, op.rs2, op.comp.run) if(takeBranch){ val nextVM = copy(pc = addr) - jump(nextVM, PcUpdateBranch(nextVM.pc)) + jump(nextVM, PcUpdateBranch(pc, nextVM.pc)) } else { step(this, PcUpdateNoBranch(this.pc + Addr(4))) diff --git a/src/test/scala/RISCV/printUtils.scala b/src/test/scala/RISCV/printUtils.scala index 980e52e..02b76fc 100644 --- a/src/test/scala/RISCV/printUtils.scala +++ b/src/test/scala/RISCV/printUtils.scala @@ -40,10 +40,10 @@ object PrintUtils { case MemRead(addr, word) => fansi.Color.Red(f"M[${addr.show}] -> 0x${word.hs}") // addr is the target address - case PcUpdateJALR(addr) => fansi.Color.Green(s"PC updated to ${addr.show} via JALR") - case PcUpdateJAL(addr) => fansi.Color.Magenta(s"PC updated to ${addr.show} via JAL") - case PcUpdateBranch(addr) => fansi.Color.Yellow(s"PC updated to ${addr.show} via Branch") - case PcUpdateNoBranch(addr) => fansi.Color.Yellow(s"PC updated to ${addr.show}, skipping a Branch") + case PcUpdateJALR(addr) => 
fansi.Color.Green(s"PC updated to ${addr.show} via JALR") + case PcUpdateJAL(addr) => fansi.Color.Magenta(s"PC updated to ${addr.show} via JAL") + case PcUpdateBranch(from, to) => fansi.Color.Yellow(s"PC updated to ${to.show} via Branch") + case PcUpdateNoBranch(addr) => fansi.Color.Yellow(s"PC updated to ${addr.show}, skipping a Branch") } } diff --git a/src/test/scala/RISCV/testRunner.scala b/src/test/scala/RISCV/testRunner.scala index aa53791..80a582e 100644 --- a/src/test/scala/RISCV/testRunner.scala +++ b/src/test/scala/RISCV/testRunner.scala @@ -111,12 +111,12 @@ object TestRunner { } yield { sealed trait BranchEvent - case class Taken(addr: Int) extends BranchEvent - case class NotTaken(addr: Int) extends BranchEvent + case class Taken(from: Int, to: Int) extends BranchEvent { override def toString = s"Taken ${from.hs}\t${to.hs}" } + case class NotTaken(addr: Int) extends BranchEvent { override def toString = s"Not Taken ${addr.hs}" } val events: List[BranchEvent] = trace.flatMap(_.event).collect{ - case PcUpdateBranch(x) => Taken(x.value) - case PcUpdateNoBranch(x) => NotTaken(x.value) + case PcUpdateBranch(from, to) => Taken(from.value, to.value) + case PcUpdateNoBranch(at) => NotTaken(at.value) } @@ -126,6 +126,9 @@ object TestRunner { */ def OneBitInfiniteSlots(events: List[BranchEvent]): Int = { + // Uncomment to take a look at the event log + // say(events.mkString("\n","\n","\n")) + // Helper inspects the next element of the event list. If the event is a mispredict the prediction table is updated // to reflect this. // As long as there are remaining events the helper calls itself recursively on the remainder @@ -145,24 +148,69 @@ object TestRunner { // `case Constructor(arg1, arg2) :: t => if(p(arg1, arg2))` // means we want to match a list whose first element is of type Constructor while satisfying some predicate p, // called an if guard. 
- case Taken(addr) :: t if( predictionTable(addr)) => helper(t, predictionTable) - case Taken(addr) :: t if(!predictionTable(addr)) => 1 + helper(t, predictionTable.updated(addr, true)) - case NotTaken(addr) :: t if(!predictionTable(addr)) => 1 + helper(t, predictionTable.updated(addr, false)) - case NotTaken(addr) :: t if( predictionTable(addr)) => helper(t, predictionTable) + case Taken(from, to) :: t if( predictionTable(from)) => helper(t, predictionTable) + case Taken(from, to) :: t if(!predictionTable(from)) => 1 + helper(t, predictionTable.updated(from, true)) + case NotTaken(addr) :: t if(!predictionTable(addr)) => 1 + helper(t, predictionTable.updated(addr, false)) + case NotTaken(addr) :: t if( predictionTable(addr)) => helper(t, predictionTable) case _ => 0 } } // Initially every possible branch is set to false since the initial state of the predictor is to assume branch not taken def initState = events.map{ - case Taken(addr) => (addr, false) - case NotTaken(addr) => (addr, false) + case Taken(from, addr) => (from, false) + case NotTaken(addr) => (addr, false) }.toMap helper(events, initState) } + + def nBitPredictor(events: List[BranchEvent]): Int = { + + case class nBitPredictor( + values : List[Int], + predictionRules : List[Boolean], + transitionRules : Int => Boolean => Int, + ){ + val slots = values.size + + def predict(pc: Int): Boolean = predictionRules(values(pc.getTag(slots))) + + def update(pc: Int, taken: Boolean): nBitPredictor = { + val current = values(pc.getTag(slots)) + copy(values = values.updated(pc.getTag(slots), transitionRules(current)(taken))) + } + } + + val initPredictor = nBitPredictor( + List.fill(4)(0), + List( + false, + false, + true, + true, + ), + r => r match { + case 0 => taken => if(taken) 1 else 0 + case 1 => taken => if(taken) 3 else 0 + case 2 => taken => if(taken) 3 else 0 + case 3 => taken => if(taken) 3 else 2 + } + ) + + events.foldLeft((0, initPredictor)){ case(((acc, bp), event)) => event match { + case 
Taken(pc, _) if bp.predict(pc) => (acc, bp.update(pc, true)) + case Taken(pc, _) => (acc + 1, bp.update(pc, false)) + case NotTaken(pc) if !bp.predict(pc) => (acc, bp.update(pc, false)) + case NotTaken(pc) => (acc + 1, bp.update(pc, true)) + }}._1 + } + + + say(OneBitInfiniteSlots(events)) + say(nBitPredictor(events)) } diff --git a/theory2.org b/theory2.org index 5b0d0c9..90bf611 100644 --- a/theory2.org +++ b/theory2.org @@ -230,3 +230,5 @@ + Block size is 4 words (128 bits) + Is write-through write no-allocate + Eviction policy is LRU (least recently used) + + Your answer should be the number of cache miss latency cycles when using this cache. From e8322e2e5c7ba3ebe7e917779a450620f0e3cbfe Mon Sep 17 00:00:00 2001 From: peteraa Date: Mon, 28 Oct 2019 15:35:34 +0100 Subject: [PATCH 13/22] bricoleur commit --- src/test/scala/RISCV/testRunner.scala | 47 --------------------------- theory2.org | 25 ++++++++------ 2 files changed, 15 insertions(+), 57 deletions(-) diff --git a/src/test/scala/RISCV/testRunner.scala b/src/test/scala/RISCV/testRunner.scala index 80a582e..96e5c4b 100644 --- a/src/test/scala/RISCV/testRunner.scala +++ b/src/test/scala/RISCV/testRunner.scala @@ -164,54 +164,7 @@ object TestRunner { helper(events, initState) } - - - def nBitPredictor(events: List[BranchEvent]): Int = { - - case class nBitPredictor( - values : List[Int], - predictionRules : List[Boolean], - transitionRules : Int => Boolean => Int, - ){ - val slots = values.size - - def predict(pc: Int): Boolean = predictionRules(values(pc.getTag(slots))) - - def update(pc: Int, taken: Boolean): nBitPredictor = { - val current = values(pc.getTag(slots)) - copy(values = values.updated(pc.getTag(slots), transitionRules(current)(taken))) - } - } - - val initPredictor = nBitPredictor( - List.fill(4)(0), - List( - false, - false, - true, - true, - ), - r => r match { - case 0 => taken => if(taken) 1 else 0 - case 1 => taken => if(taken) 3 else 0 - case 2 => taken => if(taken) 3 else 0 - case 3 => 
taken => if(taken) 3 else 2 - } - ) - - events.foldLeft((0, initPredictor)){ case(((acc, bp), event)) => event match { - case Taken(pc, _) if bp.predict(pc) => (acc, bp.update(pc, true)) - case Taken(pc, _) => (acc + 1, bp.update(pc, false)) - case NotTaken(pc) if !bp.predict(pc) => (acc, bp.update(pc, false)) - case NotTaken(pc) => (acc + 1, bp.update(pc, true)) - }}._1 - } - - - say(OneBitInfiniteSlots(events)) - say(nBitPredictor(events)) - } true diff --git a/theory2.org b/theory2.org index 90bf611..0837bb6 100644 --- a/theory2.org +++ b/theory2.org @@ -95,7 +95,7 @@ #+end_src * Question 3 - Branch prediction - Consider a 2 bit branch predictor with only 4 slots for a 32 bit architecture, where the decision to + Consider a 2 bit branch predictor with only 4 slots for a 32 bit architecture (without BTB), where the decision to take a branch or not is decided in accordance to the following table: #+begin_src text state || predict taken || next state if taken || next state if not taken || @@ -137,8 +137,8 @@ #+BEGIN_SRC scala sealed trait BranchEvent - case class Taken(addr: Int) extends BranchEvent - case class NotTaken(addr: Int) extends BranchEvent + case class Taken(from: Int, to: Int) extends BranchEvent + case class NotTaken(at: Int) extends BranchEvent def profile(events: List[BranchEvent]): Int = ??? @@ -170,11 +170,11 @@ // `case Constructor(arg1, arg2) :: t => if(p(arg1, arg2))` // means we want to match a list whose first element is of type Constructor while satisfying some predicate p, // called an if guard. 
- case Taken(addr) :: t if( predictionTable(addr)) => helper(t, predictionTable) - case Taken(addr) :: t if(!predictionTable(addr)) => 1 + helper(t, predictionTable.updated(addr, true)) - case NotTaken(addr) :: t if(!predictionTable(addr)) => 1 + helper(t, predictionTable.updated(addr, false)) - case NotTaken(addr) :: t if( predictionTable(addr)) => helper(t, predictionTable) - case _ => 0 + case Taken(from, to) :: t if( predictionTable(from)) => helper(t, predictionTable) + case Taken(from, to) :: t if(!predictionTable(from)) => 1 + helper(t, predictionTable.updated(from, true)) + case NotTaken(addr) :: t if(!predictionTable(addr)) => 1 + helper(t, predictionTable.updated(addr, false)) + case NotTaken(addr) :: t if( predictionTable(addr)) => helper(t, predictionTable) + case _ => 0 } } @@ -191,14 +191,19 @@ ** Your task Your job is to implement a test that checks how many misses occur for a 2 bit branch predictor with 8 slots. The rule table is the same as in question 3. - For simplicitys sake, assume that every value in the table is initialized to 00. + The predictor does not use a branch target buffer (BTB), which means that the address will always be decoded in + the ID stage. + For you this means you do not need to keep track of branch targets, simplifying your simulation quite a bit. + (If not you would need to add logic for when BTB value does not match actual value) + + For simplicity's sake, assume that every value in the table is initialized to 00. For this task it is necessary to use something more sophisticated than ~Map[(Int, Boolean)]~ to represent your branch predictor model. The skeleton code is located in ~testRunner.scala~ and can be run using testOnly FiveStage.ProfileTest. - With a 2 bit 4 slot scheme, how many misses will you incur? + With a 2 bit 8 slot scheme, how many mispredicts will happen? Answer with a number. 
* Question 5 - Cache profiling From b8225def46fed9b4f75a8999297f41cb9f8a51ba Mon Sep 17 00:00:00 2001 From: peteraa Date: Mon, 28 Oct 2019 15:41:25 +0100 Subject: [PATCH 14/22] Here. Have some merge conflicts for manifest! --- src/test/scala/Manifest.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/test/scala/Manifest.scala b/src/test/scala/Manifest.scala index 2ab4eed..0186575 100644 --- a/src/test/scala/Manifest.scala +++ b/src/test/scala/Manifest.scala @@ -54,7 +54,7 @@ object Manifest { class ProfileBranching extends FlatSpec with Matchers { it should "profile some branches" in { TestRunner.profileBranching( - Manifest.singleTestOptions.copy(testName = "branchProfiling.s") + Manifest.singleTestOptions.copy(testName = "branchProfiling.s", maxSteps = 50000) ) should be(true) } } @@ -62,7 +62,7 @@ class ProfileBranching extends FlatSpec with Matchers { class ProfileCache extends FlatSpec with Matchers { it should "profile a cache" in { TestRunner.profileCache( - Manifest.singleTestOptions.copy(testName = "convolution.s") + Manifest.singleTestOptions.copy(testName = "convolution.s", maxSteps = 50000) ) should be(true) } } From daf8ea247b3a9febe1c548ea798e869800687c42 Mon Sep 17 00:00:00 2001 From: peteraa Date: Mon, 28 Oct 2019 16:21:52 +0100 Subject: [PATCH 15/22] More embarrasing errors fixed --- src/test/scala/Manifest.scala | 5 ++++- src/test/scala/RISCV/printUtils.scala | 1 + src/test/scala/RISCV/testRunner.scala | 8 +++++--- theory2.org | 4 ++-- 4 files changed, 12 insertions(+), 6 deletions(-) diff --git a/src/test/scala/Manifest.scala b/src/test/scala/Manifest.scala index 0186575..58b87d0 100644 --- a/src/test/scala/Manifest.scala +++ b/src/test/scala/Manifest.scala @@ -61,8 +61,11 @@ class ProfileBranching extends FlatSpec with Matchers { class ProfileCache extends FlatSpec with Matchers { it should "profile a cache" in { + say("Warning, this test takes forever to run! 
2 minutes on my machine at least.") + say("This happens due to the less than optimal way of storing the update log. Sorry I guess") + say("You probably want to debug this with a smaller program") TestRunner.profileCache( - Manifest.singleTestOptions.copy(testName = "convolution.s", maxSteps = 50000) + Manifest.singleTestOptions.copy(testName = "convolution.s", maxSteps = 150000) ) should be(true) } } diff --git a/src/test/scala/RISCV/printUtils.scala b/src/test/scala/RISCV/printUtils.scala index 02b76fc..4cc61fc 100644 --- a/src/test/scala/RISCV/printUtils.scala +++ b/src/test/scala/RISCV/printUtils.scala @@ -101,6 +101,7 @@ object PrintUtils { def binary: String = String.format("%" + 32 + "s", i.toBinaryString) .replace(' ', '0').grouped(4) .map(x => x + " ").mkString + def binary(n: Int): String = String.format("%" + n + "s", i.toBinaryString).replace(' ', '0') } diff --git a/src/test/scala/RISCV/testRunner.scala b/src/test/scala/RISCV/testRunner.scala index 96e5c4b..3d9d60c 100644 --- a/src/test/scala/RISCV/testRunner.scala +++ b/src/test/scala/RISCV/testRunner.scala @@ -150,9 +150,9 @@ object TestRunner { // called an if guard. 
case Taken(from, to) :: t if( predictionTable(from)) => helper(t, predictionTable) case Taken(from, to) :: t if(!predictionTable(from)) => 1 + helper(t, predictionTable.updated(from, true)) - case NotTaken(addr) :: t if(!predictionTable(addr)) => 1 + helper(t, predictionTable.updated(addr, false)) - case NotTaken(addr) :: t if( predictionTable(addr)) => helper(t, predictionTable) - case _ => 0 + case NotTaken(addr) :: t if( predictionTable(addr)) => 1 + helper(t, predictionTable.updated(addr, false)) + case NotTaken(addr) :: t if(!predictionTable(addr)) => helper(t, predictionTable) + case Nil => 0 } } @@ -164,9 +164,11 @@ object TestRunner { helper(events, initState) } + say(OneBitInfiniteSlots(events)) } + true } diff --git a/theory2.org b/theory2.org index 0837bb6..bd96f6b 100644 --- a/theory2.org +++ b/theory2.org @@ -172,8 +172,8 @@ // called an if guard. case Taken(from, to) :: t if( predictionTable(from)) => helper(t, predictionTable) case Taken(from, to) :: t if(!predictionTable(from)) => 1 + helper(t, predictionTable.updated(from, true)) - case NotTaken(addr) :: t if(!predictionTable(addr)) => 1 + helper(t, predictionTable.updated(addr, false)) - case NotTaken(addr) :: t if( predictionTable(addr)) => helper(t, predictionTable) + case NotTaken(addr) :: t if( predictionTable(addr)) => 1 + helper(t, predictionTable.updated(addr, false)) + case NotTaken(addr) :: t if(!predictionTable(addr)) => helper(t, predictionTable) case _ => 0 } } From af823ce0678f789ab8c447166703de4ca2dcf665 Mon Sep 17 00:00:00 2001 From: peteraa Date: Mon, 28 Oct 2019 16:31:09 +0100 Subject: [PATCH 16/22] Add hint to theory --- theory2.org | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/theory2.org b/theory2.org index bd96f6b..2f9557d 100644 --- a/theory2.org +++ b/theory2.org @@ -205,6 +205,21 @@ With a 2 bit 8 slot scheme, how many mispredicts will happen? Answer with a number. 
+ + Hint: Use the getTag method defined on int (in DataTypes.scala) to get the tag for an address. + #+BEGIN_SRC scala + val slots = 8 + say(0x1C40.getTag(slots)) // prints 0 + say(0x1C44.getTag(slots)) // prints 1 + say(0x1C48.getTag(slots)) // prints 2 + say(0x1C4C.getTag(slots)) // prints 3 + say(0x1C50.getTag(slots)) // prints 4 + say(0x1C54.getTag(slots)) // prints 5 + say(0x1C58.getTag(slots)) // prints 6 + say(0x1C5C.getTag(slots)) // prints 7 + say(0x1C60.getTag(slots)) // prints 0 (thus conflicts with 0x1C40) + #+END_SRC + * Question 5 - Cache profiling Unlike our design which has a very limited memory pool, real designs have access to vast amounts of memory, offset From 9f68d1733025ffc5c88a544b600bb8c11a0d71f0 Mon Sep 17 00:00:00 2001 From: peteraa Date: Tue, 29 Oct 2019 09:20:31 +0100 Subject: [PATCH 17/22] Fix branch predictor rule table. --- theory2.org | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/theory2.org b/theory2.org index 2f9557d..e534f30 100644 --- a/theory2.org +++ b/theory2.org @@ -101,8 +101,8 @@ state || predict taken || next state if taken || next state if not taken || =======||=================||=======================||==========================|| 00 || NO || 01 || 00 || - 01 || NO || 11 || 00 || - 10 || YES || 11 || 00 || + 01 || NO || 10 || 00 || + 10 || YES || 11 || 01 || 11 || YES || 11 || 10 || #+end_src From ac79baec5a2707e039dc1878efaf2b8dedb1816c Mon Sep 17 00:00:00 2001 From: peteraa Date: Mon, 4 Nov 2019 16:19:15 +0100 Subject: [PATCH 18/22] Filter out convolution.s from allTests. 
--- src/test/scala/Manifest.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/test/scala/Manifest.scala b/src/test/scala/Manifest.scala index 58b87d0..4298317 100644 --- a/src/test/scala/Manifest.scala +++ b/src/test/scala/Manifest.scala @@ -79,7 +79,7 @@ class SingleTest extends FlatSpec with Matchers { class AllTests extends FlatSpec with Matchers { it should "just werk" in { - val werks = getAllTestNames.map{testname => + val werks = getAllTestNames.filterNot(_ == "convolution.s").map{testname => say(s"testing $testname") val opts = Manifest.allTestOptions(testname) (testname, TestRunner.run(opts)) From 6223a71046a8ac6651c642ef0c92d378564c1d11 Mon Sep 17 00:00:00 2001 From: peteraa Date: Mon, 4 Nov 2019 16:19:47 +0100 Subject: [PATCH 19/22] aaaa --- theory2.org | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/theory2.org b/theory2.org index e534f30..78b3ec9 100644 --- a/theory2.org +++ b/theory2.org @@ -247,8 +247,8 @@ ** Your task Your job is to implement a model that tests how many delay cycles will occur for a cache which: + Follows a 2-way associative scheme - + Block size is 4 words (128 bits) - + Is write-through write no-allocate + + Block size is 4 words (128 bits) (total cache size: a whopping 256 bits) + + Is write-through write no-allocate (this means that you can ignore stores, only loads will affect the cache) + Eviction policy is LRU (least recently used) Your answer should be the number of cache miss latency cycles when using this cache. 
From 3c979b1f340e8686f97d16c71e101119ead0a251 Mon Sep 17 00:00:00 2001 From: peteraa Date: Mon, 4 Nov 2019 16:57:26 +0100 Subject: [PATCH 20/22] john madden --- theory2.org | 2 ++ 1 file changed, 2 insertions(+) diff --git a/theory2.org b/theory2.org index 78b3ec9..95b43ec 100644 --- a/theory2.org +++ b/theory2.org @@ -105,6 +105,8 @@ 10 || YES || 11 || 01 || 11 || YES || 11 || 10 || #+end_src + + (This is known as a saturating 2bit counter, it is *not* the same scheme as in the lecture slides) At some point during execution the program counter is ~0xc~ and the branch predictor table looks like this: #+begin_src text From b8ae0092c1594d85b1797ce185c6dce7f5926dbe Mon Sep 17 00:00:00 2001 From: peteraa Date: Wed, 13 Nov 2019 16:49:58 +0100 Subject: [PATCH 21/22] Fix contradiction in question 5. --- theory2.org | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/theory2.org b/theory2.org index 95b43ec..f98e836 100644 --- a/theory2.org +++ b/theory2.org @@ -233,7 +233,7 @@ We will therefore assume the following: + Reads from main memory takes 5 cycles - + cache has a total storage of 32 words (1024 bits) + + cache has a total storage of 8 words (256 bits) + cache reads work as they do now (i.e no additional latency) For this exercise you will write a program that parses a log of memory events, similar to previous task @@ -249,8 +249,29 @@ ** Your task Your job is to implement a model that tests how many delay cycles will occur for a cache which: + Follows a 2-way associative scheme - + Block size is 4 words (128 bits) (total cache size: a whopping 256 bits) + + set size is 4 words (128 bits) (total cache size: a whopping 256 bits) + + Block size is 1 word (32 bits) meaning that we *do not need a block offset*. 
+ Is write-through write no-allocate (this means that you can ignore stores, only loads will affect the cache) + Eviction policy is LRU (least recently used) + + In the typical cache each block has more than 32 bits, requiring an offset, however the + simulated cache does not. + This means that the simulated cache has two sets of 4 words, greatly reducing the complexity + of your implementation. + + Additionally, assume that writes do not change the LRU counter. + This means that your cache will only consider which value was most recently loaded, + not written. + It's not realistic, but it allows you to completely disregard write events (you can + just filter them out if you want.) Your answer should be the number of cache miss latency cycles when using this cache. + +*** Further study + If you have the time I strongly encourage you to experiment with a larger cache with bigger + block sizes, forcing you to implement the additional complexity of block offsets. + Likewise, by trying a different scheme than write-through no-allocate you will get a much + better grasp on how exactly the cache works. + This is *not* a deliverable, just something I encourage you to tinker with to get a better + understanding. + From cb1a810317fbdb4bd37fed0220504dc471eedee3 Mon Sep 17 00:00:00 2001 From: Peter Aaser Date: Thu, 14 Nov 2019 12:09:18 +0100 Subject: [PATCH 22/22] Update Manifest.scala Accidentally had babby mode set to true. sorry --- src/test/scala/Manifest.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/test/scala/Manifest.scala b/src/test/scala/Manifest.scala index 4298317..cc6ed6b 100644 --- a/src/test/scala/Manifest.scala +++ b/src/test/scala/Manifest.scala @@ -20,7 +20,7 @@ object Manifest { val singleTest = "forward2.s" - val nopPadded = true + val nopPadded = false val singleTestOptions = TestOptions( printIfSuccessful = true,