"""Combinational and pipelined unsigned multipliers written in Amaranth.

Run as a script, the module converts a multiplier to Verilog by default,
or dispatches to ``sim`` / ``stats`` when given as the first argument.
"""

from collections import deque
import re
import subprocess
import sys

from amaranth import Elaboratable, Module, Signal
from amaranth.back import rtlil, verilog
from amaranth.lib.data import ArrayLayout, StructLayout
from amaranth.sim import Simulator


class RunnerError(Exception):
    """Error type declared by this module (its raise sites are not visible)."""


class NaiveMul(Elaboratable):
    """Single-cycle combinational multiplier: ``o = a * b``.

    Attributes:
        a, b: ``width``-bit operands.
        o: ``2 * width``-bit product.
    """

    def __init__(self, width=8):
        self.a = Signal(width)
        self.b = Signal(width)
        self.o = Signal(2 * width)

    def elaborate(self, platform):
        """Build the module; *platform* is unused."""
        m = Module()
        m.d.comb += self.o.eq(self.a * self.b)
        return m


class PipelinedMul(Elaboratable):
    """Shift-and-add multiplier with one register stage per bit of ``b``.

    Stage 0 registers ``a * b[0]``; stage ``i`` adds ``(a * b[i]) << i`` to
    the previous stage's partial sum.  The operands travel down the pipeline
    next to the partial sums, so ``o`` shows the product of the operands
    that were presented ``width`` clock cycles earlier.

    Args:
        width: Operand width in bits; ``o`` is ``2 * width`` bits wide.
        debug: When true, extra probe signals are created for stages
            ``1 .. width - 1``.

    After ``elaborate()`` has run, ``self.pin`` and ``self.pout`` refer to
    the operand and partial-sum pipeline registers.
    """

    def __init__(self, width=8, debug=False):
        self.width = width
        self.a = Signal(self.width)
        self.b = Signal(self.width)
        self.o = Signal(2 * self.width)
        self.debug = debug

    #                 x7x6x5x4x3x2x1x0
    #               * y7y6y5y4y3y2y1y0
    #                 ----------------
    #                 x7x6x5x4x3x2x1x0: z0 = x7x6x5x4x3x2x1x0*y0
    #               x7x6x5x4x3x2x1x0: z1 = (x7x6x5x4x3x2x1x0*y1 << 1) + z0
    #             x7x6x5x4x3x2x1x0: z2 = (x7x6x5x4x3x2x1x0*y2 << 2) + z1
    #           x7x6x5x4x3x2x1x0: z3 = (x7x6x5x4x3x2x1x0*y3 << 3) + z2
    #         x7x6x5x4x3x2x1x0: z4 = (x7x6x5x4x3x2x1x0*y4 << 4) + z3
    #       x7x6x5x4x3x2x1x0: z5 = (x7x6x5x4x3x2x1x0*y5 << 5) + z4
    #     x7x6x5x4x3x2x1x0: z6 = (x7x6x5x4x3x2x1x0*y6 << 6) + z5
    # + x7x6x5x4x3x2x1x0: z7 = (x7x6x5x4x3x2x1x0*y7 << 7) + z6
    def elaborate(self, platform):
        """Build the pipeline; *platform* is unused."""
        m = Module()
        operand_width = self.width
        product_width = 2 * self.width

        pipeline_in = Signal(ArrayLayout(
            StructLayout(members={
                "a": self.a.shape(),
                "b": self.b.shape(),
            }),
            self.width))
        # Relies on the optimizer to realize that not all 2*self.width^2
        # bits are actually used (we only need 80% of them for width=8).
        pipeline_out = Signal(ArrayLayout(product_width, self.width))

        def prob_pipeline_stage(i):
            """Mirror stage *i*'s registers onto standalone probe signals."""
            if i == 0:
                stage_out = Signal(product_width)
                m.d.comb += stage_out.eq(pipeline_out[0])
                # Expose the pipeline registers as attributes of the
                # elaboratable.
                self.pin = pipeline_in
                self.pout = pipeline_out
            else:
                # Probes are sized from ``width`` rather than a fixed 8/16
                # bits so they stay correct for any operand width.
                stage_ina = Signal(operand_width)
                stage_inb = Signal(operand_width)
                m.d.comb += stage_ina.eq(self.pin[i - 1].a)
                m.d.comb += stage_inb.eq(self.pin[i - 1].b)
                stage_out = Signal(product_width)
                m.d.comb += stage_out.eq(self.pout[i])

        # Stage 0 is probed regardless of ``debug``: this call is also what
        # publishes ``self.pin`` / ``self.pout``.
        prob_pipeline_stage(0)

        m.d.sync += [
            pipeline_in[0].a.eq(self.a),
            pipeline_in[0].b.eq(self.b),
        ]
        m.d.sync += pipeline_out[0].eq(self.a * self.b[0])

        for i in range(1, self.width):
            if self.debug:
                prob_pipeline_stage(i)

            prev = pipeline_in[i - 1]
            m.d.sync += pipeline_in[i].eq(prev)
            # This relies on the optimizer realizing we're doing a mul by a
            # 1 bit number (pipeline_in[i - 1].b[i]) with leading zeros.
            m.d.sync += pipeline_out[i].eq(
                ((prev.a * prev.b[i]) << i) + pipeline_out[i - 1])

        m.d.comb += self.o.eq(pipeline_out[self.width - 1])
        return m


def stats(m):
    """Convert *m* to RTLIL as the first step of a yosys-based report.

    Args:
        m: A multiplier exposing ``a``, ``b`` and ``o`` ports.

    Raises:
        NotImplementedError: always, in this copy of the file.  The source
            text is truncated partway through this function, so the yosys
            script and everything after it are missing.
    """
    # if args.action == "size":
    #     fragment = Fragment.get(design, platform)
    #     rtlil_text = rtlil.convert(fragment, name=name, ports=ports)
    rtlil_text = rtlil.convert(m, ports=[m.a, m.b, m.o])

    # Created from a combination of amaranth._toolchain.yosys and
    # amaranth.back.verilog. Script comes from nextpnr-generic.
    script = []
    # FIXME(review): the file is truncated in the middle of the next
    # statement.  Only this prefix survives:
    #     script.append("read_ilang <
    # Restore the rest of the function from version control instead of
    # guessing at the yosys script.
    raise NotImplementedError(
        "stats(): the body of this function is missing from the source file")


def pipe_tb(m):
    """Placeholder for the simulation testbench.

    FIXME(review): the command-line dispatch below calls ``pipe_tb(m)``, but
    its definition is not present in this copy of the file.  Restore it from
    version control.
    """
    raise NotImplementedError(
        "pipe_tb(): the testbench is missing from the source file")


if __name__ == "__main__":
    # NOTE(review): the code that created ``m`` was lost together with the
    # rest of ``stats``.  A default-configured PipelinedMul and this
    # ``__main__`` guard are assumptions -- confirm against the original.
    m = PipelinedMul()

    if len(sys.argv) > 1 and sys.argv[1] == "sim":
        pipe_tb(m)
    elif len(sys.argv) > 1 and sys.argv[1] == "stats":
        stats(m)
    else:
        print(verilog.convert(m, ports=[m.a, m.b, m.o]))