from amaranth import C, Module, Shape, Signal, unsigned
from amaranth.utils import exact_log2
from amaranth.lib.wiring import Component, Out, In, connect, flipped, Signature
from amaranth.lib.data import StructLayout, View
from amaranth.lib.memory import Memory
from amaranth_soc import wishbone


def cache_addr_layout(index_len, tag_len):
    return StructLayout({
        "index": unsigned(index_len),
        "tag": unsigned(tag_len)
    })


def tag_data_layout(tag_len, num_ways):
    if num_ways == 2:
        return StructLayout({
            "tag": unsigned(tag_len),
            "last_way": unsigned(1),
            "valid": unsigned(1)
        })
    else:
        return StructLayout({
            "tag": unsigned(tag_len),
            "valid": unsigned(1)
        })
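

# A quick worked example of how these layouts slice an address (using the
# same numbers as the __main__ block at the bottom): with addr_width=30 and
# num_entries_per_way=256, exact_log2(256) == 8, so index_len=8 and
# tag_len=30-8=22. StructLayout allocates fields LSB-first, so a View over a
# 30-bit Wishbone address `adr` (a stand-in name for illustration) decomposes
# as:
#
#     addr = View(cache_addr_layout(8, 22), adr)
#     addr.index   # adr[0:8]  -- selects the cache line
#     addr.tag     # adr[8:30] -- compared against the stored tag on lookup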


class WishboneMinimalICache(Component):
    def __init__(self, *, addr_width, data_width, granularity,
                 num_entries_per_way, num_ways=1):
        if num_ways not in (1, 2):
            raise ValueError("Number of ways must be 1 or 2.")

        self.addr_width = addr_width
        self.data_width = data_width
        self.granularity = granularity
        self.num_ways = num_ways
        self.index_len = exact_log2(num_entries_per_way)
        self.tag_len = self.addr_width - self.index_len
        self.sel_mask = C(-1, Shape(self.data_width // self.granularity,
                                    signed=False))

        self.data_0 = Memory(shape=self.data_width,
                             depth=2**self.index_len, init=[])
        self.tags_0 = Memory(shape=tag_data_layout(self.tag_len, num_ways),
                             depth=2**self.index_len, init=[])
        if self.num_ways == 2:
            self.data_1 = Memory(shape=self.data_width,
                                 depth=2**self.index_len, init=[])
            self.tags_1 = Memory(shape=tag_data_layout(self.tag_len,
                                                       num_ways),
                                 depth=2**self.index_len, init=[])

        sig = {
            "cpu": In(wishbone.Signature(addr_width=addr_width,
                                         data_width=data_width,
                                         granularity=granularity)),
            "en": In(1),
            "inval": In(Signature({
                "req": Out(1),
                "resp": In(1)
            })),
            "system": Out(wishbone.Signature(addr_width=addr_width,
                                             data_width=data_width,
                                             granularity=granularity))
        }
        super().__init__(sig)

    def elaborate(self, plat):
        m = Module()

        m.submodules.data_0 = self.data_0
        m.submodules.tags_0 = self.tags_0
        data_0_port_w = self.data_0.write_port()
        tags_0_port_w = self.tags_0.write_port()
        data_0_port_r = self.data_0.read_port(
            transparent_for=(data_0_port_w,))
        tags_0_port_r = self.tags_0.read_port(
            transparent_for=(tags_0_port_w,))

        if self.num_ways == 2:
            m.submodules.data_1 = self.data_1
            m.submodules.tags_1 = self.tags_1
            data_1_port_w = self.data_1.write_port()
            data_1_port_r = self.data_1.read_port(
                transparent_for=(data_1_port_w,))
            tags_1_port_w = self.tags_1.write_port()
            tags_1_port_r = self.tags_1.read_port(
                transparent_for=(tags_1_port_w,))

        cache_in = View(cache_addr_layout(self.index_len, self.tag_len),
                        self.cpu.adr)
        tag_0_in = View(tag_data_layout(self.tag_len, self.num_ways),
                        tags_0_port_w.data)
        tag_0_out = View(tag_data_layout(self.tag_len, self.num_ways),
                         tags_0_port_r.data)

        connect(m, flipped(self.system), flipped(self.cpu))

        m.d.comb += [
            data_0_port_r.addr.eq(cache_in.index),
            tags_0_port_r.addr.eq(cache_in.index),
            data_0_port_w.addr.eq(cache_in.index),
            tags_0_port_w.addr.eq(cache_in.index),
            # We keep read-enable normally off so that we can loop back tag
            # data from a read many cycles ago to the write port. With this,
            # we can set and clear individual bits in a Memory. The data
            # path doesn't really need it, but there's no harm.
            data_0_port_r.en.eq(0),
            tags_0_port_r.en.eq(0),
            data_0_port_w.data.eq(self.system.dat_r),
            tag_0_in.tag.eq(cache_in.tag)
        ]

        curr_line = Signal(self.index_len)
        uncached_fetch = Signal(1)

        if self.num_ways == 2:
            tag_1_in = View(tag_data_layout(self.tag_len, self.num_ways),
                            tags_1_port_w.data)
            tag_1_out = View(tag_data_layout(self.tag_len, self.num_ways),
                             tags_1_port_r.data)

            m.d.comb += [
                data_1_port_r.addr.eq(cache_in.index),
                tags_1_port_r.addr.eq(cache_in.index),
                data_1_port_w.addr.eq(cache_in.index),
                tags_1_port_w.addr.eq(cache_in.index),
                data_1_port_r.en.eq(0),
                tags_1_port_r.en.eq(0),
                data_1_port_w.data.eq(self.system.dat_r),
                tag_1_in.tag.eq(cache_in.tag)
            ]

            curr_way_match = Signal(1)
            curr_way_fill = Signal(1)

        with m.FSM(init="FLUSH"):
            with m.State("IDLE"):
                m.d.comb += [
                    self.system.cyc.eq(0),
                    self.system.stb.eq(0)
                ]

                with m.If(self.inval.req == 1):
                    m.next = "FLUSH"
                with m.Elif(self.cpu.cyc & self.cpu.stb):
                    with m.If(self.en & ~self.cpu.we):
                        m.d.comb += [
                            data_0_port_r.en.eq(1),
                            tags_0_port_r.en.eq(1),
                        ]
                        if self.num_ways == 2:
                            m.d.comb += [
                                data_1_port_r.en.eq(1),
                                tags_1_port_r.en.eq(1),
                            ]
                        m.next = "CHECK"
                    with m.Else():
                        # Get a head start on fetching.
                        m.d.comb += [
                            self.system.cyc.eq(1),
                            self.system.stb.eq(1)
                        ]
                        m.d.sync += uncached_fetch.eq(1)
                        m.next = "FETCH"

            with m.State("CHECK"):
                m.d.comb += [
                    self.system.cyc.eq(0),
                    self.system.stb.eq(0)
                ]

                with m.If((tag_0_out.tag == cache_in.tag) & tag_0_out.valid):
                    m.d.comb += [
                        self.cpu.ack.eq(1),
                        self.cpu.dat_r.eq(data_0_port_r.data)
                    ]
                    if self.num_ways == 2:
                        m.d.comb += [
                            curr_way_match.eq(0),
                            # Mark way 0 as last used. Use the
                            # previously-read data to set/clear the
                            # last_way bit.
                            tags_0_port_w.data.eq(tags_0_port_r.data),
                            tags_1_port_w.data.eq(tags_1_port_r.data),
                            tag_0_in.last_way.eq(1),
                            tag_1_in.last_way.eq(0),
                            tags_0_port_w.en.eq(1),
                            tags_1_port_w.en.eq(1),
                        ]
                    m.next = "IDLE"

                if self.num_ways == 2:
                    with m.Elif((tag_1_out.tag == cache_in.tag) &
                                tag_1_out.valid):
                        m.d.comb += [
                            self.cpu.ack.eq(1),
                            self.cpu.dat_r.eq(data_1_port_r.data),
                            curr_way_match.eq(1),
                            # Mark way 1 as last used.
                            tags_0_port_w.data.eq(tags_0_port_r.data),
                            tags_1_port_w.data.eq(tags_1_port_r.data),
                            tag_0_in.last_way.eq(0),
                            tag_1_in.last_way.eq(1),
                            tags_0_port_w.en.eq(1),
                            tags_1_port_w.en.eq(1),
                        ]
                        m.next = "IDLE"

                with m.Else():
                    # Get a head start on fetching.
                    m.d.comb += [
                        self.system.cyc.eq(1),
                        self.system.stb.eq(1)
                    ]

                    # And figure out which way to fill.
                    if self.num_ways == 2:
                        # If both ways are valid, evict the one that was
                        # not used most recently.
                        with m.If(tag_0_out.valid & tag_1_out.valid):
                            with m.If(~tag_0_out.last_way):
                                m.d.sync += curr_way_fill.eq(0)
                            with m.Elif(~tag_1_out.last_way):
                                m.d.sync += curr_way_fill.eq(1)
                            with m.Else():
                                # Just do something... this shouldn't
                                # happen.
                                m.d.sync += curr_way_fill.eq(~curr_way_fill)
                        with m.Elif(~tag_0_out.valid):
                            m.d.sync += curr_way_fill.eq(0)
                        with m.Else():
                            m.d.sync += curr_way_fill.eq(1)

                    m.next = "FETCH"
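
            # Note on the replacement policy before FETCH runs: taken
            # together, the last_way bits implement true LRU for two ways.
            # A hit (or a fill) in way W sets W's last_way bit and clears
            # the other way's, and a miss evicts whichever way has its
            # last_way bit clear. The "both bits set" fallback above
            # shouldn't be reachable, but is handled just in case.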

            with m.State("FETCH"):
                with m.If(self.system.cyc & self.system.stb &
                          self.system.ack):
                    m.d.sync += uncached_fetch.eq(0)

                    # For simplicity of the write path, we can only cache
                    # full words, and only if caching was requested.
                    with m.If((self.cpu.sel == self.sel_mask) &
                              ~uncached_fetch):
                        # Handles the direct-mapped case.
                        m.d.comb += [
                            data_0_port_w.en.eq(1),
                            tags_0_port_w.en.eq(1),
                            tag_0_in.valid.eq(1),
                        ]

                        if self.num_ways == 2:
                            m.d.comb += tags_1_port_w.en.eq(1)

                            with m.If(curr_way_fill == 0):
                                m.d.comb += [
                                    tag_0_in.last_way.eq(1),
                                    # We haven't done a read from tag memory
                                    # since we left IDLE, so the read port
                                    # still has the current tag. Clear the
                                    # last-way bit, leave other data alone.
                                    tags_1_port_w.data.eq(tags_1_port_r.data),
                                    tag_1_in.last_way.eq(0),
                                ]
                            with m.Elif(curr_way_fill == 1):
                                m.d.comb += [
                                    # Whoops, bad assumption above; don't
                                    # write data_0!
                                    data_0_port_w.en.eq(0),
                                    data_1_port_w.en.eq(1),
                                    tag_1_in.valid.eq(1),
                                    tag_1_in.last_way.eq(1),
                                    # Clear the last-way bit, leave other
                                    # data alone. This overrides the
                                    # accesses from the tag_0_in View above.
                                    tags_0_port_w.data.eq(tags_0_port_r.data),
                                    tag_0_in.last_way.eq(0),
                                ]

                    # Even if we couldn't cache it, the initiator has the
                    # data, so go back to idle.
                    m.next = "IDLE"

            with m.State("FLUSH"):
                m.d.comb += [
                    self.system.cyc.eq(0),
                    self.system.stb.eq(0)
                ]

                m.d.sync += curr_line.eq(curr_line + 1)
                m.d.comb += [
                    tag_0_in.valid.eq(0),
                    tags_0_port_w.addr.eq(curr_line),
                    tags_0_port_w.en.eq(1),
                ]
                if self.num_ways == 2:
                    m.d.comb += [
                        tag_1_in.valid.eq(0),
                        tags_1_port_w.addr.eq(curr_line),
                        tags_1_port_w.en.eq(1),
                    ]

                with m.If((curr_line + 1)[0:self.index_len] == 0):
                    m.d.comb += self.inval.resp.eq(1)
                    m.next = "IDLE"

        return m


if __name__ == "__main__":
    from amaranth.back import verilog
    print(verilog.convert(WishboneMinimalICache(addr_width=30,
                                                data_width=32,
                                                granularity=8,
                                                num_entries_per_way=256),
                          name="cache"))
    # print(verilog.convert(WishboneMinimalICache(addr_width=15,
    #                                             data_width=16,
    #                                             granularity=8,
    #                                             num_entries_per_way=256)))
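

# A minimal simulation smoke test, sketched against the Amaranth 0.5 async
# testbench API. This is an illustration, not part of the original design:
# the address and data values are arbitrary, and the testbench itself plays
# the role of memory on the system bus. Invoke _sim_smoke_test() from a
# Python shell or test runner to try it.
def _sim_smoke_test():
    from amaranth.sim import Simulator

    dut = WishboneMinimalICache(addr_width=30, data_width=32, granularity=8,
                                num_entries_per_way=256)

    async def bench(ctx):
        ctx.set(dut.en, 1)
        # The FSM resets into FLUSH; give it one tick per line, plus slack.
        await ctx.tick().repeat(2**dut.index_len + 1)
        # Full-word read request on the CPU bus (sel must be all ones for
        # the fetched word to be cached).
        ctx.set(dut.cpu.cyc, 1)
        ctx.set(dut.cpu.stb, 1)
        ctx.set(dut.cpu.adr, 0x1234)
        ctx.set(dut.cpu.sel, 0b1111)
        # On a miss the fetch appears on the system bus; answer it. CHECK
        # asserts cyc/stb one cycle early (the head start), so wait one
        # extra tick before acking in FETCH.
        await ctx.tick().until(dut.system.cyc & dut.system.stb)
        await ctx.tick()
        ctx.set(dut.system.ack, 1)
        ctx.set(dut.system.dat_r, 0xDEADBEEF)
        # The ack and data pass through to the CPU bus during FETCH.
        await ctx.tick().until(dut.cpu.ack)
        assert ctx.get(dut.cpu.dat_r) == 0xDEADBEEF
        ctx.set(dut.system.ack, 0)
        ctx.set(dut.cpu.cyc, 0)
        ctx.set(dut.cpu.stb, 0)

    sim = Simulator(dut)
    sim.add_clock(1e-6)
    sim.add_testbench(bench)
    sim.run()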