/*
 * Origin: mw/milkymist.git, cores/fmlbrg/rtl/fmlbrg.v
 * Git object id: a10182283ae5b08a5647417b97d0d824255b9427
 */
/*
 * Milkymist VJ SoC
 * Copyright (C) 2007, 2008, 2009, 2010 Sebastien Bourdeauducq
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, version 3 of the License.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

/*
 * fmlbrg - bridge between a 32-bit WISHBONE slave port and a 64-bit FML
 * master port, with a direct-mapped, write-back, write-allocate cache in
 * between and a second read-only lookup port ("Direct Cache Bus", dcb_*).
 *
 * Parameters:
 *   fml_depth       number of address bits driven on fml_adr / taken from
 *                   dcb_adr and the low bits of wb_adr_i
 *   cache_depth     log2 of the cache size in bytes
 *   invalidate_bit  bit of wb_adr_i which, when set, turns the WISHBONE
 *                   access into a flush+invalidate of the addressed line
 *
 * WISHBONE side: a cycle is started by wb_cyc_i & wb_stb_i and completed
 * by a single-cycle wb_ack_o. wb_cti_i is accepted but not used.
 *
 * FML side: a line is transferred as a burst of four 64-bit words. The
 * FSM holds fml_stb until fml_ack, then moves one word per clock.
 *
 * DCB side: dcb_stb/dcb_adr are sampled on a clock edge; dcb_dat and
 * dcb_hit are valid in the following cycle.
 */
module fmlbrg #(
        parameter fml_depth = 26,
        parameter cache_depth = 14, /* 16kb cache */
        parameter invalidate_bit = 26
) (
        input sys_clk,
        input sys_rst,

        input [31:0] wb_adr_i,
        input [2:0] wb_cti_i,
        input [31:0] wb_dat_i,
        output [31:0] wb_dat_o,
        input [3:0] wb_sel_i,
        input wb_cyc_i,
        input wb_stb_i,
        input wb_we_i,
        output reg wb_ack_o,

        output reg [fml_depth-1:0] fml_adr,
        output reg fml_stb,
        output reg fml_we,
        input fml_ack,
        output [7:0] fml_sel,
        output [63:0] fml_do,
        input [63:0] fml_di,

        /* Direct Cache Bus */
        input dcb_stb,
        input [fml_depth-1:0] dcb_adr,
        output [63:0] dcb_dat,
        output dcb_hit
);

/*
 * Line length is the burst length, that is 4*64 bits, or 32 bytes
 * Address split up :
 *
 * |             TAG            |         INDEX          |   OFFSET   |
 * |fml_depth-1      cache_depth|cache_depth-1          5|4          0|
 *
 */

wire [4:0] offset = wb_adr_i[4:0];
wire [cache_depth-1-5:0] index = wb_adr_i[cache_depth-1:5];
wire [fml_depth-cache_depth-1:0] tag = wb_adr_i[fml_depth-1:cache_depth];

wire [4:0] dcb_offset = dcb_adr[4:0];
wire [cache_depth-1-5:0] dcb_index = dcb_adr[cache_depth-1:5];
wire [fml_depth-cache_depth-1:0] dcb_tag = dcb_adr[fml_depth-1:cache_depth];

/*
 * Set when the WISHBONE and DCB accesses carry the same tag. The index is
 * not compared, so this is a conservative "may be the same line" test.
 */
wire coincidence = tag == dcb_tag;

/*
 * TAG MEMORY
 *
 * Addressed by index (length cache_depth-5)
 * Contains valid bit + dirty bit + tag
 *
 * Port 1 (a/we/di/do) is used by the WISHBONE FSM, port 2 (a2/do2) is the
 * read-only port used for DCB lookups.
 */

wire [cache_depth-1-5:0] tagmem_a;
reg tagmem_we;
wire [fml_depth-cache_depth-1+2:0] tagmem_di;
wire [fml_depth-cache_depth-1+2:0] tagmem_do;

wire [cache_depth-1-5:0] tagmem_a2;
wire [fml_depth-cache_depth-1+2:0] tagmem_do2;

fmlbrg_tagmem #(
        .depth(cache_depth-5),
        .width(fml_depth-cache_depth+2)
) tagmem (
        .sys_clk(sys_clk),

        .a(tagmem_a),
        .we(tagmem_we),
        .di(tagmem_di),
        .do(tagmem_do),

        .a2(tagmem_a2),
        .do2(tagmem_do2)
);

/*
 * Copy of the WISHBONE index, frozen (index_load = 0) for the tail of a
 * refill burst, after the WISHBONE cycle has been acked and the master is
 * free to change its address.
 */
reg index_load;
reg [cache_depth-1-5:0] index_r;
always @(posedge sys_clk) begin
        if(index_load)
                index_r <= index;
end

assign tagmem_a = index;

assign tagmem_a2 = dcb_index;

reg di_valid;
reg di_dirty;
assign tagmem_di = {di_valid, di_dirty, tag};

wire do_valid;
wire do_dirty;
wire [fml_depth-cache_depth-1:0] do_tag;
wire cache_hit;

wire do2_valid;
wire [fml_depth-cache_depth-1:0] do2_tag;

assign do_valid = tagmem_do[fml_depth-cache_depth-1+2];
assign do_dirty = tagmem_do[fml_depth-cache_depth-1+1];
assign do_tag = tagmem_do[fml_depth-cache_depth-1:0];

assign do2_valid = tagmem_do2[fml_depth-cache_depth-1+2];
assign do2_tag = tagmem_do2[fml_depth-cache_depth-1:0];

/*
 * The FML address is built from the tag currently read out of the tag
 * memory: the old tag while evicting, the freshly written tag while
 * refilling (hence the REFILL_WAIT state below).
 */
always @(posedge sys_clk)
        fml_adr <= {do_tag, index, offset};

/*
 * DATA MEMORY
 *
 * Addressed by index+offset in 64-bit words (length cache_depth-3)
 * 64-bit memory with 8-bit write granularity
 */

wire [cache_depth-3-1:0] datamem_a;
wire [7:0] datamem_we;
reg [63:0] datamem_di;
wire [63:0] datamem_do;

wire [cache_depth-3-1:0] datamem_a2;
wire [63:0] datamem_do2;

fmlbrg_datamem #(
        .depth(cache_depth-3)
) datamem (
        .sys_clk(sys_clk),

        .a(datamem_a),
        .we(datamem_we),
        .di(datamem_di),
        .do(datamem_do),

        .a2(datamem_a2),
        .do2(datamem_do2)
);

/* Burst counter: selects the 64-bit word inside the current line. */
reg [1:0] bcounter;
reg [1:0] bcounter_next;
always @(posedge sys_clk) begin
        if(sys_rst)
                bcounter <= 2'd0;
        else
                bcounter <= bcounter_next;
end

reg bcounter_load;
reg bcounter_en;
/* Combinational next-value logic, hence blocking assignments. */
always @(*) begin
        if(bcounter_load)
                bcounter_next = offset[4:3];
        else if(bcounter_en)
                bcounter_next = bcounter + 2'd1;
        else
                bcounter_next = bcounter;
end

/*
 * The data memory is addressed with the *next* counter value so that its
 * output lines up with the registered counter one cycle later.
 */
assign datamem_a = { index_load ? index : index_r, bcounter_next };

assign datamem_a2 = {dcb_index, dcb_offset[4:3]};

reg datamem_we_wb;
reg datamem_we_fml;

/*
 * Byte write enables: all eight lanes for a word coming from FML, or the
 * WISHBONE byte selects steered to the 32-bit half chosen by wb_adr_i[2].
 */
assign datamem_we = {8{datamem_we_fml}}
        |({8{datamem_we_wb &  wb_adr_i[2]}} & {4'h0, wb_sel_i})
        |({8{datamem_we_wb & ~wb_adr_i[2]}} & {wb_sel_i, 4'h0});

/* Write data: the FML word, with the selected WISHBONE bytes laid on top. */
always @(*) begin
        datamem_di = fml_di;
        if(datamem_we_wb) begin
                if(wb_adr_i[2]) begin
                        /* lower 32-bit word */
                        if(wb_sel_i[0])
                                datamem_di[7:0] = wb_dat_i[7:0];
                        if(wb_sel_i[1])
                                datamem_di[15:8] = wb_dat_i[15:8];
                        if(wb_sel_i[2])
                                datamem_di[23:16] = wb_dat_i[23:16];
                        if(wb_sel_i[3])
                                datamem_di[31:24] = wb_dat_i[31:24];
                end else begin
                        /* upper 32-bit word */
                        if(wb_sel_i[0])
                                datamem_di[39:32] = wb_dat_i[7:0];
                        if(wb_sel_i[1])
                                datamem_di[47:40] = wb_dat_i[15:8];
                        if(wb_sel_i[2])
                                datamem_di[55:48] = wb_dat_i[23:16];
                        if(wb_sel_i[3])
                                datamem_di[63:56] = wb_dat_i[31:24];
                end
        end
end

assign wb_dat_o = wb_adr_i[2] ? datamem_do[31:0] : datamem_do[63:32];
assign fml_do = datamem_do;
assign fml_sel = 8'hff;
assign dcb_dat = datamem_do2;

/* FSM */

/*
 * The tag memory output appears one cycle after its address, so the
 * requested tag is delayed by one cycle before being compared with it.
 */
reg [fml_depth-cache_depth-1:0] tag_r;
always @(posedge sys_clk)
        tag_r <= tag;
assign cache_hit = do_valid & (do_tag == tag_r);

reg [3:0] state;
reg [3:0] next_state;

parameter IDLE                  = 4'd0;
parameter TEST_HIT              = 4'd1;
parameter WRITE_HIT             = 4'd2;

parameter EVICT                 = 4'd3;
parameter EVICT2                = 4'd4;
parameter EVICT3                = 4'd5;
parameter EVICT4                = 4'd6;

parameter REFILL                = 4'd7;
parameter REFILL_WAIT           = 4'd8;
parameter REFILL1               = 4'd9;
parameter REFILL2               = 4'd10;
parameter REFILL3               = 4'd11;
parameter REFILL4               = 4'd12;

parameter TEST_INVALIDATE       = 4'd13;
parameter INVALIDATE            = 4'd14;

/*
 * State register. Non-blocking, so that other clocked processes reading
 * 'state' on the same edge consistently observe the pre-edge value.
 */
always @(posedge sys_clk) begin
        if(sys_rst)
                state <= IDLE;
        else begin
                //$display("state: %d -> %d", state, next_state);
                state <= next_state;
        end
end

/* Next-state and output logic. Every output gets a default value first. */
always @(*) begin
        tagmem_we = 1'b0;
        di_valid = 1'b0;
        di_dirty = 1'b0;

        bcounter_load = 1'b0;
        bcounter_en = 1'b0;

        index_load = 1'b1;

        datamem_we_wb = 1'b0;
        datamem_we_fml = 1'b0;

        wb_ack_o = 1'b0;

        fml_stb = 1'b0;
        fml_we = 1'b0;

        next_state = state;

        case(state)
                IDLE: begin
                        bcounter_load = 1'b1;
                        if(wb_cyc_i & wb_stb_i) begin
                                if(wb_adr_i[invalidate_bit])
                                        next_state = TEST_INVALIDATE;
                                else
                                        next_state = TEST_HIT;
                        end
                end
                TEST_HIT: begin
                        if(cache_hit) begin
                                if(wb_we_i) begin
                                        next_state = WRITE_HIT;
                                end else begin
                                        wb_ack_o = 1'b1;
                                        next_state = IDLE;
                                end
                        end else begin
                                if(do_dirty)
                                        next_state = EVICT;
                                else
                                        next_state = REFILL;
                        end
                end
                WRITE_HIT: begin
                        di_valid = 1'b1;
                        di_dirty = 1'b1;
                        tagmem_we = 1'b1;
                        datamem_we_wb = 1'b1;
                        wb_ack_o = 1'b1;
                        next_state = IDLE;
                end

                /*
                 * Burst counter has already been loaded.
                 * Yes, we evict lines in different order depending
                 * on the critical word position of the cache miss
                 * inside the line, but who cares :)
                 */
                EVICT: begin
                        fml_stb = 1'b1;
                        fml_we = 1'b1;
                        if(fml_ack) begin
                                bcounter_en = 1'b1;
                                next_state = EVICT2;
                        end
                end
                EVICT2: begin
                        bcounter_en = 1'b1;
                        next_state = EVICT3;
                end
                EVICT3: begin
                        bcounter_en = 1'b1;
                        next_state = EVICT4;
                end
                EVICT4: begin
                        bcounter_en = 1'b1;
                        /* An eviction is shared by the miss and the invalidate paths. */
                        if(wb_adr_i[invalidate_bit])
                                next_state = INVALIDATE;
                        else
                                next_state = REFILL;
                end

                REFILL: begin
                        /* Write the tag first. This will also set the FML address. */
                        di_valid = 1'b1;
                        /* A write miss leaves the line dirty right away. */
                        di_dirty = wb_we_i;
                        /*
                         * Hold off replacing the tag while the DCB is
                         * looking up an address with the same tag.
                         */
                        if(~(dcb_stb & coincidence)) begin
                                tagmem_we = 1'b1;
                                next_state = REFILL_WAIT;
                        end
                end
                REFILL_WAIT: next_state = REFILL1; /* one cycle latency for the FML address */
                REFILL1: begin
                        bcounter_load = 1'b1;
                        fml_stb = 1'b1;
                        /* Asserting both
                         * datamem_we_fml and
                         * datamem_we_wb write the 64-bit word from FML
                         * with a 32-bit (at most) overlay from WB
                         */
                        datamem_we_fml = 1'b1;
                        if(wb_we_i)
                                datamem_we_wb = 1'b1;
                        if(fml_ack)
                                next_state = REFILL2;
                end
                REFILL2: begin
                        /*
                         * For reads, the critical word has just been written to the datamem
                         * so by acking the cycle now we get the correct result (because the
                         * datamem is a write-first SRAM).
                         * For writes, we could have acked the cycle before but it's simpler this way.
                         * Otherwise, we have the case of a master releasing WE just after ACK,
                         * and we must add a reg to tell whether we have a read or a write in REFILL2...
                         */
                        wb_ack_o = 1'b1;
                        /* Now we must use our copy of index, as the WISHBONE
                         * address may change.
                         */
                        index_load = 1'b0;
                        datamem_we_fml = 1'b1;
                        bcounter_en = 1'b1;
                        next_state = REFILL3;
                end
                REFILL3: begin
                        index_load = 1'b0;
                        datamem_we_fml = 1'b1;
                        bcounter_en = 1'b1;
                        next_state = REFILL4;
                end
                REFILL4: begin
                        index_load = 1'b0;
                        datamem_we_fml = 1'b1;
                        bcounter_en = 1'b1;
                        next_state = IDLE;
                end

                TEST_INVALIDATE: begin
                        if(do_dirty)
                                next_state = EVICT;
                        else
                                next_state = INVALIDATE;
                end
                INVALIDATE: begin
                        di_valid = 1'b0;
                        di_dirty = 1'b0;
                        tagmem_we = 1'b1;
                        wb_ack_o = 1'b1;
                        next_state = IDLE;
                end

                /* Unused encoding (4'd15): recover instead of locking up. */
                default: next_state = IDLE;
        endcase
end

/*
 * Do not hit on a line being refilled.
 *
 * From REFILL_WAIT to REFILL4 the tag memory already holds the new, valid
 * tag while the data memory is still being filled, so a DCB lookup with a
 * matching tag must be reported as a miss.
 * NOTE(review): REFILL1 is included because the tag is already valid there
 * and the FML data has not arrived yet - confirm against the DCB master.
 */
wire refilling = (state == REFILL_WAIT)
        || (state == REFILL1)
        || (state == REFILL2)
        || (state == REFILL3)
        || (state == REFILL4);

reg dcb_can_hit;

always @(posedge sys_clk)
        dcb_can_hit <= dcb_stb & (~refilling | ~coincidence);

/* Delayed by one cycle to line up with the tag memory read latency. */
reg [fml_depth-cache_depth-1:0] dcb_tag_r;
always @(posedge sys_clk)
        dcb_tag_r <= dcb_tag;

assign dcb_hit = dcb_can_hit & do2_valid & (do2_tag == dcb_tag_r);

endmodule