/* Bigger texel cache */
/* [mw/milkymist.git] / cores / tmu2 / rtl / tmu2.v */
/*
 * Milkymist VJ SoC
 * Copyright (C) 2007, 2008, 2009, 2010 Sebastien Bourdeauducq
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, version 3 of the License.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

/*
 * TMU2 top level.
 *
 * Instantiates the control interface (tmu2_ctlif) and a chain of pipeline
 * stages. Each stage exposes an upstream pipe_stb_i/pipe_ack_o pair and a
 * downstream pipe_stb_o/pipe_ack_i pair; this module only wires the
 * downstream pair of one stage to the upstream pair of the next, together
 * with the data signals travelling alongside.
 *
 * A small FSM at the end of the module drives the `busy` status reported
 * to the control interface and pulses the burst assembler's flush input
 * once every stage has reported idle.
 *
 * Parameters:
 *   csr_addr          - forwarded unchanged to tmu2_ctlif.
 *   fml_depth         - width of the FML address buses; also forwarded to
 *                       the sub-modules that take an fml_depth parameter.
 *   texel_cache_depth - forwarded to tmu2_texcache as cache_depth.
 */
module tmu2 #(
        parameter csr_addr = 4'h0,
        parameter fml_depth = 26,
        parameter texel_cache_depth = 15 /* 32kB cache */
) (
        /* Global clock and reset signals */
        input sys_clk,
        input sys_rst,

        /* Control interface */
        input [13:0] csr_a,
        input csr_we,
        input [31:0] csr_di,
        output [31:0] csr_do,

        output irq,

        /* WB master - Vertex read. */
        output [31:0] wbm_adr_o,
        output [2:0] wbm_cti_o,
        output wbm_cyc_o,
        output wbm_stb_o,
        input wbm_ack_i,
        input [31:0] wbm_dat_i,

        /* FML master - Texture pixel read. fml_we=0 is assumed. */
        output [fml_depth-1:0] fmlr_adr,
        output fmlr_stb,
        input fmlr_ack,
        input [63:0] fmlr_di,

        /* FML master - Destination pixel read. fml_we=0 is assumed. */
        output [fml_depth-1:0] fmldr_adr,
        output fmldr_stb,
        input fmldr_ack,
        input [63:0] fmldr_di,

        /* FML master - Destination pixel write. fml_we=1 is assumed. */
        output [fml_depth-1:0] fmlw_adr,
        output fmlw_stb,
        input fmlw_ack,
        output [7:0] fmlw_sel,
        output [63:0] fmlw_do
);

/*
 * When TMU_HAS_ALPHA is defined, the destination fetch (tmu2_fdest) and
 * alpha blending (tmu2_alpha) stages are instantiated between the decay
 * stage and the burst assembler. Otherwise the decay stage feeds the burst
 * assembler directly and the destination read port is tied off.
 */
`define TMU_HAS_ALPHA

/*
 * Fixed Point (FP) coordinate format:
 * 1 sign bit
 * 11 integer bits
 * 6 fractional bits
 * Properties:
 * - 18-bit coordinate
 * - Range: -2048 to +2047.984375
 */

/*
 * Signals exchanged with the control interface. `busy` is driven by the
 * flush FSM at the bottom of this module; everything else is driven by
 * tmu2_ctlif. The "< NN" annotations are kept from the original source.
 */
wire start;
reg busy;
wire [6:0] vertex_hlast;                /* < 04 last horizontal vertex index */
wire [6:0] vertex_vlast;                /* < 08 last vertical vertex index */
wire [5:0] brightness;                  /* < 0C output brightness 0-63 */
wire chroma_key_en;                     /* < 00 enable/disable chroma key filtering */
wire [15:0] chroma_key;                 /* < 10 chroma key (RGB565 color) */
wire [28:0] vertex_adr;                 /* < 14 vertex mesh address (64-bit words) */
wire [fml_depth-1-1:0] tex_fbuf;        /* < 18 texture address (16-bit words) */
wire [10:0] tex_hres;                   /* < 1C texture horizontal resolution (positive int) */
wire [10:0] tex_vres;                   /* < 20 texture vertical resolution (positive int) */
wire [17:0] tex_hmask;                  /* < 24 binary mask to the X texture coordinates (matches fp width) */
wire [17:0] tex_vmask;                  /* < 28 binary mask to the Y texture coordinates (matches fp width) */
wire [fml_depth-1-1:0] dst_fbuf;        /* < 2C destination framebuffer address (16-bit words) */
wire [10:0] dst_hres;                   /* < 30 destination horizontal resolution (positive int) */
wire [10:0] dst_vres;                   /* < 34 destination vertical resolution (positive int) */
wire signed [11:0] dst_hoffset;         /* < 38 X offset added to each pixel (signed int) */
wire signed [11:0] dst_voffset;         /* < 3C Y offset added to each pixel (signed int) */
wire [10:0] dst_squarew;                /* < 40 width of each destination rectangle (positive int) */
wire [10:0] dst_squareh;                /* < 44 height of each destination rectangle (positive int) */
wire alpha_en;                          /* gates the destination fetch stage (fetch_en) */
wire [5:0] alpha;                       /* < 48 opacity of the output 0-63 */

tmu2_ctlif #(
        .csr_addr(csr_addr),
        .fml_depth(fml_depth)
) ctlif (
        .sys_clk(sys_clk),
        .sys_rst(sys_rst),

        .csr_a(csr_a),
        .csr_we(csr_we),
        .csr_di(csr_di),
        .csr_do(csr_do),

        .irq(irq),

        .start(start),
        .busy(busy),

        .vertex_hlast(vertex_hlast),
        .vertex_vlast(vertex_vlast),
        .brightness(brightness),
        .chroma_key_en(chroma_key_en),
        .chroma_key(chroma_key),
        .vertex_adr(vertex_adr),
        .tex_fbuf(tex_fbuf),
        .tex_hres(tex_hres),
        .tex_vres(tex_vres),
        .tex_hmask(tex_hmask),
        .tex_vmask(tex_vmask),
        .dst_fbuf(dst_fbuf),
        .dst_hres(dst_hres),
        .dst_vres(dst_vres),
        .dst_hoffset(dst_hoffset),
        .dst_voffset(dst_voffset),
        .dst_squarew(dst_squarew),
        .dst_squareh(dst_squareh),
        .alpha_en(alpha_en),
        .alpha(alpha)
);

/* Stage 1 - Fetch vertices (owns the Wishbone master port) */
wire fetchvertex_busy;
wire fetchvertex_pipe_stb;
wire fetchvertex_pipe_ack;
wire signed [17:0] ax;
wire signed [17:0] ay;
wire signed [17:0] bx;
wire signed [17:0] by;
wire signed [17:0] cx;
wire signed [17:0] cy;
wire signed [17:0] dx;
wire signed [17:0] dy;
wire signed [11:0] drx;
wire signed [11:0] dry;

tmu2_fetchvertex fetchvertex(
        .sys_clk(sys_clk),
        .sys_rst(sys_rst),

        .start(start),
        .busy(fetchvertex_busy),

        .wbm_adr_o(wbm_adr_o),
        .wbm_cti_o(wbm_cti_o),
        .wbm_cyc_o(wbm_cyc_o),
        .wbm_stb_o(wbm_stb_o),
        .wbm_ack_i(wbm_ack_i),
        .wbm_dat_i(wbm_dat_i),

        .vertex_hlast(vertex_hlast),
        .vertex_vlast(vertex_vlast),
        .vertex_adr(vertex_adr),
        .dst_hoffset(dst_hoffset),
        .dst_voffset(dst_voffset),
        .dst_squarew(dst_squarew),
        .dst_squareh(dst_squareh),

        .pipe_stb_o(fetchvertex_pipe_stb),
        .pipe_ack_i(fetchvertex_pipe_ack),
        .ax(ax),
        .ay(ay),
        .bx(bx),
        .by(by),
        .cx(cx),
        .cy(cy),
        .dx(dx),
        .dy(dy),
        .drx(drx),
        .dry(dry)
);

/* Stage 2 - Vertical interpolation division operands */
wire vdivops_busy;
wire vdivops_pipe_stb;
wire vdivops_pipe_ack;
wire signed [17:0] ax_f;
wire signed [17:0] ay_f;
wire signed [17:0] bx_f;
wire signed [17:0] by_f;
wire diff_cx_positive;
wire [16:0] diff_cx;
wire diff_cy_positive;
wire [16:0] diff_cy;
wire diff_dx_positive;
wire [16:0] diff_dx;
wire diff_dy_positive;
wire [16:0] diff_dy;
wire signed [11:0] drx_f;
wire signed [11:0] dry_f;

tmu2_vdivops vdivops(
        .sys_clk(sys_clk),
        .sys_rst(sys_rst),

        .busy(vdivops_busy),

        .pipe_stb_i(fetchvertex_pipe_stb),
        .pipe_ack_o(fetchvertex_pipe_ack),
        .ax(ax),
        .ay(ay),
        .bx(bx),
        .by(by),
        .cx(cx),
        .cy(cy),
        .dx(dx),
        .dy(dy),
        .drx(drx),
        .dry(dry),

        .pipe_stb_o(vdivops_pipe_stb),
        .pipe_ack_i(vdivops_pipe_ack),
        .ax_f(ax_f),
        .ay_f(ay_f),
        .bx_f(bx_f),
        .by_f(by_f),
        .diff_cx_positive(diff_cx_positive),
        .diff_cx(diff_cx),
        .diff_cy_positive(diff_cy_positive),
        .diff_cy(diff_cy),
        .diff_dx_positive(diff_dx_positive),
        .diff_dx(diff_dx),
        .diff_dy_positive(diff_dy_positive),
        .diff_dy(diff_dy),
        .drx_f(drx_f),
        .dry_f(dry_f)
);

/* Stage 3 - Vertical division */
wire vdiv_busy;
wire vdiv_pipe_stb;
wire vdiv_pipe_ack;
wire signed [17:0] ax_f2;
wire signed [17:0] ay_f2;
wire signed [17:0] bx_f2;
wire signed [17:0] by_f2;
wire diff_cx_positive_f;
wire [16:0] diff_cx_q;
wire [16:0] diff_cx_r;
wire diff_cy_positive_f;
wire [16:0] diff_cy_q;
wire [16:0] diff_cy_r;
wire diff_dx_positive_f;
wire [16:0] diff_dx_q;
wire [16:0] diff_dx_r;
wire diff_dy_positive_f;
wire [16:0] diff_dy_q;
wire [16:0] diff_dy_r;
wire signed [11:0] drx_f2;
wire signed [11:0] dry_f2;

tmu2_vdiv vdiv(
        .sys_clk(sys_clk),
        .sys_rst(sys_rst),

        .busy(vdiv_busy),

        .pipe_stb_i(vdivops_pipe_stb),
        .pipe_ack_o(vdivops_pipe_ack),
        .ax(ax_f),
        .ay(ay_f),
        .bx(bx_f),
        .by(by_f),
        .diff_cx_positive(diff_cx_positive),
        .diff_cx(diff_cx),
        .diff_cy_positive(diff_cy_positive),
        .diff_cy(diff_cy),
        .diff_dx_positive(diff_dx_positive),
        .diff_dx(diff_dx),
        .diff_dy_positive(diff_dy_positive),
        .diff_dy(diff_dy),
        .drx(drx_f),
        .dry(dry_f),

        .dst_squareh(dst_squareh),

        .pipe_stb_o(vdiv_pipe_stb),
        .pipe_ack_i(vdiv_pipe_ack),
        .ax_f(ax_f2),
        .ay_f(ay_f2),
        .bx_f(bx_f2),
        .by_f(by_f2),
        .diff_cx_positive_f(diff_cx_positive_f),
        .diff_cx_q(diff_cx_q),
        .diff_cx_r(diff_cx_r),
        .diff_cy_positive_f(diff_cy_positive_f),
        .diff_cy_q(diff_cy_q),
        .diff_cy_r(diff_cy_r),
        .diff_dx_positive_f(diff_dx_positive_f),
        .diff_dx_q(diff_dx_q),
        .diff_dx_r(diff_dx_r),
        .diff_dy_positive_f(diff_dy_positive_f),
        .diff_dy_q(diff_dy_q),
        .diff_dy_r(diff_dy_r),
        .drx_f(drx_f2),
        .dry_f(dry_f2)
);

/* Stage 4 - Vertical interpolation */
wire vinterp_busy;
wire vinterp_pipe_stb;
wire vinterp_pipe_ack;
wire signed [11:0] vx;
wire signed [11:0] vy;
wire signed [17:0] tsx;
wire signed [17:0] tsy;
wire signed [17:0] tex;
wire signed [17:0] tey;

tmu2_vinterp vinterp(
        .sys_clk(sys_clk),
        .sys_rst(sys_rst),

        .busy(vinterp_busy),

        .pipe_stb_i(vdiv_pipe_stb),
        .pipe_ack_o(vdiv_pipe_ack),
        .ax(ax_f2),
        .ay(ay_f2),
        .bx(bx_f2),
        .by(by_f2),
        .diff_cx_positive(diff_cx_positive_f),
        .diff_cx_q(diff_cx_q),
        .diff_cx_r(diff_cx_r),
        .diff_cy_positive(diff_cy_positive_f),
        .diff_cy_q(diff_cy_q),
        .diff_cy_r(diff_cy_r),
        .diff_dx_positive(diff_dx_positive_f),
        .diff_dx_q(diff_dx_q),
        .diff_dx_r(diff_dx_r),
        .diff_dy_positive(diff_dy_positive_f),
        .diff_dy_q(diff_dy_q),
        .diff_dy_r(diff_dy_r),
        .drx(drx_f2),
        .dry(dry_f2),

        .dst_squareh(dst_squareh),

        .pipe_stb_o(vinterp_pipe_stb),
        .pipe_ack_i(vinterp_pipe_ack),
        .x(vx),
        .y(vy),
        .tsx(tsx),
        .tsy(tsy),
        .tex(tex),
        .tey(tey)
);

/* Stage 5 - Horizontal interpolation division operands */
wire hdivops_busy;
wire hdivops_pipe_stb;
wire hdivops_pipe_ack;
wire signed [11:0] vx_f;
wire signed [11:0] vy_f;
wire signed [17:0] tsx_f;
wire signed [17:0] tsy_f;
wire diff_x_positive;
wire [16:0] diff_x;
wire diff_y_positive;
wire [16:0] diff_y;

tmu2_hdivops hdivops(
        .sys_clk(sys_clk),
        .sys_rst(sys_rst),

        .busy(hdivops_busy),

        .pipe_stb_i(vinterp_pipe_stb),
        .pipe_ack_o(vinterp_pipe_ack),
        .x(vx),
        .y(vy),
        .tsx(tsx),
        .tsy(tsy),
        .tex(tex),
        .tey(tey),

        .pipe_stb_o(hdivops_pipe_stb),
        .pipe_ack_i(hdivops_pipe_ack),
        .x_f(vx_f),
        .y_f(vy_f),
        .tsx_f(tsx_f),
        .tsy_f(tsy_f),
        .diff_x_positive(diff_x_positive),
        .diff_x(diff_x),
        .diff_y_positive(diff_y_positive),
        .diff_y(diff_y)
);

/* Stage 6 - Horizontal division */
wire hdiv_busy;
wire hdiv_pipe_stb;
wire hdiv_pipe_ack;
wire signed [11:0] vx_f2;
wire signed [11:0] vy_f2;
wire signed [17:0] tsx_f2;
wire signed [17:0] tsy_f2;
wire diff_x_positive_f;
wire [16:0] diff_x_q;
wire [16:0] diff_x_r;
wire diff_y_positive_f;
wire [16:0] diff_y_q;
wire [16:0] diff_y_r;

tmu2_hdiv hdiv(
        .sys_clk(sys_clk),
        .sys_rst(sys_rst),

        .busy(hdiv_busy),

        .pipe_stb_i(hdivops_pipe_stb),
        .pipe_ack_o(hdivops_pipe_ack),
        .x(vx_f),
        .y(vy_f),
        .tsx(tsx_f),
        .tsy(tsy_f),
        .diff_x_positive(diff_x_positive),
        .diff_x(diff_x),
        .diff_y_positive(diff_y_positive),
        .diff_y(diff_y),

        .dst_squarew(dst_squarew),

        .pipe_stb_o(hdiv_pipe_stb),
        .pipe_ack_i(hdiv_pipe_ack),
        .x_f(vx_f2),
        .y_f(vy_f2),
        .tsx_f(tsx_f2),
        .tsy_f(tsy_f2),
        .diff_x_positive_f(diff_x_positive_f),
        .diff_x_q(diff_x_q),
        .diff_x_r(diff_x_r),
        .diff_y_positive_f(diff_y_positive_f),
        .diff_y_q(diff_y_q),
        .diff_y_r(diff_y_r)
);

/* Stage 7 - Horizontal interpolation */
wire hinterp_busy;
wire hinterp_pipe_stb;
wire hinterp_pipe_ack;
wire signed [11:0] dstx;
wire signed [11:0] dsty;
wire signed [17:0] tx;
wire signed [17:0] ty;

tmu2_hinterp hinterp(
        .sys_clk(sys_clk),
        .sys_rst(sys_rst),

        .busy(hinterp_busy),

        .pipe_stb_i(hdiv_pipe_stb),
        .pipe_ack_o(hdiv_pipe_ack),
        .x(vx_f2),
        .y(vy_f2),
        .tsx(tsx_f2),
        .tsy(tsy_f2),
        .diff_x_positive(diff_x_positive_f),
        .diff_x_q(diff_x_q),
        .diff_x_r(diff_x_r),
        .diff_y_positive(diff_y_positive_f),
        .diff_y_q(diff_y_q),
        .diff_y_r(diff_y_r),

        .dst_squarew(dst_squarew),

        .pipe_stb_o(hinterp_pipe_stb),
        .pipe_ack_i(hinterp_pipe_ack),
        .dx(dstx),
        .dy(dsty),
        .tx(tx),
        .ty(ty)
);

/* Stage 8 - Mask texture coordinates */
wire mask_busy;
wire mask_pipe_stb;
wire mask_pipe_ack;
wire signed [11:0] dstx_f;
wire signed [11:0] dsty_f;
wire signed [17:0] tx_m;
wire signed [17:0] ty_m;

tmu2_mask mask(
        .sys_clk(sys_clk),
        .sys_rst(sys_rst),

        .busy(mask_busy),

        .pipe_stb_i(hinterp_pipe_stb),
        .pipe_ack_o(hinterp_pipe_ack),
        .dx(dstx),
        .dy(dsty),
        .tx(tx),
        .ty(ty),

        .tex_hmask(tex_hmask),
        .tex_vmask(tex_vmask),

        .pipe_stb_o(mask_pipe_stb),
        .pipe_ack_i(mask_pipe_ack),
        .dx_f(dstx_f),
        .dy_f(dsty_f),
        .tx_m(tx_m),
        .ty_m(ty_m)
);

/* Stage 9 - Clamp texture coordinates and filter out off-screen points */
wire clamp_busy;
wire clamp_pipe_stb;
wire clamp_pipe_ack;
wire [10:0] dstx_c;
wire [10:0] dsty_c;
wire [16:0] tx_c;
wire [16:0] ty_c;

tmu2_clamp clamp(
        .sys_clk(sys_clk),
        .sys_rst(sys_rst),

        .busy(clamp_busy),

        .pipe_stb_i(mask_pipe_stb),
        .pipe_ack_o(mask_pipe_ack),
        .dx(dstx_f),
        .dy(dsty_f),
        .tx(tx_m),
        .ty(ty_m),

        .tex_hres(tex_hres),
        .tex_vres(tex_vres),
        .dst_hres(dst_hres),
        .dst_vres(dst_vres),

        .pipe_stb_o(clamp_pipe_stb),
        .pipe_ack_i(clamp_pipe_ack),
        .dx_c(dstx_c),
        .dy_c(dsty_c),
        .tx_c(tx_c),
        .ty_c(ty_c)
);

/*
 * Stage 10 - Address generator
 * Produces one destination address (dadr), four texture addresses
 * (tadra..tadrd) and two 6-bit fractions (x_frac, y_frac) per pixel.
 */
wire adrgen_busy;
wire adrgen_pipe_stb;
wire adrgen_pipe_ack;
wire [fml_depth-1-1:0] dadr;
wire [fml_depth-1-1:0] tadra;
wire [fml_depth-1-1:0] tadrb;
wire [fml_depth-1-1:0] tadrc;
wire [fml_depth-1-1:0] tadrd;
wire [5:0] x_frac;
wire [5:0] y_frac;

tmu2_adrgen #(
        .fml_depth(fml_depth)
) adrgen (
        .sys_clk(sys_clk),
        .sys_rst(sys_rst),

        .busy(adrgen_busy),

        .pipe_stb_i(clamp_pipe_stb),
        .pipe_ack_o(clamp_pipe_ack),
        .dx_c(dstx_c),
        .dy_c(dsty_c),
        .tx_c(tx_c),
        .ty_c(ty_c),

        .dst_fbuf(dst_fbuf),
        .dst_hres(dst_hres),
        .tex_fbuf(tex_fbuf),
        .tex_hres(tex_hres),

        .pipe_stb_o(adrgen_pipe_stb),
        .pipe_ack_i(adrgen_pipe_ack),
        .dadr(dadr),
        .tadra(tadra),
        .tadrb(tadrb),
        .tadrc(tadrc),
        .tadrd(tadrd),
        .x_frac(x_frac),
        .y_frac(y_frac)
);

/*
 * Stage 11a - Buffer
 * Generic tmu2_buffer carrying the address generator outputs as one
 * concatenated word: five (fml_depth-1)-bit addresses plus two 6-bit
 * fractions. The field order in dat_i and dat_o must stay identical.
 */
wire buffer1_busy;
wire buffer1_pipe_stb;
wire buffer1_pipe_ack;
wire [fml_depth-1-1:0] dadr_buf;
wire [fml_depth-1-1:0] tadra_buf;
wire [fml_depth-1-1:0] tadrb_buf;
wire [fml_depth-1-1:0] tadrc_buf;
wire [fml_depth-1-1:0] tadrd_buf;
wire [5:0] x_frac_buf;
wire [5:0] y_frac_buf;

tmu2_buffer #(
        .width(5*(fml_depth-1)+6+6)
) buffer1 (
        .sys_clk(sys_clk),
        .sys_rst(sys_rst),

        .busy(buffer1_busy),

        .pipe_stb_i(adrgen_pipe_stb),
        .pipe_ack_o(adrgen_pipe_ack),
        .dat_i({dadr, tadra, tadrb, tadrc, tadrd, x_frac, y_frac}),

        .pipe_stb_o(buffer1_pipe_stb),
        .pipe_ack_i(buffer1_pipe_ack),
        .dat_o({dadr_buf, tadra_buf, tadrb_buf, tadrc_buf, tadrd_buf, x_frac_buf, y_frac_buf})
);

/*
 * Stage 11b - Texel cache
 * Owns the texture read FML port (fmlr_*). Its flush input is tied to
 * `start`. Takes the four texture addresses and outputs four 16-bit
 * colors (tcolora..tcolord).
 */
wire texcache_busy;
wire texcache_pipe_stb;
wire texcache_pipe_ack;
wire [fml_depth-1-1:0] dadr_f;
wire [15:0] tcolora;
wire [15:0] tcolorb;
wire [15:0] tcolorc;
wire [15:0] tcolord;
wire [5:0] x_frac_f;
wire [5:0] y_frac_f;

tmu2_texcache #(
        .cache_depth(texel_cache_depth),
        .fml_depth(fml_depth)
) texcache (
        .sys_clk(sys_clk),
        .sys_rst(sys_rst),

        .fml_adr(fmlr_adr),
        .fml_stb(fmlr_stb),
        .fml_ack(fmlr_ack),
        .fml_di(fmlr_di),

        .flush(start),
        .busy(texcache_busy),

        .pipe_stb_i(buffer1_pipe_stb),
        .pipe_ack_o(buffer1_pipe_ack),
        .dadr(dadr_buf),
        .tadra(tadra_buf),
        .tadrb(tadrb_buf),
        .tadrc(tadrc_buf),
        .tadrd(tadrd_buf),
        .x_frac(x_frac_buf),
        .y_frac(y_frac_buf),

        .pipe_stb_o(texcache_pipe_stb),
        .pipe_ack_i(texcache_pipe_ack),
        .dadr_f(dadr_f),
        .tcolora(tcolora),
        .tcolorb(tcolorb),
        .tcolorc(tcolorc),
        .tcolord(tcolord),
        .x_frac_f(x_frac_f),
        .y_frac_f(y_frac_f)
);

/*
 * Stage 11c - Buffer
 * Second tmu2_buffer, carrying the texel cache outputs: one
 * (fml_depth-1)-bit address, four 16-bit colors and two 6-bit fractions.
 */
wire buffer2_busy;
wire buffer2_pipe_stb;
wire buffer2_pipe_ack;
wire [fml_depth-1-1:0] dadr_f_buf;
wire [15:0] tcolora_buf;
wire [15:0] tcolorb_buf;
wire [15:0] tcolorc_buf;
wire [15:0] tcolord_buf;
wire [5:0] x_frac_f_buf;
wire [5:0] y_frac_f_buf;

tmu2_buffer #(
        .width(fml_depth-1+4*16+6+6)
) buffer2 (
        .sys_clk(sys_clk),
        .sys_rst(sys_rst),

        .busy(buffer2_busy),

        .pipe_stb_i(texcache_pipe_stb),
        .pipe_ack_o(texcache_pipe_ack),
        .dat_i({dadr_f, tcolora, tcolorb, tcolorc, tcolord, x_frac_f, y_frac_f}),

        .pipe_stb_o(buffer2_pipe_stb),
        .pipe_ack_i(buffer2_pipe_ack),
        .dat_o({dadr_f_buf, tcolora_buf, tcolorb_buf, tcolorc_buf, tcolord_buf, x_frac_f_buf, y_frac_f_buf})
);

/* Stage 12 - Blend neighbouring pixels for bilinear filtering */
wire blend_busy;
wire blend_pipe_stb;
wire blend_pipe_ack;
wire [fml_depth-1-1:0] dadr_f2;
wire [15:0] color;

tmu2_blend #(
        .fml_depth(fml_depth)
) blend (
        .sys_clk(sys_clk),
        .sys_rst(sys_rst),

        .busy(blend_busy),

        .pipe_stb_i(buffer2_pipe_stb),
        .pipe_ack_o(buffer2_pipe_ack),
        .dadr(dadr_f_buf),
        .colora(tcolora_buf),
        .colorb(tcolorb_buf),
        .colorc(tcolorc_buf),
        .colord(tcolord_buf),
        .x_frac(x_frac_f_buf),
        .y_frac(y_frac_f_buf),

        .pipe_stb_o(blend_pipe_stb),
        .pipe_ack_i(blend_pipe_ack),
        .dadr_f(dadr_f2),
        .color(color)
);

/* Stage 13 - Apply decay effect and chroma key filtering. */
wire decay_busy;
wire decay_pipe_stb;
wire decay_pipe_ack;
wire [15:0] color_d;
wire [fml_depth-1-1:0] dadr_f3;

tmu2_decay #(
        .fml_depth(fml_depth)
) decay (
        .sys_clk(sys_clk),
        .sys_rst(sys_rst),

        .busy(decay_busy),

        .brightness(brightness),
        .chroma_key_en(chroma_key_en),
        .chroma_key(chroma_key),

        .pipe_stb_i(blend_pipe_stb),
        .pipe_ack_o(blend_pipe_ack),
        .color(color),
        .dadr(dadr_f2),

        .pipe_stb_o(decay_pipe_stb),
        .pipe_ack_i(decay_pipe_ack),
        .color_d(color_d),
        .dadr_f(dadr_f3)
);

`ifdef TMU_HAS_ALPHA
/*
 * Stage 14 - Fetch destination pixel for alpha blending
 * Owns the destination read FML port (fmldr_*). fetch_en is tied to
 * alpha_en and flush is tied to `start`.
 */
wire fdest_busy;
wire fdest_pipe_stb;
wire fdest_pipe_ack;
wire [15:0] color_d_f;
wire [fml_depth-1-1:0] dadr_f4;
wire [15:0] dcolor;

tmu2_fdest #(
        .fml_depth(fml_depth)
) fdest (
        .sys_clk(sys_clk),
        .sys_rst(sys_rst),

        .fml_adr(fmldr_adr),
        .fml_stb(fmldr_stb),
        .fml_ack(fmldr_ack),
        .fml_di(fmldr_di),

        .flush(start),
        .busy(fdest_busy),

        .fetch_en(alpha_en),

        .pipe_stb_i(decay_pipe_stb),
        .pipe_ack_o(decay_pipe_ack),
        .color(color_d),
        .dadr(dadr_f3),

        .pipe_stb_o(fdest_pipe_stb),
        .pipe_ack_i(fdest_pipe_ack),
        .color_f(color_d_f),
        .dadr_f(dadr_f4),
        .dcolor(dcolor)
);

/* Stage 15 - Alpha blending */
wire alpha_busy;
wire alpha_pipe_stb;
wire alpha_pipe_ack;
wire [fml_depth-1-1:0] dadr_f5;
wire [15:0] acolor;

tmu2_alpha #(
        .fml_depth(fml_depth)
) u_alpha (
        .sys_clk(sys_clk),
        .sys_rst(sys_rst),

        .busy(alpha_busy),

        .alpha(alpha),

        .pipe_stb_i(fdest_pipe_stb),
        .pipe_ack_o(fdest_pipe_ack),
        .color(color_d_f),
        .dadr(dadr_f4),
        .dcolor(dcolor),

        .pipe_stb_o(alpha_pipe_stb),
        .pipe_ack_i(alpha_pipe_ack),
        .dadr_f(dadr_f5),
        .acolor(acolor)
);
`else
/* No alpha support: the destination read port never issues a request. */
assign fmldr_adr = {fml_depth{1'bx}};
assign fmldr_stb = 1'b0;
`endif

/*
 * Stage 16 - Burst assembler
 * Fed by the alpha stage when TMU_HAS_ALPHA is defined, otherwise directly
 * by the decay stage. burst_flush is driven by the FSM below.
 */
reg burst_flush;
wire burst_busy;
wire burst_pipe_stb;
wire burst_pipe_ack;
wire [fml_depth-5-1:0] burst_addr;
wire [15:0] burst_sel;
wire [255:0] burst_do;

tmu2_burst #(
        .fml_depth(fml_depth)
) burst (
        .sys_clk(sys_clk),
        .sys_rst(sys_rst),

        .flush(burst_flush),
        .busy(burst_busy),

`ifdef TMU_HAS_ALPHA
        .pipe_stb_i(alpha_pipe_stb),
        .pipe_ack_o(alpha_pipe_ack),
        .color(acolor),
        .dadr(dadr_f5),
`else
        .pipe_stb_i(decay_pipe_stb),
        .pipe_ack_o(decay_pipe_ack),
        .color(color_d),
        .dadr(dadr_f3),
`endif

        .pipe_stb_o(burst_pipe_stb),
        .pipe_ack_i(burst_pipe_ack),
        .burst_addr(burst_addr),
        .burst_sel(burst_sel),
        .burst_do(burst_do)
);

/* Stage 17 - Pixel output (owns the destination write FML port) */
wire pixout_busy;

tmu2_pixout #(
        .fml_depth(fml_depth)
) pixout (
        .sys_clk(sys_clk),
        .sys_rst(sys_rst),

        .busy(pixout_busy),

        .pipe_stb_i(burst_pipe_stb),
        .pipe_ack_o(burst_pipe_ack),
        .burst_addr(burst_addr),
        .burst_sel(burst_sel),
        .burst_do(burst_do),

        .fml_adr(fmlw_adr),
        .fml_stb(fmlw_stb),
        .fml_ack(fmlw_ack),
        .fml_sel(fmlw_sel),
        .fml_do(fmlw_do)
);

/* FSM to flush the burst assembler at the end */

/*
 * OR of every stage's busy output. Every instantiated stage must appear
 * here, including adrgen_busy: if a stage is left out, the FSM can see
 * the pipeline as idle and issue the flush while that stage still reports
 * busy.
 */
wire pipeline_busy = fetchvertex_busy
        |vdivops_busy|vdiv_busy|vinterp_busy
        |hdivops_busy|hdiv_busy|hinterp_busy
        |mask_busy|clamp_busy|adrgen_busy
        |buffer1_busy|texcache_busy|buffer2_busy
        |blend_busy|decay_busy
`ifdef TMU_HAS_ALPHA
        |fdest_busy|alpha_busy
`endif
        |burst_busy|pixout_busy;

/* State encodings: internal constants, not meant to be overridden. */
localparam IDLE          = 2'd0;
localparam WAIT_PROCESS  = 2'd1;
localparam FLUSH         = 2'd2;
localparam WAIT_FLUSH    = 2'd3;

reg [1:0] state;
reg [1:0] next_state;

/* State register with synchronous reset to IDLE. */
always @(posedge sys_clk) begin
        if(sys_rst)
                state <= IDLE;
        else
                state <= next_state;
end

/*
 * Next-state and output logic.
 * busy is high in every state except IDLE; burst_flush is high only in
 * FLUSH, which lasts a single cycle.
 */
always @(*) begin
        /* Defaults: hold state, report busy, no flush. */
        next_state = state;

        busy = 1'b1;
        burst_flush = 1'b0;

        case(state)
                IDLE: begin
                        busy = 1'b0;
                        if(start)
                                next_state = WAIT_PROCESS;
                end
                WAIT_PROCESS: begin
                        /* Wait until every stage reports idle. */
                        if(~pipeline_busy)
                                next_state = FLUSH;
                end
                FLUSH: begin
                        /* One-cycle flush pulse to the burst assembler. */
                        burst_flush = 1'b1;
                        next_state = WAIT_FLUSH;
                end
                WAIT_FLUSH: begin
                        /* Wait for the pipeline to report idle again after the flush. */
                        if(~pipeline_busy)
                                next_state = IDLE;
                end
        endcase
end

endmodule