New TMU cache WIP
author lekernel <sebastien.bourdeauducq@lekernel.net>
Wed, 23 Jun 2010 11:26:10 +0000 (13:26 +0200)
committer lekernel <sebastien.bourdeauducq@lekernel.net>
Wed, 23 Jun 2010 11:26:10 +0000 (13:26 +0200)
19 files changed:
boards/milkymist-one/rtl/setup.v
boards/milkymist-one/sources.mak
boards/milkymist-one/synthesis/common.mak
cores/tmu2/rtl/tmu2.v
cores/tmu2/rtl/tmu2_blend.v
cores/tmu2/rtl/tmu2_buffer.v [new file with mode: 0644]
cores/tmu2/rtl/tmu2_ctlif.v
cores/tmu2/rtl/tmu2_dpram.v
cores/tmu2/rtl/tmu2_dpram_sw.v
cores/tmu2/rtl/tmu2_qpram32_ss.v [deleted file]
cores/tmu2/rtl/tmu2_texcache.v
cores/tmu2/test/Makefile
cores/tmu2/test/tb_tmu2.v
software/demo/ui.c [deleted file]
software/demo/ui.h [deleted file]
software/include/hal/hdlcd.h [deleted file]
software/include/hal/slowout.h [deleted file]
software/libhal/hdlcd.c [deleted file]
software/libhal/slowout.c [deleted file]

index caa6f81..f6bc538 100644 (file)
@@ -23,7 +23,7 @@
  */
 `define ENABLE_AC97
 `define ENABLE_PFPU
-//`define ENABLE_TMU
+`define ENABLE_TMU
 `define ENABLE_ETHERNET
 `define ENABLE_FMLMETER
 
index 1765130..be3abef 100644 (file)
@@ -35,6 +35,7 @@ PFPU_SRC=$(wildcard $(CORES_DIR)/pfpu/rtl/*.v)
 TMU_SRC=                                               \
        $(CORES_DIR)/tmu2/rtl/tmu2_adrgen.v             \
        $(CORES_DIR)/tmu2/rtl/tmu2_clamp.v              \
+       $(CORES_DIR)/tmu2/rtl/tmu2_dpram.v              \
        $(CORES_DIR)/tmu2/rtl/tmu2_dpram_sw.v           \
        $(CORES_DIR)/tmu2/rtl/tmu2_hdiv.v               \
        $(CORES_DIR)/tmu2/rtl/tmu2_burst.v              \
@@ -43,7 +44,7 @@ TMU_SRC=                                              \
        $(CORES_DIR)/tmu2/rtl/tmu2_ctlif.v              \
        $(CORES_DIR)/tmu2/rtl/tmu2_fetchvertex.v        \
        $(CORES_DIR)/tmu2/rtl/tmu2_hinterp.v            \
-       $(CORES_DIR)/tmu2/rtl/tmu2_qpram32_ss.v         \
+       $(CORES_DIR)/tmu2/rtl/tmu2_qpram32.v            \
        $(CORES_DIR)/tmu2/rtl/tmu2_vdivops.v            \
        $(CORES_DIR)/tmu2/rtl/tmu2_decay.v              \
        $(CORES_DIR)/tmu2/rtl/tmu2_geninterp18.v        \
@@ -57,7 +58,8 @@ TMU_SRC=                                              \
        $(CORES_DIR)/tmu2/rtl/tmu2_blend.v              \
        $(CORES_DIR)/tmu2/rtl/tmu2_mult2.v              \
        $(CORES_DIR)/tmu2/rtl/tmu2_fdest.v              \
-       $(CORES_DIR)/tmu2/rtl/tmu2_alpha.v
+       $(CORES_DIR)/tmu2/rtl/tmu2_alpha.v              \
+       $(CORES_DIR)/tmu2/rtl/tmu2_buffer.v
 ETHERNET_SRC=$(wildcard $(CORES_DIR)/minimac/rtl/*.v)
 FMLMETER_SRC=$(wildcard $(CORES_DIR)/fmlmeter/rtl/*.v)
 
index f28d200..044d4b4 100644 (file)
@@ -7,7 +7,7 @@ load: build/system.bit
        cd build && impact -batch ../load.cmd
 
 build/system.ncd: build/system.ngd
-       cd build && map -w system.ngd
+       cd build && map -ol high -w system.ngd
 
 build/system-routed.ncd: build/system.ncd
        cd build && par -ol high -w system.ncd system-routed.ncd
index 142dacf..ea1b0a1 100644 (file)
@@ -95,15 +95,6 @@ wire [10:0] dst_squareh;             /* < 44 height of each destination rectangle (positive
 wire alpha_en;
 wire [5:0] alpha;                      /* < 48 opacity of the output 0-63 */
 
-wire [21:0] c_req_a;                   /* < 50 texel cache requests on channel A */
-wire [21:0] c_hit_a;                   /* < 54 texel cache hits on channel A */
-wire [21:0] c_req_b;                   /* < 58 texel cache requests on channel B */
-wire [21:0] c_hit_b;                   /* < 5C texel cache hits on channel B */
-wire [21:0] c_req_c;                   /* < 60 texel cache requests on channel C */
-wire [21:0] c_hit_c;                   /* < 64 texel cache hits on channel C */
-wire [21:0] c_req_d;                   /* < 68 texel cache requests on channel D */
-wire [21:0] c_hit_d;                   /* < 6C texel cache hits on channel D */
-
 tmu2_ctlif #(
        .csr_addr(csr_addr),
        .fml_depth(fml_depth)
@@ -140,16 +131,7 @@ tmu2_ctlif #(
        .dst_squarew(dst_squarew),
        .dst_squareh(dst_squareh),
        .alpha_en(alpha_en),
-       .alpha(alpha),
-
-       .c_req_a(c_req_a),
-       .c_hit_a(c_hit_a),
-       .c_req_b(c_req_b),
-       .c_hit_b(c_hit_b),
-       .c_req_c(c_req_c),
-       .c_hit_c(c_hit_c),
-       .c_req_d(c_req_d),
-       .c_hit_d(c_hit_d)
+       .alpha(alpha)
 );
 
 /* Stage 1 - Fetch vertices */
@@ -616,7 +598,36 @@ tmu2_adrgen #(
        .y_frac(y_frac)
 );
 
-/* Stage 11 - Texel cache */
+/* Stage 11a - Buffer */
+wire buffer1_busy;
+wire buffer1_pipe_stb;
+wire buffer1_pipe_ack;
+wire [fml_depth-1-1:0] dadr_buf;
+wire [fml_depth-1-1:0] tadra_buf;
+wire [fml_depth-1-1:0] tadrb_buf;
+wire [fml_depth-1-1:0] tadrc_buf;
+wire [fml_depth-1-1:0] tadrd_buf;
+wire [5:0] x_frac_buf;
+wire [5:0] y_frac_buf;
+
+tmu2_buffer #(
+       .width(5*(fml_depth-1)+6+6)
+) buffer1 (
+       .sys_clk(sys_clk),
+       .sys_rst(sys_rst),
+
+       .busy(buffer1_busy),
+
+       .pipe_stb_i(adrgen_pipe_stb),
+       .pipe_ack_o(adrgen_pipe_ack),
+       .dat_i({dadr, tadra, tadrb, tadrc, tadrd, x_frac, y_frac}),
+
+       .pipe_stb_o(buffer1_pipe_stb),
+       .pipe_ack_i(buffer1_pipe_ack),
+       .dat_o({dadr_buf, tadra_buf, tadrb_buf, tadrc_buf, tadrd_buf, x_frac_buf, y_frac_buf})
+);
+
+/* Stage 11b - Texel cache */
 wire texcache_busy;
 wire texcache_pipe_stb;
 wire texcache_pipe_ack;
@@ -643,15 +654,15 @@ tmu2_texcache #(
        .flush(start),
        .busy(texcache_busy),
 
-       .pipe_stb_i(adrgen_pipe_stb),
-       .pipe_ack_o(adrgen_pipe_ack),
-       .dadr(dadr),
-       .tadra(tadra),
-       .tadrb(tadrb),
-       .tadrc(tadrc),
-       .tadrd(tadrd),
-       .x_frac(x_frac),
-       .y_frac(y_frac),
+       .pipe_stb_i(buffer1_pipe_stb),
+       .pipe_ack_o(buffer1_pipe_ack),
+       .dadr(dadr_buf),
+       .tadra(tadra_buf),
+       .tadrb(tadrb_buf),
+       .tadrc(tadrc_buf),
+       .tadrd(tadrd_buf),
+       .x_frac(x_frac_buf),
+       .y_frac(y_frac_buf),
 
        .pipe_stb_o(texcache_pipe_stb),
        .pipe_ack_i(texcache_pipe_ack),
@@ -661,16 +672,36 @@ tmu2_texcache #(
        .tcolorc(tcolorc),
        .tcolord(tcolord),
        .x_frac_f(x_frac_f),
-       .y_frac_f(y_frac_f),
-
-       .c_req_a(c_req_a),
-       .c_hit_a(c_hit_a),
-       .c_req_b(c_req_b),
-       .c_hit_b(c_hit_b),
-       .c_req_c(c_req_c),
-       .c_hit_c(c_hit_c),
-       .c_req_d(c_req_d),
-       .c_hit_d(c_hit_d)
+       .y_frac_f(y_frac_f)
+);
+
+/* Stage 11c - Buffer */
+wire buffer2_busy;
+wire buffer2_pipe_stb;
+wire buffer2_pipe_ack;
+wire [fml_depth-1-1:0] dadr_f_buf;
+wire [15:0] tcolora_buf;
+wire [15:0] tcolorb_buf;
+wire [15:0] tcolorc_buf;
+wire [15:0] tcolord_buf;
+wire [5:0] x_frac_f_buf;
+wire [5:0] y_frac_f_buf;
+
+tmu2_buffer #(
+       .width(fml_depth-1+4*16+6+6)
+) buffer2 (
+       .sys_clk(sys_clk),
+       .sys_rst(sys_rst),
+
+       .busy(buffer2_busy),
+
+       .pipe_stb_i(texcache_pipe_stb),
+       .pipe_ack_o(texcache_pipe_ack),
+       .dat_i({dadr_f, tcolora, tcolorb, tcolorc, tcolord, x_frac_f, y_frac_f}),
+
+       .pipe_stb_o(buffer2_pipe_stb),
+       .pipe_ack_i(buffer2_pipe_ack),
+       .dat_o({dadr_f_buf, tcolora_buf, tcolorb_buf, tcolorc_buf, tcolord_buf, x_frac_f_buf, y_frac_f_buf})
 );
 
 /* Stage 11 - Blend neighbouring pixels for bilinear filtering */
@@ -687,15 +718,15 @@ tmu2_blend #(
        .sys_rst(sys_rst),
 
        .busy(blend_busy),
-       .pipe_stb_i(texcache_pipe_stb),
-       .pipe_ack_o(texcache_pipe_ack),
-       .dadr(dadr_f),
-       .colora(tcolora),
-       .colorb(tcolorb),
-       .colorc(tcolorc),
-       .colord(tcolord),
-       .x_frac(x_frac_f),
-       .y_frac(y_frac_f),
+       .pipe_stb_i(buffer2_pipe_stb),
+       .pipe_ack_o(buffer2_pipe_ack),
+       .dadr(dadr_f_buf),
+       .colora(tcolora_buf),
+       .colorb(tcolorb_buf),
+       .colorc(tcolorc_buf),
+       .colord(tcolord_buf),
+       .x_frac(x_frac_f_buf),
+       .y_frac(y_frac_f_buf),
 
        .pipe_stb_o(blend_pipe_stb),
        .pipe_ack_i(blend_pipe_ack),
@@ -870,7 +901,7 @@ wire pipeline_busy = fetchvertex_busy
        |vdivops_busy|vdiv_busy|vinterp_busy
        |hdivops_busy|hdiv_busy|hinterp_busy
        |mask_busy|clamp_busy
-       |texcache_busy
+       |buffer1_busy|texcache_busy|buffer2_busy
        |blend_busy|decay_busy
 `ifdef TMU_HAS_ALPHA
        |fdest_busy|alpha_busy
index a250cc5..0e70770 100644 (file)
@@ -49,60 +49,71 @@ reg [15:0] colorb_1;
 reg [15:0] colorc_1;
 reg [15:0] colord_1;
 reg [fml_depth-1-1:0] dadr_1;
+reg [5:0] x_frac_1;
+reg [5:0] y_frac_1;
+reg [6:0] ix_frac_1;
+reg [6:0] iy_frac_1;
 
 reg valid_2;
-wire [12:0] pa_2;
-wire [12:0] pb_2;
-wire [12:0] pc_2;
-wire [12:0] pd_2;
 reg [15:0] colora_2;
 reg [15:0] colorb_2;
 reg [15:0] colorc_2;
 reg [15:0] colord_2;
 reg [fml_depth-1-1:0] dadr_2;
-wire [4:0] ra_2 = colora_2[15:11];
-wire [5:0] ga_2 = colora_2[10:5];
-wire [4:0] ba_2 = colora_2[4:0];
-wire [4:0] rb_2 = colorb_2[15:11];
-wire [5:0] gb_2 = colorb_2[10:5];
-wire [4:0] bb_2 = colorb_2[4:0];
-wire [4:0] rc_2 = colorc_2[15:11];
-wire [5:0] gc_2 = colorc_2[10:5];
-wire [4:0] bc_2 = colorc_2[4:0];
-wire [4:0] rd_2 = colord_2[15:11];
-wire [5:0] gd_2 = colord_2[10:5];
-wire [4:0] bd_2 = colord_2[4:0];
 
 reg valid_3;
+wire [12:0] pa_3;
+wire [12:0] pb_3;
+wire [12:0] pc_3;
+wire [12:0] pd_3;
+reg [15:0] colora_3;
+reg [15:0] colorb_3;
+reg [15:0] colorc_3;
+reg [15:0] colord_3;
 reg [fml_depth-1-1:0] dadr_3;
+wire [4:0] ra_3 = colora_3[15:11];
+wire [5:0] ga_3 = colora_3[10:5];
+wire [4:0] ba_3 = colora_3[4:0];
+wire [4:0] rb_3 = colorb_3[15:11];
+wire [5:0] gb_3 = colorb_3[10:5];
+wire [4:0] bb_3 = colorb_3[4:0];
+wire [4:0] rc_3 = colorc_3[15:11];
+wire [5:0] gc_3 = colorc_3[10:5];
+wire [4:0] bc_3 = colorc_3[4:0];
+wire [4:0] rd_3 = colord_3[15:11];
+wire [5:0] gd_3 = colord_3[10:5];
+wire [4:0] bd_3 = colord_3[4:0];
 
 reg valid_4;
-wire [16:0] ra_4;
-wire [17:0] ga_4;
-wire [16:0] ba_4;
-wire [16:0] rb_4;
-wire [17:0] gb_4;
-wire [16:0] bb_4;
-wire [16:0] rc_4;
-wire [17:0] gc_4;
-wire [16:0] bc_4;
-wire [16:0] rd_4;
-wire [17:0] gd_4;
-wire [16:0] bd_4;
 reg [fml_depth-1-1:0] dadr_4;
 
 reg valid_5;
-reg [16:0] r_5;
-reg [17:0] g_5;
-reg [16:0] b_5;
+wire [16:0] ra_5;
+wire [17:0] ga_5;
+wire [16:0] ba_5;
+wire [16:0] rb_5;
+wire [17:0] gb_5;
+wire [16:0] bb_5;
+wire [16:0] rc_5;
+wire [17:0] gc_5;
+wire [16:0] bc_5;
+wire [16:0] rd_5;
+wire [17:0] gd_5;
+wire [16:0] bd_5;
 reg [fml_depth-1-1:0] dadr_5;
 
 reg valid_6;
-reg [4:0] r_6;
-reg [5:0] g_6;
-reg [4:0] b_6;
+reg [16:0] r_6;
+reg [17:0] g_6;
+reg [16:0] b_6;
 reg [fml_depth-1-1:0] dadr_6;
 
+reg valid_7;
+reg [4:0] r_7;
+reg [5:0] g_7;
+reg [4:0] b_7;
+reg [fml_depth-1-1:0] dadr_7;
+
 always @(posedge sys_clk) begin
        if(sys_rst) begin
                valid_1 <= 1'b0;
@@ -111,6 +122,7 @@ always @(posedge sys_clk) begin
                valid_4 <= 1'b0;
                valid_5 <= 1'b0;
                valid_6 <= 1'b0;
+               valid_7 <= 1'b0;
        end else if(pipe_en) begin
                valid_1 <= pipe_stb_i;
                dadr_1 <= dadr;
@@ -118,7 +130,11 @@ always @(posedge sys_clk) begin
                colorb_1 <= colorb;
                colorc_1 <= colorc;
                colord_1 <= colord;
-
+               x_frac_1 <= x_frac;
+               y_frac_1 <= y_frac;
+               ix_frac_1 <= 7'd64 - x_frac;
+               iy_frac_1 <= 7'd64 - y_frac;
+       
                valid_2 <= valid_1;
                dadr_2 <= dadr_1;
                colora_2 <= colora_1;
@@ -128,147 +144,154 @@ always @(posedge sys_clk) begin
 
                valid_3 <= valid_2;
                dadr_3 <= dadr_2;
+               colora_3 <= colora_2;
+               colorb_3 <= colorb_2;
+               colorc_3 <= colorc_2;
+               colord_3 <= colord_2;
 
                valid_4 <= valid_3;
                dadr_4 <= dadr_3;
 
                valid_5 <= valid_4;
-               r_5 <= ra_4 + rb_4 + rc_4 + rd_4;
-               g_5 <= ga_4 + gb_4 + gc_4 + gd_4;
-               b_5 <= ba_4 + bb_4 + bc_4 + bd_4;
                dadr_5 <= dadr_4;
 
                valid_6 <= valid_5;
-               r_6 <= r_5[16:12] + (r_5[11] & (|r_5[10:0] | r_5[12]));
-               g_6 <= g_5[17:12] + (g_5[11] & (|g_5[10:0] | g_5[12]));
-               b_6 <= b_5[16:12] + (b_5[11] & (|b_5[10:0] | b_5[12]));
+               r_6 <= ra_5 + rb_5 + rc_5 + rd_5;
+               g_6 <= ga_5 + gb_5 + gc_5 + gd_5;
+               b_6 <= ba_5 + bb_5 + bc_5 + bd_5;
                dadr_6 <= dadr_5;
+
+               valid_7 <= valid_6;
+               r_7 <= r_6[16:12] + (r_6[11] & (|r_6[10:0] | r_6[12]));
+               g_7 <= g_6[17:12] + (g_6[11] & (|g_6[10:0] | g_6[12]));
+               b_7 <= b_6[16:12] + (b_6[11] & (|b_6[10:0] | b_6[12]));
+               dadr_7 <= dadr_6;
        end
 end
 
 tmu2_mult2 m_pa(
        .sys_clk(sys_clk),
        .ce(pipe_en),
-       .a(7'd64 - x_frac),
-       .b(7'd64 - y_frac),
-       .p(pa_2)
+       .a(ix_frac_1),
+       .b(iy_frac_1),
+       .p(pa_3)
 );
 tmu2_mult2 m_pb(
        .sys_clk(sys_clk),
        .ce(pipe_en),
-       .a(x_frac),
-       .b(7'd64 - y_frac),
-       .p(pb_2)
+       .a(x_frac_1),
+       .b(iy_frac_1),
+       .p(pb_3)
 );
 tmu2_mult2 m_pc(
        .sys_clk(sys_clk),
        .ce(pipe_en),
-       .a(7'd64 - x_frac),
-       .b(y_frac),
-       .p(pc_2)
+       .a(ix_frac_1),
+       .b(y_frac_1),
+       .p(pc_3)
 );
 tmu2_mult2 m_pd(
        .sys_clk(sys_clk),
        .ce(pipe_en),
-       .a(x_frac),
-       .b(y_frac),
-       .p(pd_2)
+       .a(x_frac_1),
+       .b(y_frac_1),
+       .p(pd_3)
 );
 
 tmu2_mult2 m_ra(
        .sys_clk(sys_clk),
        .ce(pipe_en),
-       .a(pa_2),
-       .b(ra_2),
-       .p(ra_4)
+       .a(pa_3),
+       .b(ra_3),
+       .p(ra_5)
 );
 tmu2_mult2 m_ga(
        .sys_clk(sys_clk),
        .ce(pipe_en),
-       .a(pa_2),
-       .b(ga_2),
-       .p(ga_4)
+       .a(pa_3),
+       .b(ga_3),
+       .p(ga_5)
 );
 tmu2_mult2 m_ba(
        .sys_clk(sys_clk),
        .ce(pipe_en),
-       .a(pa_2),
-       .b(ba_2),
-       .p(ba_4)
+       .a(pa_3),
+       .b(ba_3),
+       .p(ba_5)
 );
 tmu2_mult2 m_rb(
        .sys_clk(sys_clk),
        .ce(pipe_en),
-       .a(pb_2),
-       .b(rb_2),
-       .p(rb_4)
+       .a(pb_3),
+       .b(rb_3),
+       .p(rb_5)
 );
 tmu2_mult2 m_gb(
        .sys_clk(sys_clk),
        .ce(pipe_en),
-       .a(pb_2),
-       .b(gb_2),
-       .p(gb_4)
+       .a(pb_3),
+       .b(gb_3),
+       .p(gb_5)
 );
 tmu2_mult2 m_bb(
        .sys_clk(sys_clk),
        .ce(pipe_en),
-       .a(pb_2),
-       .b(bb_2),
-       .p(bb_4)
+       .a(pb_3),
+       .b(bb_3),
+       .p(bb_5)
 );
 tmu2_mult2 m_rc(
        .sys_clk(sys_clk),
        .ce(pipe_en),
-       .a(pc_2),
-       .b(rc_2),
-       .p(rc_4)
+       .a(pc_3),
+       .b(rc_3),
+       .p(rc_5)
 );
 tmu2_mult2 m_gc(
        .sys_clk(sys_clk),
        .ce(pipe_en),
-       .a(pc_2),
-       .b(gc_2),
-       .p(gc_4)
+       .a(pc_3),
+       .b(gc_3),
+       .p(gc_5)
 );
 tmu2_mult2 m_bc(
        .sys_clk(sys_clk),
        .ce(pipe_en),
-       .a(pc_2),
-       .b(bc_2),
-       .p(bc_4)
+       .a(pc_3),
+       .b(bc_3),
+       .p(bc_5)
 );
 tmu2_mult2 m_rd(
        .sys_clk(sys_clk),
        .ce(pipe_en),
-       .a(pd_2),
-       .b(rd_2),
-       .p(rd_4)
+       .a(pd_3),
+       .b(rd_3),
+       .p(rd_5)
 );
 tmu2_mult2 m_gd(
        .sys_clk(sys_clk),
        .ce(pipe_en),
-       .a(pd_2),
-       .b(gd_2),
-       .p(gd_4)
+       .a(pd_3),
+       .b(gd_3),
+       .p(gd_5)
 );
 tmu2_mult2 m_bd(
        .sys_clk(sys_clk),
        .ce(pipe_en),
-       .a(pd_2),
-       .b(bd_2),
-       .p(bd_4)
+       .a(pd_3),
+       .b(bd_3),
+       .p(bd_5)
 );
 
 /* Glue logic */
 
-assign pipe_stb_o = valid_6;
-assign dadr_f = dadr_6;
-assign color = {r_6, g_6, b_6};
+assign pipe_stb_o = valid_7;
+assign dadr_f = dadr_7;
+assign color = {r_7, g_7, b_7};
 
-assign pipe_en = ~valid_6 | pipe_ack_i;
-assign pipe_ack_o = ~valid_6 | pipe_ack_i;
+assign pipe_en = ~valid_7 | pipe_ack_i;
+assign pipe_ack_o = ~valid_7 | pipe_ack_i;
 
-assign busy = valid_1 | valid_2 | valid_3 | valid_4 | valid_5 | valid_6;
+assign busy = valid_1 | valid_2 | valid_3 | valid_4 | valid_5 | valid_6 | valid_7;
 
 endmodule
diff --git a/cores/tmu2/rtl/tmu2_buffer.v b/cores/tmu2/rtl/tmu2_buffer.v
new file mode 100644 (file)
index 0000000..69d7ed9
--- /dev/null
@@ -0,0 +1,80 @@
+/*
+ * Milkymist VJ SoC
+ * Copyright (C) 2007, 2008, 2009, 2010 Sebastien Bourdeauducq
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, version 3 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/* Two-entry buffer decoupling two strobe/ack pipeline stages. */
+module tmu2_buffer #(
+       parameter width = 8 /* < width of the buffered word in bits */
+) (
+       input sys_clk,
+       input sys_rst,
+       output busy, /* < high while at least one word is stored */
+       input pipe_stb_i,
+       output pipe_ack_o,
+       input [width-1:0] dat_i,
+
+       output pipe_stb_o,
+       input pipe_ack_i,
+       output [width-1:0] dat_o
+);
+
+reg [width-1:0] storage1;
+reg [width-1:0] storage2;
+reg produce; /* < slot written by the next push (0: storage1, 1: storage2) */
+reg consume; /* < slot currently presented on dat_o */
+reg [1:0] level; /* < number of words currently stored */
+
+wire push = pipe_stb_i & pipe_ack_o; /* < a word is accepted this cycle */
+wire pop = pipe_stb_o & pipe_ack_i; /* < a word is handed over this cycle */
+
+always @(posedge sys_clk) begin
+       if(sys_rst) begin
+               produce <= 1'b0;
+               consume <= 1'b0;
+       end else begin
+               if(push)
+                       produce <= ~produce;
+               if(pop)
+                       consume <= ~consume;
+       end
+end
+
+/* Nonblocking only: level drives pipe_ack_o/pipe_stb_o, which the other
+ * clocked blocks sample on this same edge. Push together with pop holds level. */
+always @(posedge sys_clk) begin
+       if(sys_rst)
+               level <= 2'd0;
+       else if(push & ~pop)
+               level <= level + 2'd1;
+       else if(pop & ~push)
+               level <= level - 2'd1;
+end
+
+always @(posedge sys_clk) begin
+       if(push) begin
+               if(produce)
+                       storage2 <= dat_i;
+               else
+                       storage1 <= dat_i;
+       end
+end
+
+assign dat_o = consume ? storage2 : storage1;
+assign busy = |level;
+assign pipe_ack_o = ~level[1];
+assign pipe_stb_o = |level;
+
+endmodule
index 7503e8d..4fb984a 100644 (file)
@@ -51,16 +51,7 @@ module tmu2_ctlif #(
        output reg [10:0] dst_squarew,
        output reg [10:0] dst_squareh,
        output reg alpha_en,
-       output reg [5:0] alpha,
-
-       input [21:0] c_req_a,
-       input [21:0] c_hit_a,
-       input [21:0] c_req_b,
-       input [21:0] c_hit_b,
-       input [21:0] c_req_c,
-       input [21:0] c_hit_c,
-       input [21:0] c_req_d,
-       input [21:0] c_hit_d
+       output reg [5:0] alpha
 );
 
 reg old_busy;
@@ -173,14 +164,6 @@ always @(posedge sys_clk) begin
 
                                5'b10010: csr_do <= alpha;
 
-                               5'b10100: csr_do <= c_req_a;
-                               5'b10101: csr_do <= c_hit_a;
-                               5'b10110: csr_do <= c_req_b;
-                               5'b10111: csr_do <= c_hit_b;
-                               5'b11000: csr_do <= c_req_c;
-                               5'b11001: csr_do <= c_hit_c;
-                               5'b11010: csr_do <= c_req_d;
-                               5'b11011: csr_do <= c_hit_d;
                                default: csr_do <= 32'bx;
                        endcase
                end
index 1f647e3..5eb5b28 100644 (file)
@@ -36,16 +36,22 @@ module tmu2_dpram #(
 
 reg [width-1:0] ram[0:(1 << depth)-1];
 
+reg [width-1:0] do_tmp;
+
 always @(posedge sys_clk) begin
        if(we)
                ram[a] <= di;
-       do <= ram[a];
+       do_tmp <= ram[a];
+       do <= do_tmp;
 end
 
+reg [width-1:0] do2_tmp;
+
 always @(posedge sys_clk) begin
        if(we2)
                ram[a2] <= di2;
-       do2 <= ram[a2];
+       do2_tmp <= ram[a2];
+       do2 <= do2_tmp;
 end
 
 // synthesis translate_off
index b05a180..0fc82c2 100644 (file)
@@ -34,11 +34,16 @@ module tmu2_dpram_sw #(
 
 reg [width-1:0] ram[0:(1 << depth)-1];
 
+reg [width-1:0] do_tmp;
+reg [width-1:0] do2_tmp;
+
 always @(posedge sys_clk) begin
        if(we)
                ram[a] <= di;
-       do <= ram[a];
-       do2 <= ram[a2];
+       do_tmp <= ram[a];
+       do2_tmp <= ram[a2];
+       do <= do_tmp;
+       do2 <= do2_tmp;
 end
 
 
diff --git a/cores/tmu2/rtl/tmu2_qpram32_ss.v b/cores/tmu2/rtl/tmu2_qpram32_ss.v
deleted file mode 100644 (file)
index 77f5897..0000000
+++ /dev/null
@@ -1,82 +0,0 @@
-/*
- * Milkymist VJ SoC
- * Copyright (C) 2007, 2008, 2009, 2010 Sebastien Bourdeauducq
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, version 3 of the License.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-/* qpram32 for Shitty Synthesizers (e.g. Synplify) that cannot use the other BRAM write port */
-
-module tmu2_qpram32 #(
-       parameter depth = 11 /* < log2 of the capacity in 32-bit words */
-) (
-       input sys_clk,
-
-       /* 32-bit read port 1 */
-       input [depth-1:0] a1,
-       output [31:0] d1,
-
-       /* 32-bit read port 2 */
-       input [depth-1:0] a2,
-       output [31:0] d2,
-
-       /* 32-bit read port 3 */
-       input [depth-1:0] a3,
-       output [31:0] d3,
-
-       /* 32-bit read port 4 */
-       input [depth-1:0] a4,
-       output [31:0] d4,
-
-       /* 64-bit write port - we=1 disables read ports */
-       input we,
-       input [depth-1-1:0] aw,
-       input [63:0] dw
-);
-
-wire [63:0] mem_d1;
-wire [63:0] mem_d2;
-wire [63:0] mem_d3;
-wire [63:0] mem_d4;
-reg r1, r2, r3, r4;
-always @(posedge sys_clk) begin
-       r1 <= a1[0];
-       r2 <= a2[0];
-       r3 <= a3[0];
-       r4 <= a4[0];
-end
-tmu2_qpram #(
-       .depth(depth-1),
-       .width(64)
-) workaround (
-       .sys_clk(sys_clk),
-
-       .a1(a1[depth-1:1]),
-       .d1(mem_d1),
-       .a2(a2[depth-1:1]),
-       .d2(mem_d2),
-       .a3(a3[depth-1:1]),
-       .d3(mem_d3),
-       .a4(a4[depth-1:1]),
-       .d4(mem_d4),
-
-       .we(we),
-       .aw(aw),
-       .dw(dw)
-);
-assign d1 = r1 ? mem_d1[31:0] : mem_d1[63:32];
-assign d2 = r2 ? mem_d2[31:0] : mem_d2[63:32];
-assign d3 = r3 ? mem_d3[31:0] : mem_d3[63:32];
-assign d4 = r4 ? mem_d4[31:0] : mem_d4[63:32];
-
-endmodule
index 7c06eec..68d6f1b 100644 (file)
@@ -28,10 +28,10 @@ module tmu2_texcache #(
        input [63:0] fml_di,
 
        input flush,
-       output busy,
+       output reg busy,
 
        input pipe_stb_i,
-       output pipe_ack_o,
+       output reg pipe_ack_o,
        input [fml_depth-1-1:0] dadr, /* in 16-bit words */
        input [fml_depth-1-1:0] tadra,
        input [fml_depth-1-1:0] tadrb,
@@ -40,24 +40,15 @@ module tmu2_texcache #(
        input [5:0] x_frac,
        input [5:0] y_frac,
 
-       output pipe_stb_o,
+       output reg pipe_stb_o,
        input pipe_ack_i,
-       output reg [fml_depth-1-1:0] dadr_f, /* in 16-bit words */
+       output [fml_depth-1-1:0] dadr_f, /* in 16-bit words */
        output [15:0] tcolora,
        output [15:0] tcolorb,
        output [15:0] tcolorc,
        output [15:0] tcolord,
-       output reg [5:0] x_frac_f,
-       output reg [5:0] y_frac_f,
-
-       output reg [21:0] c_req_a,
-       output reg [21:0] c_hit_a,
-       output reg [21:0] c_req_b,
-       output reg [21:0] c_hit_b,
-       output reg [21:0] c_req_c,
-       output reg [21:0] c_hit_c,
-       output reg [21:0] c_req_d,
-       output reg [21:0] c_hit_d
+       output [5:0] x_frac_f,
+       output [5:0] y_frac_f
 );
 
 /*
@@ -74,28 +65,11 @@ module tmu2_texcache #(
  *
  */
 
-/* MEMORIES & HIT HANDLING */
-
-wire [fml_depth-1:0] tadra8 = {tadra, 1'b0};
-wire [fml_depth-1:0] tadrb8 = {tadrb, 1'b0};
-wire [fml_depth-1:0] tadrc8 = {tadrc, 1'b0};
-wire [fml_depth-1:0] tadrd8 = {tadrd, 1'b0};
-
-reg [fml_depth-1:0] tadra8_r;
-reg [fml_depth-1:0] tadrb8_r;
-reg [fml_depth-1:0] tadrc8_r;
-reg [fml_depth-1:0] tadrd8_r;
-
-always @(posedge sys_clk) begin
-       if(pipe_ack_o) begin
-               tadra8_r <= tadra8;
-               tadrb8_r <= tadrb8;
-               tadrc8_r <= tadrc8;
-               tadrd8_r <= tadrd8;
-       end
-end
-
-wire retry; /* < retry the old address after a miss */
+/* MEMORIES */
+reg [fml_depth-1:0] indexa;
+reg [fml_depth-1:0] indexb;
+reg [fml_depth-1:0] indexc;
+reg [fml_depth-1:0] indexd;
 
 wire [31:0] datamem_d1;
 wire [31:0] datamem_d2;
@@ -110,23 +84,19 @@ tmu2_qpram32 #(
 ) datamem (
        .sys_clk(sys_clk),
 
-       .a1(retry ? tadra8_r[cache_depth-1:2] : tadra8[cache_depth-1:2]),
+       .a1(indexa[cache_depth-1:2]),
        .d1(datamem_d1),
-       .a2(retry ? tadrb8_r[cache_depth-1:2] : tadrb8[cache_depth-1:2]),
+       .a2(indexb[cache_depth-1:2]),
        .d2(datamem_d2),
-       .a3(retry ? tadrc8_r[cache_depth-1:2] : tadrc8[cache_depth-1:2]),
+       .a3(indexc[cache_depth-1:2]),
        .d3(datamem_d3),
-       .a4(retry ? tadrd8_r[cache_depth-1:2] : tadrd8[cache_depth-1:2]),
+       .a4(indexd[cache_depth-1:2]),
        .d4(datamem_d4),
 
        .we(datamem_we),
        .aw(datamem_aw),
        .dw(fml_di)
 );
-assign tcolora = tadra8_r[1] ? datamem_d1[15:0] : datamem_d1[31:16];
-assign tcolorb = tadrb8_r[1] ? datamem_d2[15:0] : datamem_d2[31:16];
-assign tcolorc = tadrc8_r[1] ? datamem_d3[15:0] : datamem_d3[31:16];
-assign tcolord = tadrd8_r[1] ? datamem_d4[15:0] : datamem_d4[31:16];
 
 wire [1+fml_depth-cache_depth-1:0] tagmem_d1; /* < valid bit + tag */
 wire [1+fml_depth-cache_depth-1:0] tagmem_d2;
@@ -143,13 +113,13 @@ tmu2_qpram #(
 ) tagmem (
        .sys_clk(sys_clk),
 
-       .a1(retry ? tadra8_r[cache_depth-1:5] : tadra8[cache_depth-1:5]),
+       .a1(indexa[cache_depth-1:5]),
        .d1(tagmem_d1),
-       .a2(retry ? tadrb8_r[cache_depth-1:5] : tadrb8[cache_depth-1:5]),
+       .a2(indexb[cache_depth-1:5]),
        .d2(tagmem_d2),
-       .a3(retry ? tadrc8_r[cache_depth-1:5] : tadrc8[cache_depth-1:5]),
+       .a3(indexc[cache_depth-1:5]),
        .d3(tagmem_d3),
-       .a4(retry ? tadrd8_r[cache_depth-1:5] : tadrd8[cache_depth-1:5]),
+       .a4(indexd[cache_depth-1:5]),
        .d4(tagmem_d4),
 
        .we(tagmem_we),
@@ -157,36 +127,113 @@ tmu2_qpram #(
        .dw(tagmem_dw)
 );
 
-/* HIT HANDLING */
+/* REQUEST TRACKER */
+reg invalidate_req;
+wire rqvalid_0 = pipe_stb_i & ~invalidate_req;
+wire [fml_depth-1-1:0] dadr_0 = dadr;
+wire [5:0] x_frac_0 = x_frac;
+wire [5:0] y_frac_0 = y_frac;
+wire [fml_depth-1:0] tadra8_0 = {tadra, 1'b0};
+wire [fml_depth-1:0] tadrb8_0 = {tadrb, 1'b0};
+wire [fml_depth-1:0] tadrc8_0 = {tadrc, 1'b0};
+wire [fml_depth-1:0] tadrd8_0 = {tadrd, 1'b0};
+
+reg rqvalid_1;
+reg [fml_depth-1-1:0] dadr_1;
+reg [5:0] x_frac_1;
+reg [5:0] y_frac_1;
+reg [fml_depth-1:0] tadra8_1;
+reg [fml_depth-1:0] tadrb8_1;
+reg [fml_depth-1:0] tadrc8_1;
+reg [fml_depth-1:0] tadrd8_1;
+
+reg rqvalid_2;
+reg [fml_depth-1-1:0] dadr_2;
+reg [5:0] x_frac_2;
+reg [5:0] y_frac_2;
+reg ignore_b_2;
+reg ignore_c_2;
+reg ignore_d_2;
+reg [fml_depth-1:0] tadra8_2;
+reg [fml_depth-1:0] tadrb8_2;
+reg [fml_depth-1:0] tadrc8_2;
+reg [fml_depth-1:0] tadrd8_2;
+
+reg rqt_ce;
 
-reg flush_mode;
-
-reg access_requested;
 always @(posedge sys_clk) begin
-       if(sys_rst)
-               access_requested <= 1'b0;
-       else if(pipe_ack_o)
-               access_requested <= pipe_stb_i;
+       if(sys_rst) begin
+               rqvalid_1 <= 1'b0;
+               rqvalid_2 <= 1'b0;
+       end else begin
+               if(rqt_ce) begin
+                       rqvalid_1 <= rqvalid_0;
+                       dadr_1 <= dadr_0;
+                       x_frac_1 <= x_frac_0;
+                       y_frac_1 <= y_frac_0;
+                       tadra8_1 <= tadra8_0;
+                       tadrb8_1 <= tadrb8_0;
+                       tadrc8_1 <= tadrc8_0;
+                       tadrd8_1 <= tadrd8_0;
+
+                       rqvalid_2 <= rqvalid_1;
+                       dadr_2 <= dadr_1;
+                       x_frac_2 <= x_frac_1;
+                       y_frac_2 <= y_frac_1;
+                       ignore_b_2 <= x_frac_1 == 6'd0;
+                       ignore_c_2 <= y_frac_1 == 6'd0;
+                       ignore_d_2 <= (x_frac_1 == 6'd0) | (y_frac_1 == 6'd0);
+                       tadra8_2 <= tadra8_1;
+                       tadrb8_2 <= tadrb8_1;
+                       tadrc8_2 <= tadrc8_1;
+                       tadrd8_2 <= tadrd8_1;
+               end
+       end
 end
 
-/* The cycle after the tag memory has been written, data is invalid */
-reg tagmem_we_r;
-always @(posedge sys_clk) tagmem_we_r <= tagmem_we;
+/* OUTPUT DATA GENERATOR */
+assign dadr_f = dadr_2;
+assign x_frac_f = x_frac_2;
+assign y_frac_f = y_frac_2;
 
-/* If some coordinates are integer, B, C or D can be ignored
- * and safely assumed to be cache hits.
- */
-reg ignore_b;
-reg ignore_c;
-reg ignore_d;
-always @(posedge sys_clk) begin
-       if(pipe_ack_o) begin
-               ignore_b <= x_frac == 6'd0;
-               ignore_c <= y_frac == 6'd0;
-               ignore_d <= (x_frac == 6'd0) | (y_frac == 6'd0);
-       end
+assign tcolora = tadra8_2[1] ? datamem_d1[15:0] : datamem_d1[31:16];
+assign tcolorb = tadrb8_2[1] ? datamem_d2[15:0] : datamem_d2[31:16];
+assign tcolorc = tadrc8_2[1] ? datamem_d3[15:0] : datamem_d3[31:16];
+assign tcolord = tadrd8_2[1] ? datamem_d4[15:0] : datamem_d4[31:16];
+
+/* INDEX GENERATOR */
+reg [1:0] index_sel;
+
+always @(*) begin
+       case(index_sel)
+               2'd0: begin
+                       indexa = tadra8_0;
+                       indexb = tadrb8_0;
+                       indexc = tadrc8_0;
+                       indexd = tadrd8_0;
+               end
+               2'd1: begin
+                       indexa = tadra8_1;
+                       indexb = tadrb8_1;
+                       indexc = tadrc8_1;
+                       indexd = tadrd8_1;
+               end
+               2'd2: begin
+                       indexa = tadra8_2;
+                       indexb = tadrb8_2;
+                       indexc = tadrc8_2;
+                       indexd = tadrd8_2;
+               end
+               default: begin
+                       indexa = {fml_depth{1'bx}};
+                       indexb = {fml_depth{1'bx}};
+                       indexc = {fml_depth{1'bx}};
+                       indexd = {fml_depth{1'bx}};
+               end
+       endcase
 end
 
+/* HIT DETECTION */
 wire valid_a = tagmem_d1[1+fml_depth-cache_depth-1];
 wire [fml_depth-1-cache_depth:0] tag_a = tagmem_d1[fml_depth-cache_depth-1:0];
 wire valid_b = tagmem_d2[1+fml_depth-cache_depth-1];
@@ -196,94 +243,44 @@ wire [fml_depth-1-cache_depth:0] tag_c = tagmem_d3[fml_depth-cache_depth-1:0];
 wire valid_d = tagmem_d4[1+fml_depth-cache_depth-1];
 wire [fml_depth-1-cache_depth:0] tag_d = tagmem_d4[fml_depth-cache_depth-1:0];
 
-wire hit_a = ~tagmem_we_r & valid_a & (tag_a == tadra8_r[fml_depth-1:cache_depth]);
-wire hit_b = ignore_b | (~tagmem_we_r & valid_b & (tag_b == tadrb8_r[fml_depth-1:cache_depth]));
-wire hit_c = ignore_c | (~tagmem_we_r & valid_c & (tag_c == tadrc8_r[fml_depth-1:cache_depth]));
-wire hit_d = ignore_d | (~tagmem_we_r & valid_d & (tag_d == tadrd8_r[fml_depth-1:cache_depth]));
-
-assign pipe_stb_o = access_requested & hit_a & hit_b & hit_c & hit_d;
-assign pipe_ack_o = ~flush_mode & ((pipe_ack_i & pipe_stb_o) | ~access_requested);
-
-assign retry = ~pipe_ack_o;
-
-/* STATISTICS COLLECTION */
-reg pipe_ack_o_r;
-
-always @(posedge sys_clk) begin
-       if(sys_rst)
-               pipe_ack_o_r <= 1'b0;
-       else
-               pipe_ack_o_r <= pipe_ack_o;
-end
-
-always @(posedge sys_clk) begin
-       if(sys_rst|flush) begin
-               c_req_a <= 22'd0;
-               c_hit_a <= 22'd0;
-               c_req_b <= 22'd0;
-               c_hit_b <= 22'd0;
-               c_req_c <= 22'd0;
-               c_hit_c <= 22'd0;
-               c_req_d <= 22'd0;
-               c_hit_d <= 22'd0;
-       end else begin
-               if(pipe_ack_o_r & access_requested) begin
-                       c_req_a <= c_req_a + 22'd1;
-                       if(hit_a)
-                               c_hit_a <= c_hit_a + 22'd1;
-                       if(~ignore_b) begin
-                               c_req_b <= c_req_b + 22'd1;
-                               if(hit_b)
-                                       c_hit_b <= c_hit_b + 22'd1;
-                       end
-                       if(~ignore_c) begin
-                               c_req_c <= c_req_c + 22'd1;
-                               if(hit_c)
-                                       c_hit_c <= c_hit_c + 22'd1;
-                       end
-                       if(~ignore_d) begin
-                               c_req_d <= c_req_d + 22'd1;
-                               if(hit_d)
-                                       c_hit_d <= c_hit_d + 22'd1;
-                       end
-               end
-       end
-end
+wire hit_a = valid_a & (tag_a == tadra8_2[fml_depth-1:cache_depth]);
+wire hit_b = ignore_b_2 | (valid_b & (tag_b == tadrb8_2[fml_depth-1:cache_depth]));
+wire hit_c = ignore_c_2 | (valid_c & (tag_c == tadrc8_2[fml_depth-1:cache_depth]));
+wire hit_d = ignore_d_2 | (valid_d & (tag_d == tadrd8_2[fml_depth-1:cache_depth]));
 
 `ifdef VERIFY_TEXCACHE
-
 integer x, y;
 reg [15:0] expected;
 always @(posedge sys_clk) begin
        if(pipe_stb_o & pipe_ack_i) begin
-               x = (tadra8_r/2) % 512;
-               y = (tadra8_r/2) / 512;
+               x = (tadra8_2/2) % 512;
+               y = (tadra8_2/2) / 512;
                $image_get(x, y, expected);
                if(tcolora != expected) begin
                        $display("CACHE TEST FAILED [A]! (%d, %d): expected %x, got %x", x, y, expected, tcolora);
                        $finish;
                end
-               if(~ignore_b) begin
-                       x = (tadrb8_r/2) % 512;
-                       y = (tadrb8_r/2) / 512;
+               if(~ignore_b_2) begin
+                       x = (tadrb8_2/2) % 512;
+                       y = (tadrb8_2/2) / 512;
                        $image_get(x, y, expected);
                        if(tcolorb != expected) begin
                                $display("CACHE TEST FAILED [B]! (%d, %d): expected %x, got %x", x, y, expected, tcolorb);
                                $finish;
                        end
                end
-               if(~ignore_c) begin
-                       x = (tadrc8_r/2) % 512;
-                       y = (tadrc8_r/2) / 512;
+               if(~ignore_c_2) begin
+                       x = (tadrc8_2/2) % 512;
+                       y = (tadrc8_2/2) / 512;
                        $image_get(x, y, expected);
                        if(tcolorc != expected) begin
                                $display("CACHE TEST FAILED [C]! (%d, %d): expected %x, got %x", x, y, expected, tcolorc);
                                $finish;
                        end
                end
-               if(~ignore_d) begin
-                       x = (tadrd8_r/2) % 512;
-                       y = (tadrd8_r/2) / 512;
+               if(~ignore_d_2) begin
+                       x = (tadrd8_2/2) % 512;
+                       y = (tadrd8_2/2) / 512;
                        $image_get(x, y, expected);
                        if(tcolord != expected) begin
                                $display("CACHE TEST FAILED [D]! (%d, %d): expected %x, got %x", x, y, expected, tcolord);
@@ -292,42 +289,23 @@ always @(posedge sys_clk) begin
                end
        end
 end
-
 `endif
 
-/* FORWARDING */
-
-always @(posedge sys_clk) begin
-       if(pipe_ack_o & pipe_stb_i) begin
-               dadr_f <= dadr;
-               x_frac_f <= x_frac;
-               y_frac_f <= y_frac;
-       end
-end
-
-/* MISS HANDLING */
-
-reg fetch_needed;
+/* FLUSH & MISS HANDLING */
 reg [fml_depth-1:0] fetch_adr;
 
 always @(posedge sys_clk) begin
-       if(sys_rst)
-               fetch_needed <= 1'b0;
-       else begin
-               if(access_requested) begin
-                       fetch_needed <= ~(hit_a & hit_b & hit_c & hit_d);
-                       if(~hit_a)
-                               fetch_adr <= tadra8_r;
-                       else if(~hit_b)
-                               fetch_adr <= tadrb8_r;
-                       else if(~hit_c)
-                               fetch_adr <= tadrc8_r;
-                       else if(~hit_d)
-                               fetch_adr <= tadrd8_r;
-               end
-       end
+       if(~hit_a)
+               fetch_adr <= tadra8_2;
+       else if(~hit_b)
+               fetch_adr <= tadrb8_2;
+       else if(~hit_c)
+               fetch_adr <= tadrc8_2;
+       else if(~hit_d)
+               fetch_adr <= tadrd8_2;
 end
 
+reg flush_mode;
 wire flush_done;
 reg [cache_depth-1-5:0] flush_counter;
 always @(posedge sys_clk) begin
@@ -342,31 +320,24 @@ reg write_valid;
 assign tagmem_aw = flush_mode ? flush_counter : fetch_adr[cache_depth-1:5];
 assign tagmem_dw = {write_valid, fetch_adr[fml_depth-1:cache_depth]};
 
-reg burst_count;
 reg [1:0] burst_counter;
-always @(posedge sys_clk) begin
-       if(burst_count)
-               burst_counter <= burst_counter + 2'd1;
-       else
-               burst_counter <= 2'd0;
-end
 assign datamem_aw = {fetch_adr[cache_depth-1:5], burst_counter};
 
 assign fml_adr = {fetch_adr[fml_depth-1:5], 5'd0};
 
-/* FSM controller */
-
-reg [2:0] state;
-reg [2:0] next_state;
+/* FSM-BASED CONTROLLER */
+reg [3:0] state;
+reg [3:0] next_state;
 
-parameter IDLE         = 3'd0;
-parameter DATA1                = 3'd1;
-parameter DATA2                = 3'd2;
-parameter DATA3                = 3'd3;
-parameter DATA4                = 3'd4;
-parameter WAIT         = 3'd5;
-parameter WAIT2                = 3'd6;
-parameter FLUSH                = 3'd7;
+parameter IDLE         = 4'd0;
+parameter DATA1                = 4'd1;
+parameter DATA2                = 4'd2;
+parameter DATA3                = 4'd3;
+parameter DATA4                = 4'd4;
+parameter HANDLED_MISS = 4'd5;
+parameter CHECK_REPLAY0        = 4'd6;
+parameter CHECK_REPLAY = 4'd7;
+parameter FLUSH                = 4'd8;
 
 always @(posedge sys_clk) begin
        if(sys_rst)
@@ -375,7 +346,15 @@ always @(posedge sys_clk) begin
                state <= next_state;
 end
 
-reg fsm_busy;
+reg replaying;
+reg next_replaying;
+
+always @(posedge sys_clk) begin
+       if(sys_rst)
+               replaying <= 1'b0;
+       else
+               replaying <= next_replaying;
+end
 
 always @(*) begin
        next_state = state;
@@ -384,47 +363,94 @@ always @(*) begin
        write_valid = 1'b1;
 
        datamem_we = 1'b0;
-       burst_count = 1'b0;
+       burst_counter = 2'bx;
 
        flush_mode = 1'b0;
 
        fml_stb = 1'b0;
 
-       fsm_busy = 1'b1;
+       busy = 1'b1;
+       pipe_stb_o = 1'b0;
+       pipe_ack_o = 1'b0;
+
+       invalidate_req = 1'b0;
+       rqt_ce = 1'b0;
+
+       index_sel = 2'd0;
+
+       next_replaying = replaying;
 
        case(state)
                IDLE: begin
-                       fsm_busy = 1'b0;
-                       if(fetch_needed)
+                       busy = rqvalid_1|rqvalid_2;
+                       pipe_stb_o = rqvalid_2 & hit_a & hit_b & hit_c & hit_d;
+                       pipe_ack_o = ~rqvalid_2 | (hit_a & hit_b & hit_c & hit_d);
+                       rqt_ce = ~rqvalid_2 | (hit_a & hit_b & hit_c & hit_d);
+                       if(rqvalid_2 & (~hit_a | ~hit_b | ~hit_c | ~hit_d))
                                next_state = DATA1;
                        if(flush)
                                next_state = FLUSH;
                end
                DATA1: begin
+                       index_sel = 2'd2;
                        fml_stb = 1'b1;
+                       burst_counter = 2'd0;
                        datamem_we = 1'b1;
-                       if(fml_ack) begin
-                               burst_count = 1'b1;
+                       tagmem_we = 1'b1;
+                       if(fml_ack)
                                next_state = DATA2;
-                       end
                end
                DATA2: begin
+                       index_sel = 2'd2;
+                       burst_counter = 2'd1;
                        datamem_we = 1'b1;
-                       burst_count = 1'b1;
                        next_state = DATA3;
                end
                DATA3: begin
+                       index_sel = 2'd2;
+                       burst_counter = 2'd2;
                        datamem_we = 1'b1;
-                       burst_count = 1'b1;
                        next_state = DATA4;
                end
                DATA4: begin
+                       index_sel = 2'd2;
+                       burst_counter = 2'd3;
                        datamem_we = 1'b1;
-                       tagmem_we = 1'b1; /* write tag last as it may unlock the pipeline */
-                       next_state = WAIT;
+                       if(~hit_a | ~hit_b | ~hit_c | ~hit_d)
+                               next_state = DATA1;
+                       else
+                               next_state = HANDLED_MISS;
+               end
+               HANDLED_MISS: begin
+                       index_sel = 2'd1;
+                       pipe_stb_o = 1'b1;
+                       if(pipe_ack_i) begin
+                               rqt_ce = 1'b1;
+                               if(replaying) begin
+                                       index_sel = 2'd0;
+                                       next_replaying = 1'b0;
+                                       next_state = IDLE;
+                               end else begin
+                                       invalidate_req = 1'b1;
+                                       next_state = CHECK_REPLAY0;
+                               end
+                       end
+               end
+               CHECK_REPLAY0: begin
+                       index_sel = 2'd2;
+                       next_state = CHECK_REPLAY;
+               end
+               CHECK_REPLAY: begin
+                       index_sel = 2'd2;
+                       if(rqvalid_2 & (~hit_a | ~hit_b | ~hit_c | ~hit_d)) begin
+                               next_replaying = 1'b1;
+                               next_state = DATA1;
+                       end else begin
+                               index_sel = 2'd0;
+                               rqt_ce = 1'b1;
+                               next_state = IDLE;
+                       end
                end
-               WAIT: next_state = WAIT2; /* wait for fetch_needed to reflect the updated tag */
-               WAIT2: next_state = IDLE;
                FLUSH: begin
                        tagmem_we = 1'b1;
                        write_valid = 1'b0;
@@ -435,6 +461,4 @@ always @(*) begin
        endcase
 end
 
-assign busy = fsm_busy | access_requested;
-
 endmodule
index 9720bef..a8f60ea 100644 (file)
@@ -29,6 +29,7 @@ SOURCES= \
 ../rtl/tmu2_vinterp.v \
 ../rtl/tmu2_fdest.v \
 ../rtl/tmu2_alpha.v \
+../rtl/tmu2_buffer.v \
 $(ARBSRC)
 
 SOURCES_VIRTEX4= \
index 4cc66b3..489ba2a 100644 (file)
@@ -207,7 +207,7 @@ always @(posedge sys_clk) begin
                        
                        handle_read(0, read_addr);
 
-                       //$display("Starting   FML burst READ at address %x, data=%x", read_addr, fmlr_di);
+                       $display("Starting   FML burst READ at address %x, data=%x", read_addr, fmlr_di);
                        
                        fmlr_ack = 1'b1;
                end
@@ -217,7 +217,7 @@ always @(posedge sys_clk) begin
                
                handle_read(0, read_addr);
 
-               //$display("Continuing FML burst READ at address %x, data=%x", read_addr, fmlr_di);
+               $display("Continuing FML burst READ at address %x, data=%x", read_addr, fmlr_di);
                
                if(read_burstcount == 4)
                        read_burstcount = 0;
diff --git a/software/demo/ui.c b/software/demo/ui.c
deleted file mode 100644 (file)
index 48e4541..0000000
+++ /dev/null
@@ -1,332 +0,0 @@
-/*
- * Milkymist VJ SoC (Software)
- * Copyright (C) 2007, 2008, 2009, 2010 Sebastien Bourdeauducq
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, version 3 of the License.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-#include <stdio.h>
-#include <string.h>
-#include <cffat.h>
-#include <irq.h>
-#include <version.h>
-#include <hw/interrupts.h>
-#include <hw/sysctl.h>
-#include <hw/gpio.h>
-
-#include <hal/hdlcd.h>
-#include <hal/time.h>
-
-#include "renderer.h"
-#include "rpipe.h"
-#include "cpustats.h"
-#include "ui.h"
-
-enum {
-       STATE_MAIN,
-       STATE_PATCHLIST,
-       STATE_RENDERING
-};
-
-static int state;
-static int switch_count;
-static int display_title;
-static char patch_author[128];
-static char patch_title[128];
-static int patch_lifetime;
-
-#define MAX_PATCHES 64
-static char patch_list[MAX_PATCHES][64];
-static int patch_list_count;
-static int patch_list_index;
-
-static void refresh_screen()
-{
-       switch(state) {
-               case STATE_MAIN:
-                       hdlcd_clear();
-                       hdlcd_printf("Milkymist v"VERSION"\n[UP]List [DN]Rnd");
-                       break;
-               case STATE_PATCHLIST:
-                       hdlcd_printf("%16s\n[UP] [DOWN] [OK]", patch_list[patch_list_index]);
-                       break;
-               case STATE_RENDERING: {
-                       char *orig;
-                       char firstline[17];
-                       int i;
-
-                       orig = display_title ? patch_title : patch_author;
-                       for(i=0;orig[i] && i<16;i++)
-                               firstline[i] = orig[i];
-                       for(;i<16;i++)
-                               firstline[i] = ' ';
-                       firstline[16] = 0;
-                       if(display_title || (patch_lifetime == -1))
-                               hdlcd_printf("%s\nFPS:%02d  CPU:%02d%% ", firstline, rpipe_fps(), cpustats_load());
-                       else
-                               hdlcd_printf("%s\nTime rem.: %02d   ", firstline, patch_lifetime);
-                       break;
-               }
-       }
-}
-
-static int getname_cb(const char *filename, const char *longname, void *param)
-{
-       if(strcmp(filename, (char *)param) == 0) {
-               char *c;
-               char *c2;
-               int len;
-
-               c = strchr(longname, '-');
-               if(c == NULL) return 0;
-               len = (c - longname)-1;
-               if(len < 1) return 0;
-               strncpy(patch_author, longname, len);
-               patch_author[len] = 0;
-
-               c++;
-               if(*c == 0) return 0;
-               c++;
-               if(*c == 0) return 0;
-               c2 = strrchr(longname, '.');
-               len = c2 - c;
-               if(len < 1) return 0;
-               strncpy(patch_title, c, len);
-               patch_title[len] = 0;
-               
-               return 0;
-       }
-       return 1;
-}
-
-int ui_render_from_file(const char *filename, int random)
-{
-       char buffer[8192];
-       int size;
-
-       if(!cffat_init()) return 0;
-       if(!cffat_load(filename, buffer, sizeof(buffer), &size)) return 0;
-       strcpy(patch_author, "<unknown author>");
-       strcpy(patch_title, "<unknown title>");
-       cffat_list_files(getname_cb, (void *)filename);
-       cffat_done();
-       buffer[size] = 0;
-
-       if(!renderer_start(buffer)) return 0;
-       if(random)
-               patch_lifetime = (rand() % 70) + 10;
-       else
-               patch_lifetime = -1;
-       state = STATE_RENDERING;
-       switch_count = 0;
-       display_title = 0;
-       refresh_screen();
-       return 1;
-}
-
-void ui_render_stop()
-{
-       renderer_stop();
-       state = STATE_MAIN;
-       refresh_screen();
-}
-
-static int listpatches_cb(const char *filename, const char *longname, void *param)
-{
-       int len;
-
-       len = strlen(filename);
-       if((len > 4) && (strcmp(filename+len-4, ".MIL") == 0)) {
-               if(patch_list_count < MAX_PATCHES) {
-                       strcpy(patch_list[patch_list_count], filename);
-                       patch_list_count++;
-               }
-       }
-       return 1;
-}
-
-static void list_patches()
-{
-       patch_list_index = 0;
-       patch_list_count = 0;
-       if(!cffat_init()) return;
-       cffat_list_files(listpatches_cb, NULL);
-       cffat_done();
-       if(patch_list_count > 0) {
-               state = STATE_PATCHLIST;
-               refresh_screen();
-       }
-}
-
-static void select_patch(int up)
-{
-       if(up) {
-               if(patch_list_index > 0)
-                       patch_list_index--;
-       } else {
-               if(patch_list_index < (patch_list_count-1))
-                       patch_list_index++;
-       }
-       refresh_screen();
-}
-
-static void start_patch()
-{
-       if(!ui_render_from_file(patch_list[patch_list_index], 0)) {
-               state = STATE_MAIN;
-               refresh_screen();
-       }
-}
-
-static void random_mode()
-{
-       patch_list_count = 0;
-       if(!cffat_init()) return;
-       cffat_list_files(listpatches_cb, NULL);
-       cffat_done();
-       if(patch_list_count > 0)
-               ui_render_from_file(patch_list[rand() % patch_list_count], 1);
-}
-
-enum {
-       KEY_N = 0,
-       KEY_S,
-       KEY_C,
-
-       KEY_COUNT /* must be last */
-};
-
-static struct timestamp last_press[KEY_COUNT];
-
-enum {
-       CMD_NONE,
-       CMD_KEY,
-       CMD_TICK
-};
-
-/* this may lose some events if two happen at the same time,
- * but this is a rare condition without much consequences.
- */
-static int ui_cmd;
-static int ui_key;
-
-#define UI_GPIO (GPIO_PBN|GPIO_PBS|GPIO_PBC)
-
-void ui_init()
-{
-       unsigned int mask;
-       int i;
-
-       state = STATE_MAIN;
-       ui_cmd = CMD_NONE;
-
-       time_get(&last_press[0]);
-       for(i=1;i<KEY_COUNT;i++)
-               last_press[i] = last_press[0];
-
-       CSR_GPIO_INTEN |= UI_GPIO;
-
-       mask = irq_getmask();
-       mask |= IRQ_GPIO;
-       irq_setmask(mask);
-
-       refresh_screen();
-}
-
-static void handle_key(unsigned int n)
-{
-       struct timestamp now;
-       struct timestamp diff;
-       unsigned int msec;
-
-       /* Debounce */
-       time_get(&now);
-       time_diff(&diff, &now, &last_press[n]);
-       msec = diff.sec*1000+diff.usec/1000;
-       if(msec < 100) return;
-       last_press[n] = now;
-
-       if(ui_cmd != CMD_NONE) return;
-       ui_key = n;
-       ui_cmd = CMD_KEY;
-}
-
-void ui_isr_key()
-{
-       unsigned int keys;
-
-       keys = CSR_GPIO_IN;
-
-       if(keys & GPIO_PBN)
-               handle_key(KEY_N);
-       if(keys & GPIO_PBS)
-               handle_key(KEY_S);
-       if(keys & GPIO_PBC)
-               handle_key(KEY_C);
-}
-
-void ui_tick()
-{
-       if(ui_cmd == CMD_NONE)
-               ui_cmd = CMD_TICK;
-}
-
-void ui_service()
-{
-       if(ui_cmd == CMD_NONE) return;
-       cpustats_enter();
-       if(ui_cmd == CMD_KEY) {
-               switch(state) {
-                       case STATE_MAIN:
-                               if(ui_key == KEY_N) list_patches();
-                               else if(ui_key == KEY_S) random_mode();
-                               break;
-                       case STATE_PATCHLIST:
-                               switch(ui_key) {
-                                       case KEY_N:
-                                               select_patch(1);
-                                               break;
-                                       case KEY_S:
-                                               select_patch(0);
-                                               break;
-                                       case KEY_C:
-                                               start_patch();
-                                               break;
-                               }
-                               break;
-                       case STATE_RENDERING:
-                               if(ui_key == KEY_C) ui_render_stop();
-                               break;
-               }
-       }
-       if(ui_cmd == CMD_TICK) {
-               if(state == STATE_RENDERING) {
-                       if(patch_lifetime != -1) {
-                               patch_lifetime--;
-                               if(patch_lifetime == 0) {
-                                       ui_render_from_file(patch_list[rand() % patch_list_count], 1);
-                                       goto end_service;
-                               }
-                       }
-                       switch_count++;
-                       if(switch_count > 2) {
-                               display_title = !display_title;
-                               switch_count = 0;
-                       }
-                       refresh_screen();
-               }
-       }
-end_service:
-       ui_cmd = CMD_NONE;
-       cpustats_leave();
-}
diff --git a/software/demo/ui.h b/software/demo/ui.h
deleted file mode 100644 (file)
index b5aa37a..0000000
+++ /dev/null
@@ -1,29 +0,0 @@
-/*
- * Milkymist VJ SoC (Software)
- * Copyright (C) 2007, 2008, 2009, 2010 Sebastien Bourdeauducq
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, version 3 of the License.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-#ifndef __UI_H
-#define __UI_H
-
-int ui_render_from_file(const char *filename, int random);
-void ui_render_stop();
-
-void ui_init();
-void ui_isr_key();
-void ui_tick();
-void ui_service();
-
-#endif /* __UI_H */
diff --git a/software/include/hal/hdlcd.h b/software/include/hal/hdlcd.h
deleted file mode 100644 (file)
index 4aba1d0..0000000
+++ /dev/null
@@ -1,25 +0,0 @@
-/*
- * Milkymist VJ SoC (Software)
- * Copyright (C) 2007, 2008, 2009 Sebastien Bourdeauducq
- * 
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, version 3 of the License.
- * 
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- * 
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-#ifndef __HAL_HDLCD_H
-#define __HAL_HDLCD_H
-
-void hdlcd_init();
-void hdlcd_clear();
-int hdlcd_printf(const char *fmt, ...);
-
-#endif /* __HAL_HDLCD_H */
diff --git a/software/include/hal/slowout.h b/software/include/hal/slowout.h
deleted file mode 100644 (file)
index 73449a0..0000000
+++ /dev/null
@@ -1,25 +0,0 @@
-/*
- * Milkymist VJ SoC (Software)
- * Copyright (C) 2007, 2008, 2009 Sebastien Bourdeauducq
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, version 3 of the License.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-#ifndef __HAL_SLOWOUT_H
-#define __HAL_SLOWOUT_H
-
-void slowout_init();
-void slowout_isr();
-int slowout_queue(unsigned int duration, unsigned int mask);
-
-#endif /* __HAL_SLOWOUT_H */
diff --git a/software/libhal/hdlcd.c b/software/libhal/hdlcd.c
deleted file mode 100644 (file)
index 9c933a4..0000000
+++ /dev/null
@@ -1,106 +0,0 @@
-/*
- * Milkymist VJ SoC (Software)
- * Copyright (C) 2007, 2008, 2009 Sebastien Bourdeauducq
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, version 3 of the License.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-#include <stdlib.h>
-#include <stdio.h>
-#include <stdarg.h>
-#include <board.h>
-#include <hw/sysctl.h>
-#include <hw/gpio.h>
-
-#include <hal/brd.h>
-#include <hal/slowout.h>
-#include <hal/hdlcd.h>
-
-/* In tenths of microseconds */
-#define SETUP_DELAY 3
-#define CLOCK_DELAY 130
-#define OP_DELAY 10000
-
-/* In clock cycles */
-static unsigned int setup_delay;
-static unsigned int clock_delay;
-static unsigned int op_delay;
-
-static void hdlcd_send_byte(char c, int isdata)
-{
-       unsigned int mask;
-       unsigned int b = c;
-
-       if(isdata)
-               mask = GPIO_HDLCDRS;
-       else
-               mask = 0;
-
-       slowout_queue(setup_delay, mask|(((b & 0xf0) >> 4) << GPIO_HDLCDD_SHIFT));
-       slowout_queue(clock_delay, mask|GPIO_HDLCDE|(((b & 0xf0) >> 4) << GPIO_HDLCDD_SHIFT));
-       slowout_queue(clock_delay, mask|(((b & 0xf0) >> 4) << GPIO_HDLCDD_SHIFT));
-
-       slowout_queue(setup_delay, mask|((b & 0x0f) << GPIO_HDLCDD_SHIFT));
-       slowout_queue(clock_delay, mask|GPIO_HDLCDE|((b & 0x0f) << GPIO_HDLCDD_SHIFT));
-       slowout_queue(clock_delay+op_delay, mask|((b & 0x0f) << GPIO_HDLCDD_SHIFT));
-}
-
-void hdlcd_init()
-{
-       setup_delay = SETUP_DELAY*(brd_desc->clk_frequency/10000000);
-       clock_delay = CLOCK_DELAY*(brd_desc->clk_frequency/10000000);
-       op_delay = OP_DELAY*(brd_desc->clk_frequency/10000000);
-
-       if(!(CSR_GPIO_IN & GPIO_DIP3)) {
-               /* Select 4-bit operation on the LCD */
-               slowout_queue(setup_delay, GPIO_HDLCDD5);
-               slowout_queue(clock_delay, GPIO_HDLCDE|GPIO_HDLCDD5);
-               slowout_queue(clock_delay+op_delay, GPIO_HDLCDD5);
-       }
-       
-       /* Set up the LCD */
-       hdlcd_send_byte(0x28, 0);
-       hdlcd_send_byte(0x0c, 0);
-       hdlcd_send_byte(0x06, 0);
-       hdlcd_send_byte(0x01, 0);
-
-       printf("LCD: ready\n");
-}
-
-void hdlcd_clear()
-{
-       hdlcd_send_byte(0x01, 0);
-}
-
-int hdlcd_printf(const char *fmt, ...)
-{
-       va_list args;
-       char buffer[34];
-       unsigned i;
-       int len;
-
-       va_start(args, fmt);
-       len = vscnprintf(buffer, sizeof(buffer), fmt, args);
-       va_end(args);
-       
-       hdlcd_send_byte(0x80, 0);
-
-       for(i=0;i<len;i++) {
-               if(buffer[i] == '\n')
-                       hdlcd_send_byte(0xc0, 0);
-               else
-                       hdlcd_send_byte(buffer[i], 1);
-       }
-
-       return len;
-}
diff --git a/software/libhal/slowout.c b/software/libhal/slowout.c
deleted file mode 100644 (file)
index ad42bcf..0000000
+++ /dev/null
@@ -1,106 +0,0 @@
-/*
- * Milkymist VJ SoC (Software)
- * Copyright (C) 2007, 2008, 2009 Sebastien Bourdeauducq
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, version 3 of the License.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-/* Asynchronous slow outputs - Using HW timer 1 */
-
-#include <stdio.h>
-#include <irq.h>
-#include <hw/interrupts.h>
-#include <hw/sysctl.h>
-
-#include <hal/slowout.h>
-
-struct slowout_operation {
-       unsigned int duration;
-       unsigned int mask;
-};
-
-#define OPQ_SIZE 1024 /* < must be a power of 2 */
-#define OPQ_MASK (OPQ_SIZE-1)
-
-static struct slowout_operation queue[OPQ_SIZE];
-static unsigned int produce;
-static unsigned int consume;
-static unsigned int level;
-static int cts;
-
-void slowout_init()
-{
-       unsigned int mask;
-
-       produce = 0;
-       consume = 0;
-       level = 0;
-       cts = 1;
-
-       /* Reset timer */
-       CSR_TIMER1_CONTROL = 0;
-       irq_ack(IRQ_TIMER1);
-
-       mask = irq_getmask();
-       mask |= IRQ_TIMER1;
-       irq_setmask(mask);
-
-       printf("SLO: slow outputs initialized\n");
-}
-
-static void slowout_start(struct slowout_operation *op)
-{
-       CSR_GPIO_OUT = op->mask;
-       CSR_TIMER1_COUNTER = 0;
-       CSR_TIMER1_COMPARE = op->duration;
-       CSR_TIMER1_CONTROL = TIMER_ENABLE;
-}
-
-void slowout_isr()
-{
-       irq_ack(IRQ_TIMER1);
-       consume = (consume + 1) & OPQ_MASK;
-       level--;
-       if(level > 0)
-               slowout_start(&queue[consume]);
-       else
-               cts = 1;
-}
-
-int slowout_queue(unsigned int duration, unsigned int mask)
-{
-       unsigned int oldmask;
-
-       oldmask = irq_getmask();
-       irq_setmask(oldmask & (~IRQ_TIMER1));
-
-       if(level >= OPQ_SIZE) {
-               irq_setmask(oldmask);
-               printf("SLO: opq overflow\n");
-               return 0;
-       }
-
-       queue[produce].duration = duration;
-       queue[produce].mask = mask;
-       if(cts) {
-               cts = 0;
-               slowout_start(&queue[produce]);
-       }
-       produce = (produce + 1) & OPQ_MASK;
-       level++;
-
-       irq_setmask(oldmask);
-
-       return 1;
-}
-