Simpler and smaller PFPU DMA engine
author lekernel <sebastien.bourdeauducq@lekernel.net>
Mon, 15 Feb 2010 23:02:10 +0000 (00:02 +0100)
committer lekernel <sebastien.bourdeauducq@lekernel.net>
Mon, 15 Feb 2010 23:02:10 +0000 (00:02 +0100)
cores/pfpu/rtl/pfpu.v
cores/pfpu/rtl/pfpu_ctlif.v
cores/pfpu/rtl/pfpu_dma.v
cores/pfpu/rtl/pfpu_seq.v
cores/pfpu/test/complete/tb_pfpu.v
software/demo/shell.c
software/include/hw/pfpu.h

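diff --git a/cores/pfpu/rtl/pfpu.v b/cores/pfpu/rtl/pfpu.v
index 059cc64..e8f0390 100644 (file)
--- a/cores/pfpu/rtl/pfpu.v
+++ b/cores/pfpu/rtl/pfpu.v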
@@ -105,7 +105,6 @@ pfpu_regf regf(
 wire [31:0] dma_adr;
 wire dma_busy;
 wire dma_ack;
-wire [2:0] dma_pending;
 pfpu_dma dma(
        .sys_clk(sys_clk),
        .sys_rst(sys_rst),
@@ -124,9 +123,7 @@ pfpu_dma dma(
        .wbm_adr_o(wbm_adr_o),
        .wbm_cyc_o(wbm_cyc_o),
        .wbm_stb_o(wbm_stb_o),
-       .wbm_ack_i(wbm_ack_i),
-
-       .dma_pending(dma_pending)       /* < to control interface */
+       .wbm_ack_i(wbm_ack_i)
 );
 
 wire vfirst;
@@ -231,8 +228,10 @@ pfpu_ctlif #(
        .vnext(vnext),                  /* < from sequencer */
        .err_collision(err_collision),  /* < from ALU */
        .err_stray(err_stray),          /* < from register file */
-       .dma_pending(dma_pending),      /* < from DMA engine */
-       .pc(pc)                         /* < from program memory */
+       .pc(pc),                        /* < from program memory */
+
+       .wbm_adr_o(wbm_adr_o),          /* < from DMA engine */
+       .wbm_ack_i(wbm_ack_i)           /* < from DMA engine */
 );
 
 endmodule
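diff --git a/cores/pfpu/rtl/pfpu_ctlif.v b/cores/pfpu/rtl/pfpu_ctlif.v
index 2f20dae..0328fbd 100644 (file)
--- a/cores/pfpu/rtl/pfpu_ctlif.v
+++ b/cores/pfpu/rtl/pfpu_ctlif.v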
@@ -53,10 +53,17 @@ module pfpu_ctlif #(
        input vnext,
        input err_collision,
        input err_stray,
-       input [2:0] dma_pending,
-       input [10:0] pc
+       input [10:0] pc,
+
+       input [31:0] wbm_adr_o,
+       input wbm_ack_i
 );
 
+reg [31:0] last_dma;
+always @(posedge sys_clk)
+       if(wbm_ack_i)
+               last_dma <= wbm_adr_o;
+
 reg old_busy;
 always @(posedge sys_clk) begin
        if(sys_rst)
@@ -119,7 +126,7 @@ always @(posedge sys_clk) begin
                        4'b0101: csr_do_r <= vertex_counter;
                        4'b0110: csr_do_r <= collision_counter;
                        4'b0111: csr_do_r <= stray_counter;
-                       4'b1000: csr_do_r <= dma_pending;
+                       4'b1000: csr_do_r <= last_dma;
                        4'b1001: csr_do_r <= pc;
 
                        default: csr_do_r <= 32'bx;
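diff --git a/cores/pfpu/rtl/pfpu_dma.v b/cores/pfpu/rtl/pfpu_dma.v
index 2b91922..6feece2 100644 (file)
--- a/cores/pfpu/rtl/pfpu_dma.v
+++ b/cores/pfpu/rtl/pfpu_dma.v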
@@ -29,93 +29,46 @@ module pfpu_dma(
        output ack,
        output busy,
 
-       output reg [31:0] wbm_dat_o,
-       output reg [31:0] wbm_adr_o,
+       output [31:0] wbm_dat_o,
+       output [31:0] wbm_adr_o,
        output wbm_cyc_o,
-       output wbm_stb_o,
-       input wbm_ack_i,
-
-       output [2:0] dma_pending
+       output reg wbm_stb_o,
+       input wbm_ack_i
 );
 
-/* FIFO logic */
-
-parameter q_width = 7+7+64;
-
-wire q_p;
-wire q_c;
-wire [q_width-1:0] q_i;
-wire [q_width-1:0] q_o;
-wire full;
-wire empty;
-
-reg [1:0] produce;
-reg [1:0] consume;
-reg [2:0] level;
-reg [q_width-1:0] wq[0:3];
+reg write_y;
+reg [28:0] vector_start;
+reg [31:0] dma_d1_r;
+reg [31:0] dma_d2_r;
 
 always @(posedge sys_clk) begin
        if(sys_rst) begin
-               produce = 2'd0;
-               consume = 2'd0;
-               level = 3'd0;
+               vector_start <= 29'd0;
+               write_y <= 1'b0;
+               wbm_stb_o <= 1'b0;
        end else begin
-               if(q_p) begin
-                       wq[produce] = q_i;
-                       produce = produce + 2'd1;
-                       level = level + 3'd1;
+               if(dma_en) begin
+                       wbm_stb_o <= 1'b1;
+                       write_y <= 1'b0;
+                       vector_start <= dma_base + {y, x};
+                       dma_d1_r <= dma_d1;
+                       dma_d2_r <= dma_d2;
                end
-               if(q_c) begin
-                       consume = consume + 2'd1;
-                       level = level - 3'd1;
+               if(wbm_ack_i) begin
+                       if(write_y)
+                               wbm_stb_o <= 1'b0;
+                       else
+                               write_y <= ~write_y;
                end
        end
 end
 
-assign q_o = wq[consume];
-assign full = level[2];
-assign empty = (level == 3'd0);
-
-// synthesis translate_off
-always @(posedge sys_clk) begin
-       if(full & q_p) begin
-               $display("ERROR - Writing to full DMA write queue");
-               $finish;
-       end
-       if(empty & q_c) begin
-               $display("ERROR - Reading from empty DMA write queue");
-               $finish;
-       end
-end
-// synthesis translate_on
-
-/* Interface */
-reg write_y;
-
-assign q_p = dma_en;
-assign q_c = wbm_ack_i & write_y;
-assign q_i = {dma_d1, dma_d2, y, x};
-
-always @(posedge sys_clk) begin
-       wbm_adr_o <= {dma_base, 3'd0} + {q_o[13:0], write_y, 2'd0};
-       wbm_dat_o <= write_y ? q_o[45:14] : q_o[q_width-1:46];
-end
-
-reg bus_not_valid_yet;
-always @(posedge sys_clk)
-       bus_not_valid_yet <= wbm_ack_i;
-
-always @(posedge sys_clk) begin
-       if(sys_rst)
-               write_y <= 1'b0;
-       else if(wbm_ack_i) write_y <= ~write_y;
-end
+assign wbm_adr_o = {vector_start, write_y, 2'b00};
+assign wbm_dat_o = write_y ? dma_d2_r : dma_d1_r;
 
-assign wbm_cyc_o = ~empty & ~bus_not_valid_yet;
-assign wbm_stb_o = ~empty & ~bus_not_valid_yet;
+assign wbm_cyc_o = wbm_stb_o;
 
-assign ack = ~full;
-assign busy = ~empty;
-assign dma_pending = level;
+assign ack = ~wbm_stb_o;
+assign busy = wbm_stb_o;
 
 endmodule
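diff --git a/cores/pfpu/rtl/pfpu_seq.v b/cores/pfpu/rtl/pfpu_seq.v
index 3900f94..5cd256a 100644 (file)
--- a/cores/pfpu/rtl/pfpu_seq.v
+++ b/cores/pfpu/rtl/pfpu_seq.v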
@@ -85,8 +85,8 @@ always @(*) begin
                        alu_rst = 1'b1;
                        pcount_rst = 1'b1;
                        if(dma_ack) begin
-                               /* we will be able to insert immediately our word
-                                * into the DMA write queue - carry on.
+                               /* we will be able to send immediately our word
+                                * to the DMA engine - carry on.
                                 */
                                pcount_rst = 1'b0;
                                next_state = RUNNING;
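diff --git a/cores/pfpu/test/complete/tb_pfpu.v b/cores/pfpu/test/complete/tb_pfpu.v
index 9a082c2..385823b 100644 (file)
--- a/cores/pfpu/test/complete/tb_pfpu.v
+++ b/cores/pfpu/test/complete/tb_pfpu.v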
@@ -47,7 +47,7 @@ always @(posedge sys_clk) begin
                wbm_ack_i <= 1'b0;
        else begin
                wbm_ack_i <= 1'b0;
-               if(wbm_stb_o & ~wbm_ack_i & (($random % 200) == 0)) begin
+               if(wbm_stb_o & ~wbm_ack_i & (($random % 3) == 0)) begin
                        wbm_ack_i <= 1'b1;
                        $fromfloat(wbm_dat_o, r);
                        $display("DMA write addr %x:%x (%b - %f)", wbm_adr_o, wbm_dat_o, wbm_adr_o[2], r);
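diff --git a/software/demo/shell.c b/software/demo/shell.c
index 8b2627a..65594c1 100644 (file)
--- a/software/demo/shell.c
+++ b/software/demo/shell.c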
@@ -282,17 +282,12 @@ static void pfputest()
        pfpu_code[ 9] = 0x00000005;
        pfpu_code[10] = 0x00142b80;
 
-       /*printf("Program:\n");
-       for(x=0;x<10;x++)
-               printf("%08x ", pfpu_code[x]);
-       printf("\n");*/
-
        CSR_PFPU_CTL = PFPU_CTL_START;
        printf("Waiting for PFPU...\n");
        timeout = 30;
        do {
-               printf("%08x vertices:%d collisions:%d strays:%d dma:%d pc:%04x\n",
-                       CSR_PFPU_CTL, CSR_PFPU_VERTICES, CSR_PFPU_COLLISIONS, CSR_PFPU_STRAYWRITES, CSR_PFPU_DMAPENDING, 4*CSR_PFPU_PC);
+               printf("%08x vertices:%d collisions:%d strays:%d last:%08x pc:%04x\n",
+                       CSR_PFPU_CTL, CSR_PFPU_VERTICES, CSR_PFPU_COLLISIONS, CSR_PFPU_STRAYWRITES, CSR_PFPU_LASTDMA, 4*CSR_PFPU_PC);
        } while((timeout--) && (CSR_PFPU_CTL & PFPU_CTL_BUSY));
        if(timeout > 0)
                printf("OK\n");
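diff --git a/software/include/hw/pfpu.h b/software/include/hw/pfpu.h
index 650690a..8ffbbf3 100644 (file)
--- a/software/include/hw/pfpu.h
+++ b/software/include/hw/pfpu.h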
@@ -33,7 +33,7 @@
 #define CSR_PFPU_VERTICES      MMPTR(0x80005014)
 #define CSR_PFPU_COLLISIONS    MMPTR(0x80005018)
 #define CSR_PFPU_STRAYWRITES   MMPTR(0x8000501C)
-#define CSR_PFPU_DMAPENDING    MMPTR(0x80005020)
+#define CSR_PFPU_LASTDMA       MMPTR(0x80005020)
 #define CSR_PFPU_PC            MMPTR(0x80005024)
 
 #define CSR_PFPU_DREGBASE      (0x80005400)