0% found this document useful (0 votes)
2 views · 7 pages

Systolic Array (1)

The document describes a Verilog implementation of a systolic array, which includes multiple processing elements for matrix multiplication using Vedic multiplication. It defines the architecture, input/output ports, and the necessary modules for arithmetic operations, including adders and multipliers. Additionally, a testbench is provided to simulate the functionality of the systolic array.

Uploaded by

charanyajessie09
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as TXT, PDF or read online on Scribd
0% found this document useful (0 votes)
2 views · 7 pages

Systolic Array (1)

The document describes a Verilog implementation of a systolic array, which includes multiple processing elements for matrix multiplication using Vedic multiplication. It defines the architecture, input/output ports, and the necessary modules for arithmetic operations, including adders and multipliers. Additionally, a testbench is provided to simulate the functionality of the systolic array.

Uploaded by

charanyajessie09
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as TXT, PDF or read online on Scribd
You are on page 1/ 7

`timescale 1ns / 1ps

// 4x4 systolic array of PROCESSING_ELEMENT instances, P0..P15 in row-major
// order (row r holds P(4r)..P(4r+3)).
//
// Data flow, as wired below:
//   * ina_* enters the left-most PE of each row and is handed PE to PE along
//     the row through outa; the right-most outa of each row (outa_3, outa_7,
//     outa_11, outa_15) is exported.
//   * inb_* enters the top PE of each column and is handed down the column
//     through outb; the bottom-row outb (outb_12..outb_15) is exported.
//   * ARin_* drives the ARin port of the left-most PE of each row; every
//     other PE takes its ARin from the ARout of its left neighbour. The
//     right-most ARout of each row (ARout_3, ARout_7, ARout_11, ARout_15)
//     is exported.
//   * clk, reset and sel are broadcast unchanged to every PE.
module systolic_array(ina_0, ina_4, ina_8, ina_12,
                      inb_0, inb_1, inb_2, inb_3,
                      outa_3, outa_7, outa_11, outa_15,
                      outb_12, outb_13, outb_14, outb_15,
                      clk, reset, sel,
                      ARin_0, ARin_4, ARin_8, ARin_12,
                      ARout_3, ARout_7, ARout_11, ARout_15);

    // Row operands (left edge) and column operands (top edge).
    input  [7:0]  ina_0, ina_4, ina_8, ina_12,
                  inb_0, inb_1, inb_2, inb_3;
    // Operands leaving the right edge (outa) and the bottom edge (outb).
    output [7:0]  outa_3, outa_7, outa_11, outa_15,
                  outb_12, outb_13, outb_14, outb_15;

    input         clk, reset;
    input         sel;
    // Per-row accumulator chain input and output.
    input  [17:0] ARin_0, ARin_4, ARin_8, ARin_12;
    output [17:0] ARout_3, ARout_7, ARout_11, ARout_15;

    // Internal PE-to-PE links. The edge signals are module ports and are
    // therefore not redeclared here.
    wire [7:0]  outa_0, outa_1, outa_2, outa_4, outa_5, outa_6, outa_8, outa_9,
                outa_10, outa_12, outa_13, outa_14;
    wire [7:0]  outb_0, outb_1, outb_2, outb_3, outb_4, outb_5, outb_6, outb_7,
                outb_8, outb_9, outb_10, outb_11;
    wire [17:0] ARout_0, ARout_1, ARout_2, ARout_4, ARout_5, ARout_6, ARout_8,
                ARout_9, ARout_10, ARout_12, ARout_13, ARout_14;

    // ---- Row 0 -----------------------------------------------------------
    PROCESSING_ELEMENT P0  (.ina(ina_0),   .inb(inb_0),   .clk(clk), .reset(reset),
                            .outa(outa_0),  .outb(outb_0),
                            .ARin(ARin_0),   .ARout(ARout_0),  .sel(sel));
    PROCESSING_ELEMENT P1  (.ina(outa_0),  .inb(inb_1),   .clk(clk), .reset(reset),
                            .outa(outa_1),  .outb(outb_1),
                            .ARin(ARout_0),  .ARout(ARout_1),  .sel(sel));
    PROCESSING_ELEMENT P2  (.ina(outa_1),  .inb(inb_2),   .clk(clk), .reset(reset),
                            .outa(outa_2),  .outb(outb_2),
                            .ARin(ARout_1),  .ARout(ARout_2),  .sel(sel));
    PROCESSING_ELEMENT P3  (.ina(outa_2),  .inb(inb_3),   .clk(clk), .reset(reset),
                            .outa(outa_3),  .outb(outb_3),
                            .ARin(ARout_2),  .ARout(ARout_3),  .sel(sel));

    // ---- Row 1 -----------------------------------------------------------
    PROCESSING_ELEMENT P4  (.ina(ina_4),   .inb(outb_0),  .clk(clk), .reset(reset),
                            .outa(outa_4),  .outb(outb_4),
                            .ARin(ARin_4),   .ARout(ARout_4),  .sel(sel));
    PROCESSING_ELEMENT P5  (.ina(outa_4),  .inb(outb_1),  .clk(clk), .reset(reset),
                            .outa(outa_5),  .outb(outb_5),
                            .ARin(ARout_4),  .ARout(ARout_5),  .sel(sel));
    PROCESSING_ELEMENT P6  (.ina(outa_5),  .inb(outb_2),  .clk(clk), .reset(reset),
                            .outa(outa_6),  .outb(outb_6),
                            .ARin(ARout_5),  .ARout(ARout_6),  .sel(sel));
    PROCESSING_ELEMENT P7  (.ina(outa_6),  .inb(outb_3),  .clk(clk), .reset(reset),
                            .outa(outa_7),  .outb(outb_7),
                            .ARin(ARout_6),  .ARout(ARout_7),  .sel(sel));

    // ---- Row 2 -----------------------------------------------------------
    PROCESSING_ELEMENT P8  (.ina(ina_8),   .inb(outb_4),  .clk(clk), .reset(reset),
                            .outa(outa_8),  .outb(outb_8),
                            .ARin(ARin_8),   .ARout(ARout_8),  .sel(sel));
    PROCESSING_ELEMENT P9  (.ina(outa_8),  .inb(outb_5),  .clk(clk), .reset(reset),
                            .outa(outa_9),  .outb(outb_9),
                            .ARin(ARout_8),  .ARout(ARout_9),  .sel(sel));
    PROCESSING_ELEMENT P10 (.ina(outa_9),  .inb(outb_6),  .clk(clk), .reset(reset),
                            .outa(outa_10), .outb(outb_10),
                            .ARin(ARout_9),  .ARout(ARout_10), .sel(sel));
    PROCESSING_ELEMENT P11 (.ina(outa_10), .inb(outb_7),  .clk(clk), .reset(reset),
                            .outa(outa_11), .outb(outb_11),
                            .ARin(ARout_10), .ARout(ARout_11), .sel(sel));

    // ---- Row 3 -----------------------------------------------------------
    PROCESSING_ELEMENT P12 (.ina(ina_12),  .inb(outb_8),  .clk(clk), .reset(reset),
                            .outa(outa_12), .outb(outb_12),
                            .ARin(ARin_12),  .ARout(ARout_12), .sel(sel));
    PROCESSING_ELEMENT P13 (.ina(outa_12), .inb(outb_9),  .clk(clk), .reset(reset),
                            .outa(outa_13), .outb(outb_13),
                            .ARin(ARout_12), .ARout(ARout_13), .sel(sel));
    PROCESSING_ELEMENT P14 (.ina(outa_13), .inb(outb_10), .clk(clk), .reset(reset),
                            .outa(outa_14), .outb(outb_14),
                            .ARin(ARout_13), .ARout(ARout_14), .sel(sel));
    PROCESSING_ELEMENT P15 (.ina(outa_14), .inb(outb_11), .clk(clk), .reset(reset),
                            .outa(outa_15), .outb(outb_15),
                            .ARin(ARout_14), .ARout(ARout_15), .sel(sel));

endmodule

`timescale 1ns / 1ps


// Single processing element of the systolic array.
//
// Ports:
//   clk    - clock; all registers update on its rising edge.
//   reset  - asynchronous, active-high reset; clears ar_reg, outa and outb.
//   ina    - 8-bit operand; registered and forwarded on outa.
//   inb    - 8-bit operand; registered and forwarded on outb.
//   ARin   - 18-bit value loaded into the accumulator register when sel = 0.
//   sel    - 1: accumulate (ar_reg <= ar_reg + ina*inb);
//            0: load     (ar_reg <= ARin).
//   ARout  - current contents of the accumulator register.
module PROCESSING_ELEMENT (clk,reset,ina,inb,outa,outb,ARin,sel,ARout);

    input             clk;
    input             reset;
    input      [7:0]  ina;
    input      [7:0]  inb;
    output reg [7:0]  outa;
    output reg [7:0]  outb;
    input      [17:0] ARin;
    input             sel;
    output wire [17:0] ARout;

    wire [15:0] mult_out;   // ina * inb from the 8x8 multiplier
    reg  [17:0] ar_reg;     // accumulator register
    wire [17:0] ar_x;       // ar_reg + zero-extended mult_out
    wire [17:0] mux_out;    // next value of ar_reg

    // 8x8 multiplier producing the 16-bit product of the current operands.
    vedic_8x8 px1 (.a(ina), .b(inb), .mult(mult_out));

    // Adds the product to the current accumulator value.
    adder_18bit adder (
        .ar_reg(ar_reg),
        .mult_out(mult_out),
        .ar_x(ar_x)
    );

    // Next-state mux: sel high selects the adder output (accumulate),
    // sel low selects the incoming ARin value (load).
    assign mux_out = sel ? ar_x : ARin;

    assign ARout = ar_reg;

    // The reset edge in the sensitivity list must match the polarity tested
    // in the body: reset is checked active-high, so the block is triggered on
    // its rising edge. With a falling-edge trigger the else branch would run
    // when reset is released, acting as an extra, unintended clock edge.
    always @(posedge clk or posedge reset) begin
        if (reset) begin
            ar_reg <= 0;
            outa   <= 0;
            outb   <= 0;
        end else begin
            ar_reg <= mux_out;
            outa   <= ina;
            outb   <= inb;
        end
    end

endmodule

// Accumulator adder: ar_x = ar_reg + mult_out.
// The 16-bit product is zero-extended to 18 bits before the addition; the
// result is truncated to 18 bits (any carry out of bit 17 is dropped).
module adder_18bit(
    input  [17:0] ar_reg,
    input  [15:0] mult_out,
    output [17:0] ar_x
);

    // Product widened to the accumulator width with two leading zeros.
    wire [17:0] product_ext;
    assign product_ext = {{2{1'b0}}, mult_out};

    assign ar_x = product_ext + ar_reg;

endmodule

//multiplier
`timescale 1ns / 1ps

// 8x8 multiplier assembled from four 4x4 partial products.
//   a, b : 8-bit operands
//   mult : 16-bit result
// With a = {aH, aL} and b = {bH, bL} (4-bit halves), the four partial
// products aL*bL, aH*bL, aL*bH and aH*bH are combined nibble by nibble.
module vedic_8x8(a, b, mult);

    input  [7:0]  a, b;
    output [15:0] mult;

    wire [7:0] pp_ll;      // a[3:0] * b[3:0]
    wire [7:0] pp_hl;      // a[7:4] * b[3:0]
    wire [7:0] pp_lh;      // a[3:0] * b[7:4]
    wire [7:0] pp_hh;      // a[7:4] * b[7:4]
    wire [9:0] cross_sum;  // pp_hl + pp_lh
    wire [9:0] mid_sum;    // cross_sum + upper nibble of pp_ll
    wire [7:0] top_sum;    // pp_hh + bits of mid_sum above the low nibble

    vedic4x4 M1 (.a(a[3:0]), .b(b[3:0]), .mult(pp_ll));
    vedic4x4 M2 (.a(a[7:4]), .b(b[3:0]), .mult(pp_hl));
    vedic4x4 M3 (.a(a[3:0]), .b(b[7:4]), .mult(pp_lh));
    vedic4x4 M4 (.a(a[7:4]), .b(b[7:4]), .mult(pp_hh));

    // Middle column: both cross products plus the carry-in nibble from pp_ll.
    adder10 A1 (.a({2'b00, pp_hl}), .b({2'b00, pp_lh}),           .sum(cross_sum));
    adder10 A2 (.a(cross_sum),      .b({6'b000000, pp_ll[7:4]}),  .sum(mid_sum));

    // Top byte: high product plus what spilled out of the middle column.
    adder8  A3 (.a(pp_hh),          .b({2'b00, mid_sum[9:4]}),    .sum(top_sum));

    assign mult = {top_sum, mid_sum[3:0], pp_ll[3:0]};

endmodule

// 4x4 multiplier assembled from four 2x2 partial products.
//   a, b : 4-bit operands
//   mult : 8-bit result
// Same column-wise scheme as the 8x8 version, one level down: the low
// 2 bits come straight from the low product, the next 2 bits from the sum
// of the cross products, and the top nibble from the high product plus the
// spill-over of the middle column.
module vedic4x4(a, b, mult);

    input  [3:0] a, b;
    output [7:0] mult;

    wire [3:0] pp_ll;       // a[1:0] * b[1:0]
    wire [3:0] pp_hl;       // a[3:2] * b[1:0]
    wire [3:0] pp_lh;       // a[1:0] * b[3:2]
    wire [3:0] pp_hh;       // a[3:2] * b[3:2]
    wire [5:0] ll_upper;    // upper two bits of pp_ll, zero-extended
    wire [5:0] cross_sum;   // pp_lh + pp_hl
    wire [5:0] mid_sum;     // cross_sum + ll_upper
    wire [3:0] top_sum;     // pp_hh + mid_sum[5:2]

    vedic_2x2 V1 (.a(a[1:0]), .b(b[1:0]), .mult(pp_ll));
    vedic_2x2 V2 (.a(a[3:2]), .b(b[1:0]), .mult(pp_hl));
    vedic_2x2 V3 (.a(a[1:0]), .b(b[3:2]), .mult(pp_lh));
    vedic_2x2 V4 (.a(a[3:2]), .b(b[3:2]), .mult(pp_hh));

    assign ll_upper = {4'b0000, pp_ll[3:2]};

    adder6 A1 (.a({2'b00, pp_lh}), .b({2'b00, pp_hl}), .sum(cross_sum));
    adder6 A2 (.a(cross_sum),      .b(ll_upper),       .sum(mid_sum));
    adder4 A3 (.a(pp_hh),          .b(mid_sum[5:2]),   .sum(top_sum));

    assign mult = {top_sum, mid_sum[1:0], pp_ll[1:0]};

endmodule

// 2x2 multiplier built from AND gates and two half adders.
//   a, b : 2-bit operands
//   mult : 4-bit result
module vedic_2x2 (a, b, mult);

    input  [1:0] a, b;
    output [3:0] mult;

    // Bitwise partial products.
    wire a1b0;
    wire a0b1;
    wire a1b1;
    // Carry from the bit-1 column into the bit-2 column.
    wire carry_mid;

    assign a1b0 = a[1] & b[0];
    assign a0b1 = a[0] & b[1];
    assign a1b1 = a[1] & b[1];

    assign mult[0] = a[0] & b[0];

    halfAdder H0 (.a(a1b0), .b(a0b1),      .sum(mult[1]), .carry(carry_mid));
    halfAdder H1 (.a(a1b1), .b(carry_mid), .sum(mult[2]), .carry(mult[3]));

endmodule

// Half adder: sum is the XOR of the inputs, carry is their AND.
module halfAdder(a,b,sum,carry);
    input  a, b;
    output sum, carry;

    // Gate-level form of sum = a ^ b and carry = a & b.
    xor sum_gate   (sum,   a, b);
    and carry_gate (carry, a, b);

endmodule

// 4-bit adder; the result is truncated to 4 bits (no carry output).
module adder4(
    input  [3:0] a,
    input  [3:0] b,
    output [3:0] sum
);

    assign sum = b + a;

endmodule

// 6-bit adder; the result is truncated to 6 bits (no carry output).
module adder6(
    input  [5:0] a,
    input  [5:0] b,
    output [5:0] sum
);

    assign sum = b + a;

endmodule

// 8-bit adder; the result is truncated to 8 bits (no carry output).
module adder8(
    input  [7:0] a,
    input  [7:0] b,
    output [7:0] sum
);

    assign sum = b + a;

endmodule

// 10-bit adder; the result is truncated to 10 bits (no carry output).
module adder10(
    input  [9:0] a,
    input  [9:0] b,
    output [9:0] sum
);

    assign sum = b + a;

endmodule

// Testbench

`timescale 1ns / 1ps

// Testbench for systolic_array.
// Drives a 10 ns clock, pulses reset high for 10 ns, then applies two sets
// of operand / ARin values 20 ns apart with sel held high. No outputs are
// checked automatically; results are meant to be inspected in the waveform.
module systolic_array_tb;

    // Inputs
    reg [7:0]  ina_0, ina_4, ina_8, ina_12;
    reg [7:0]  inb_0, inb_1, inb_2, inb_3;
    reg        clk, reset, sel;
    reg [17:0] ARin_0, ARin_4, ARin_8, ARin_12;

    // Outputs
    wire [7:0]  outa_3, outa_7, outa_11, outa_15;
    wire [7:0]  outb_12, outb_13, outb_14, outb_15;
    wire [17:0] ARout_3, ARout_7, ARout_11, ARout_15;

    // Instantiate the Unit Under Test (UUT)
    systolic_array uut (
        .ina_0(ina_0), .ina_4(ina_4), .ina_8(ina_8), .ina_12(ina_12),
        .inb_0(inb_0), .inb_1(inb_1), .inb_2(inb_2), .inb_3(inb_3),
        .outa_3(outa_3), .outa_7(outa_7), .outa_11(outa_11), .outa_15(outa_15),
        .outb_12(outb_12), .outb_13(outb_13), .outb_14(outb_14), .outb_15(outb_15),
        .clk(clk), .reset(reset), .sel(sel),
        .ARin_0(ARin_0), .ARin_4(ARin_4), .ARin_8(ARin_8), .ARin_12(ARin_12),
        .ARout_3(ARout_3), .ARout_7(ARout_7), .ARout_11(ARout_11), .ARout_15(ARout_15)
    );

    initial begin
        // Initialize Inputs
        ina_0 = 0; ina_4 = 0; ina_8 = 0; ina_12 = 0;
        inb_0 = 0; inb_1 = 0; inb_2 = 0; inb_3 = 0;
        clk = 0; reset = 0; sel = 0;
        ARin_0 = 0; ARin_4 = 0; ARin_8 = 0; ARin_12 = 0;

        // Wait for global reset
        #100;

        // Pulse reset high for 10 ns, then release it and select the
        // accumulate path.
        reset = 1;
        #10;
        reset = 0;
        sel = 1;

        // Test case 1
        ina_0 = 8'h01; ina_4 = 8'h02; ina_8 = 8'h03; ina_12 = 8'h04;
        inb_0 = 8'h05; inb_1 = 8'h06; inb_2 = 8'h07; inb_3 = 8'h08;
        ARin_0 = 18'h00001; ARin_4 = 18'h00002; ARin_8 = 18'h00003; ARin_12 = 18'h00004;
        #20;

        // Test case 2
        ina_0 = 8'h09; ina_4 = 8'h0A; ina_8 = 8'h0B; ina_12 = 8'h0C;
        inb_0 = 8'h0D; inb_1 = 8'h0E; inb_2 = 8'h0F; inb_3 = 8'h10;
        ARin_0 = 18'h00005; ARin_4 = 18'h00006; ARin_8 = 18'h00007; ARin_12 = 18'h00008;
        #20;

        // Add more test cases as needed

        // Let the last operands travel through the array, then stop. Without
        // an explicit $finish the free-running clock below keeps the
        // simulation alive indefinitely.
        #100;
        $finish;
    end

    always #5 clk = ~clk; // Clock generation: 10 ns period

endmodule

You might also like