# SIZE 4096 (each matrix is 64x64 = 4096 elements; see MAT_SIZE below)

# ---------------------------------------------------------------------------
# Static data: input file names and the four matrix buffers.
# Each buffer is 16384 bytes, i.e. 4096 elements of 4 bytes each.
# ---------------------------------------------------------------------------
.data
afin:       .asciz "matrix_files/a_matrix.txt"
bfin:       .asciz "matrix_files/b_matrix.txt"
cfin:       .asciz "matrix_files/c_matrix.txt"

# The three strings above occupy 78 bytes, so force word alignment before
# the buffers; otherwise word-sized accesses to the matrices would be
# misaligned.
.align 2
a_mat:      .space 16384        # input matrix A
b_mat:      .space 16384        # input matrix B
c_mat:      .space 16384        # computed output matrix C
c_expected: .space 16384        # reference output read from cfin

# Switch back to the text segment so that the code following this data
# block is assembled as instructions.
.text

# you can add functions/macros from previous assignments for your convenience to this file.
# make sure that rars.jar file is in the same folder as the matrix text files

# Number of elements in each matrix (64 x 64). The driver shifts this left
# by 2 to obtain the buffer size in bytes (4 bytes per element).
.eqv MAT_SIZE 4096

# exit(): terminate the program with the RARS "Exit" environment call.
# Clobbers: a7 (control never returns to the caller).
.macro exit ()
    li a7, 10               # RARS syscall 10 = Exit
    ecall
.end_macro

# print_str(%str): print the string literal %str to the console.
#   %str : a double-quoted string literal.
# The literal is stored in the data segment (RARS gives the label a unique
# name per macro expansion) and assembly then resumes in the text segment.
# Clobbers: a0 (string address), a7 (syscall number).
.macro print_str (%str)
    .data
str_label: .string %str
    .text
    li a7, 4                # RARS syscall 4 = PrintString
    la a0, str_label        # a0 = address of the NUL-terminated string
    ecall
.end_macro

# reg_reset(): zero every register that is not used to pass the first three
# arguments, so stale values cannot leak into a callee.
# Cleared: a3-a7, t0-t6, s0-s11.
# Untouched: a0-a2, ra, sp, gp, tp.
.macro reg_reset()
    # argument registers beyond the first three
    li a3, 0
    li a4, 0
    li a5, 0
    li a6, 0
    li a7, 0
    # temporaries
    li t0, 0
    li t1, 0
    li t2, 0
    li t3, 0
    li t4, 0
    li t5, 0
    li t6, 0
    # saved registers
    li s0, 0
    li s1, 0
    li s2, 0
    li s3, 0
    li s4, 0
    li s5, 0
    li s6, 0
    li s7, 0
    li s8, 0
    li s9, 0
    li s10, 0
    li s11, 0
.end_macro

# func_call(%func): call %func with a clean register file.
#   %func : label of the function to call; arguments are passed in a0-a2.
# Saves t0-t6 and ra on the stack, zeroes every register handled by
# reg_reset() (a3-a7, t0-t6, s0-s11), calls %func, then restores t0-t6 and
# ra. The callee's results in the a-registers are left as returned.
# Clobbers: s0-s11 (zeroed by reg_reset and not restored), plus whatever
# the callee changes in a0-a7.
.macro func_call(%func)
    addi sp, sp, -32        # frame is a multiple of 16, so sp alignment is unchanged
    sw t0, 0(sp)
    sw t1, 4(sp)
    sw t2, 8(sp)
    sw t3, 12(sp)
    sw t4, 16(sp)
    sw t5, 20(sp)
    sw t6, 24(sp)
    sw ra, 28(sp)           # 'call' overwrites ra; keep it so the macro also works inside functions
    reg_reset()
    call %func
    lw t0, 0(sp)
    lw t1, 4(sp)
    lw t2, 8(sp)
    lw t3, 12(sp)
    lw t4, 16(sp)
    lw t5, 20(sp)
    lw t6, 24(sp)
    lw ra, 28(sp)
    addi sp, sp, 32
.end_macro

.globl main
# ---------------------------------------------------------------------------
# main: read matrices A, B and the expected C from text files, run the naive
# and the blocked GEMM into c_mat, and compare each result byte-for-byte
# against the expected matrix.
#
# Registers live across calls (func_call preserves t0-t6):
#   t0 = start of A data      t1 = start of B data
#   t2 = start of expected C  t6 = c_mat (output buffer)
# ---------------------------------------------------------------------------
.text
main:
    # read A matrix
    print_str("Reading A matrix from file...")
    la a0, afin
    li a1, MAT_SIZE         # matrix is 64x64
    la a2, a_mat
    func_call(read_mtx_file)
    mv t0, a2               # starting address of matrix data
    print_str("Done\n")

    # read B matrix
    print_str("Reading B matrix from file...")
    la a0, bfin
    li a1, MAT_SIZE         # matrix is 64x64
    la a2, b_mat
    func_call(read_mtx_file)
    mv t1, a2               # starting address of matrix data
    print_str("Done\n")

    # read C expected matrix
    print_str("Reading C matrix from file...")
    la a0, cfin
    li a1, MAT_SIZE         # matrix is 64x64
    la a2, c_expected
    func_call(read_mtx_file)
    mv t2, a2               # starting address of matrix data
    print_str("Done\n")

    # load address of empty C matrix array and clear memory
    la t6, c_mat
    print_str("Clearing memory for C output...")
    mv a0, t6               # set after print_str, which overwrites a0
    mv a1, zero             # fill value
    li a2, MAT_SIZE
    slli a2, a2, 2          # number of bytes to clear
    func_call(memset)
    print_str("Done\n")

    # call naive gemm
    print_str("Running naive GEMM...")
    # func_call zeroes a3-a7 and t0-t6, so a0-a2 are the only registers that
    # can carry arguments to the callee.
    # NOTE(review): the order (A, B, C) is assumed -- confirm against gemm.asm.
    mv a0, t0
    mv a1, t1
    mv a2, t6
    func_call(naive_gemm)
    print_str("Done\n")

    # compare with expected output
    print_str("Checking naive GEMM output...")
    mv a0, t2               # expected C
    mv a1, t6               # computed C
    li a2, MAT_SIZE
    slli a2, a2, 2          # number of bytes to compare
    func_call(memcmp)
    mv a6, a0               # move result from a0 since print_str() will override it
    print_str("Done\n")
    beqz a6, naive_correct
    print_str("Naive GEMM is INCORRECT\n")
    j naive_done            # do not also print the CORRECT message
naive_correct:
    print_str("Naive GEMM is CORRECT\n")
naive_done:

    print_str("Clearing memory for C output...")
    mv a0, t6
    mv a1, zero
    li a2, MAT_SIZE
    slli a2, a2, 2          # number of bytes to clear
    func_call(memset)
    print_str("Done\n")

    # call blocked gemm
    print_str("Running blocked GEMM...")
    # NOTE(review): same assumed argument order as for naive_gemm.
    mv a0, t0
    mv a1, t1
    mv a2, t6
    func_call(blocked_gemm)
    print_str("Done\n")

    # compare with expected output
    print_str("Checking blocked GEMM output...")
    mv a0, t2               # expected C
    mv a1, t6               # computed C
    li a2, MAT_SIZE
    slli a2, a2, 2          # number of bytes to compare
    func_call(memcmp)
    mv a6, a0               # move result from a0 since print_str() will override it
    print_str("Done\n")
    beqz a6, blocked_correct
    print_str("Blocked GEMM is INCORRECT")
    j blocked_done          # do not also print the CORRECT message
blocked_correct:
    print_str("Blocked GEMM is CORRECT")
blocked_done:
    # Terminate here; otherwise execution would fall through into memcmp.
    li a7, 10               # RARS syscall 10 = Exit
    ecall

# memcmp: compare two byte ranges.
# In:
#   a0: address of the first range
#   a1: address of the second range
#   a2: number of bytes to compare
# Out:
#   a0: 0 if the ranges are identical (or a2 == 0); otherwise the difference
#       (byte from the first range) - (byte from the second range) at the
#       first mismatch, with both bytes zero-extended
# Clobbers: a1, a2, a3, a4
memcmp:
    beqz a2, memcmp_equal       # nothing to compare
memcmp_loop:
    lbu a3, 0(a0)
    lbu a4, 0(a1)
    bne a3, a4, memcmp_differ
    addi a2, a2, -1
    addi a0, a0, 1
    addi a1, a1, 1
    bnez a2, memcmp_loop
memcmp_equal:
    li a0, 0
    ret
memcmp_differ:
    sub a0, a3, a4
    ret

# memset: fill a byte range with a constant value.
# In:
#   a0: pointer to start of memory range to set
#   a1: value to store (only the low 8 bits are written to each byte)
#   a2: number of bytes to set
# Out: none. On return a0 points one past the last byte written and a2 is 0.
# Clobbers: a0, a2
memset:
    beqz a2, memset_done        # zero-length range: nothing to do
memset_loop:
    sb a1, 0(a0)
    addi a2, a2, -1
    addi a0, a0, 1
    bnez a2, memset_loop
memset_done:
    ret

# Pull in the matrix file reader and the GEMM implementations. The file
# names must be in plain ASCII double quotes for the assembler to accept them.
.include "file_read.asm"
.include "gemm.asm"

# End of the main driver file.