MIPS linked list
I'm confused exactly about how to create structures in MIPS. I want to create a linked list implementation which computes the length of the strings stored, and sorts them in stored-order. Here is my code so far:
# Global symbols
#
# string routines
.globl read_string
.globl strcmp
.globl strlen
.globl trim
.globl strloop
.globl replace
# list routines
.globl insert
.globl insert_here
.globl print_list
.globl main
# pseudo-standard library
.globl get_string
.globl malloc
.globl print_newline
.globl print_string
##################################################
# Constants
#
.data
MAX_STR_LEN: .word 50
STR_NEWLINE: .asciiz "\n"
STR_ENTER: .asciiz "enter a string: "
#################################################################
#################################################################
# Code
#
.text
##################################################
# main: repeatedly gets strings from user and enters them in list
# until a string of length less than two is entered;
# prints list in order when done
#
main:
# lines commented out - not needed in simulation:
# addi $sp, $sp, -12
# sw $ra, 0($sp)
# sw $s0, 4($sp) #$s0 will be linked list
# sw $s1, 8($sp) #$s1 will be the current string
li $s0, 0 # initialize the list to NULL
Loop_main:
la $a0, STR_ENTER
jal print_string
jal read_string
move $s1, $v0
jal trim
jal strlen
addi $t0, $zero, 2
beq $v0, $t0, Exit_loop_main
jal strcmp
jal insert
# replace newline with null terminator
# ...
# check string length; exit loop if less than 2
# ...
# insert string into list
# ...
# reassign front of list
j Loop_main
Exit_loop_main:
move $a0, $s0
jal print_list
jal print_newline
# lines commented out - not needed in simulation:
# lw $s1, 8($sp)
# lw $s0, 4($sp)
# lw $ra, 0($sp)
# addi $sp, $sp, 12
# jr $ra
# exit simulation via syscall
li $v0, 10
syscall
##################################################
# String routines
#
# read_string: allocates MAX_STR_LEN bytes for a string
# and then reads a string from standard input into that memory address
# and returns the address in $v0
read_string:
addi $sp, $sp, -8 #allocate space for 2 items on the stack
sw $ra, 0($sp) #push the jump register onto the stack
sw $s0, 4($sp) #push the head of the list onto the stack
add $t0, $t0, $zero #$t0 gets 0
la $t1, MAX_STR_LEN #$a0 gets MAX_STR_LEN
lw $a0, 0($t1) #move MAX_STR_LEN from $t1 into $a0
jal malloc #jump to malloc to allocate space for string
move $a0, $v0 #move pointer to allocated memory to $a0
add $t1, $t1, $zero #get zero
move $a1, $t1 #move zero to a1
la $a1, MAX_STR_LEN #$a1 gets MAX_STR_LEN
jal get_string #get the string into $v0
lw $s0, 4($sp) #load the head of the list
lw $ra, 0($sp) #load the jump address
addi $sp, $sp, 8 #push onto the stack space for 2 elements
jr $ra #jump back to caller function
# trim: modifies string stored at address in $a0 so that
# first occurrence of a newline is replaced by null terminator
trim:
li $t0, 10 #$t1 gets 10, ASCII value for newline
strloop:
lb $t1, 0($a0) #get byte of character of string and loop
beq $t1, $t0, replace #if $a0 = go to replace
addi $a0, $a0, 8 #increment $a0 by 8 to piont to first bit of next char
j strloop #jump back to beginning
replace:
add $t2, $t2, $zero #$t2 is set to zero, ASCII value for null terminator
sb $t2, 0($a0) #$t2 is stored into the byte starting at $a0
jr $ra #jump back to caller
# strlen: given string stored at address in $a0
# returns its length in $v0
strlen:
add $t0, $t0, $zero #$t0 gets zero
lenloop:
lb $t1, 0($a0) #get the first byte for first char in $a0
beq $t1, $zero, exitline #if $t1 == 0 (null terminator), jump to exit
addi $a0, $a0, 8 #else, increment to next byte of string for next char
addi $t0, $t0, 1 #increment $t0 for each character in string
j lenloop #jump back up to loop
exitline:
sw $t0, 0($v0) #store $t0 into $v0 to return lenght of string
jr $ra #jump back to caller
# strcmp: given strings s, t stored at addresses in $a0, $a1
# returns -1 if s < t; 0 if s == t, 1 if s > t
strcmp:
lb $t0, 0($a0) #get byte of first char in string s
lb $t1, 0($a1) #get byte of first char in string t
# lb $t3, 0($t0)
#lb $t4, 0($t1)
addi $t3, $t3, 1 #get 1 to compare
slt $t2, $t0, $t1 #if s[0] < t[0] $t2 = 1, else $t2 = 0
bne $t2, $t3, lessthan #if $t2 == 1, jump to lessthan
slt $t2, $t1, $t0 #if t[0] < s[1], $t2 = 1, else $t2 = 0
beq $t2, $t3, greaterthan #if $t2 == 1, jump to greaterthan
sw $zero, 0($v0) #$v0 gets zero
j end
lessthan:
addi $t4, $t4, -1 #$t4 gets -1
sw $t4, 0($v0) #$v0 gets -1
j end #jump to end
greaterthan:
addi $t4, $t4, 1 #$t4 gets 1
sw $t4, 0($v0) #$v0 gets 1
j end #jump to end
end:
jr $ra
# insert_here: given address of front of list in $a0
# and address of string to insert in $a1,
# inserts new linked-list node in front of list;
# returns address of new front of list in $v0
insert_here:
lw $t0, 0($a0) #$t0 get $a0
lw $t1, 0($a1) #$t1 gets $a1
addi $t2, $zero, 8 #$t2 gets 8
sw $t2, 0($a0) #$t2 gets stored into $a0
jal malloc #allocate 1 byte for the memory
move $t3, $v0 #get address of new memory from $v0 and move to $t3
sw $t1, 0($t3) #store the string pointer into bytes 0-3 of the new memory
sw $t0, 4($t3) #store the pointer to the original front of the list
sw $t3, 0($s0) #store the new node into $s0
lw $ra, 0($sp) #pop the register to jump back to off the stack
addi $sp, $sp, 4 #add to the stack
jr $ra #jump back to caller
##################################################
# List routines
#
# insert: given address of front of list in $a0
# and address of string to insert in $a1,
# inserts new linked-list node in appropriate place in list
# ...
# returns address of new front of list in $v0 (which may be same as old)
insert:
addi $sp, $sp, 4 #add space on the stack
sw $ra, 0($sp) #store jump register onto the stack
lw $t9, 0($a0) #load head of the list for later use
lw $t0, 0($a0) #load head of list into $t0
andi $t0, $t0, 240 #bitwise and with 240 (1111 0000) to extract first 4 bits for pointer to string
sw $t0, 0($a0) #store $t0 into $a0 for strcmp call
lb $t6, 0($t0) #get the byte of the first string char in the list
lw $t7, 0($a1) #get address of string
lb $t1, 0($t7) #get the byte of the first char of the string
addi $t3, $zero, 1 #$t3 gets 1
addi $t4, $zero, -1 #$t3 gets -1
alphloop: #be careful in this function may have a bug with front of the list
# slt $t2, $t1, $t0 #if $t1 < $t0, then $t2 = 1, else $t2 = 0
# beq $t2, $t3, put #if
# beq $t2, $zero, nextchar
jal strcmp #compare the strings in $a0 and $a1
move $t5, $v0 #move the value returned from strcmp into $t5
beq $t5, $t4, put #if $t5 = -1, then value is less and then put new string at head of list
beq $t5, $t3, nextstring #if $t5 = 1, then the head of the list is larger than the string and go to next string
beq $t5, $zero, close #check if it is zero, if so it is already in the list so step out
nextstring:
lw $t2, 0($a0) #store pointer to next node in $t2
andi $t8, $t9, 15 #get address of next node string
beq $t8, $zero, put #if it points to null then add node at the end
sw $t8, 0($a0) #store into $a0
j alphloop #check against the next string in loop
put:
li $t5, 8 #$t5 gets 8
move $a0, $t5 #$t5 moved into $a0
jal malloc #allocate size for node
move $t5, $v0 #move address returned by malloc to $t5
sw $a1, 0($t5) #store $a1 into address allocated
beq $t2, $zero, front #node is at front of the list, so there is no need to update pointer
sw $t2, 4($t5) #store pointer to current node into new node
addi $t0, $a0, -8 #subtract from the current node back one
sw $t5, 0($t0) #store new pointer into the node
jr $ra
front:
sw $t5, 0($s0) #make global reference to front of the node the new node if its at the front
close:
jr $ra #jump back
# print_list: given address of front of list in $a0
# prints each string in list, one per line, in order
print_list:
addi $sp, $sp, -8
sw $ra, 0($sp)
sw $s0, 4($sp)
move $s0, $a0
beq $s0, $zero, Exit_print_list
Loop_print_list:
lw $a0, 0($s0)
jal print_string
jal print_newline
lw $s0, 4($s0) # node = node->next
bne $s0, $zero, Loop_print_list
Exit_print_list:
lw $s0, 4($sp)
lw $ra, 0($sp)
addi $sp, $sp, 8
jr $ra
##################################################
# Pseudo-standard library routines:
# wrappers around SPIM/MARS syscalls
#
# assumes buffer to read into is in $a0, and max length is in $a1
get_string:
li $v0, 8
syscall
jr $ra
# malloc: takes one argument (in $a0) which indicates how many bytes
# to allocate; returns a pointer to the allocated memory (in $v0)
malloc:
li $v0, 9 # SPIM/MARS code for "sbrk" memory allocation
syscall
jr $ra
# print_newline: displays newline to standard output
print_newline:
li $v0, 4
la $a0, STR_NEWLINE
syscall
jr $ra
# print_string: displays supplied string (in $a0) to standard output
print_string:
li $v0, 4
syscall
jr $ra
Where should I go from here? My code assembles but does not do anything after reading in two inputs.
You did a great job with your level of commenting, style, and the program layout. Some of the best I've seen on SO. Overall, a good effort.
However, there were bugs.
I've produced an annotated version of your code and a refactored one. See below.
There were a number of bugs throughout the code, not related to the struct or insert code [there were at least 26 such bugs].
A frequent one was often repeating a wrong single instruction. This was often wanting to set a register with a constant. Your code used (e.g.) addi $t3,$t3,7
instead of the correct addi $t3,$zero,7
. I replaced those with li $t3,7
. In a few places, you did use the correct version, so I'd call this a "typo" bug, but there were a lot of them.
Your strcmp
only compared the first character and not the whole string.
The actual insertion code was a bit convoluted and much more complicated than it needed to be (e.g. insert_here
wasn't used). It also had some severe logic/implementation bugs. You were on the right track, but, after fixing so many other unrelated bugs, I decided to rewrite it rather than try to patch it.
Here's the annotated version [stripped down for SO space limits] where I fixed most of the one line bugs [annotated with "BUG"], but the code is still not yet runnable and no fixes to the struct/insert logic. I tried to remain faithful to your original code [please pardon the gratuitous style cleanup]:
main:
# BUGBAD: this is the list pointer but it is _never_ set to a non-null
# value but things get stored relative to it
li $s0,0 # initialize the list to NULL
Loop_main:
la $a0,STR_ENTER
jal print_string
jal read_string
move $s1,$v0
# BUG: trim uses and trashes a0 but strlen needs the original value
jal trim
jal strlen
addi $t0,$zero,2
beq $v0,$t0,Exit_loop_main
# BUG: this strcmp serves _no_ purpose
jal strcmp
jal insert
# replace newline with null terminator
# ...
# check string length; exit loop if less than 2
# ...
# insert string into list
# ...
# reassign front of list
j Loop_main
Exit_loop_main:
move $a0,$s0
jal print_list
jal print_newline
li $v0,10
syscall
read_string:
addi $sp,$sp,-8
sw $ra,0($sp)
sw $s0,4($sp)
# BUG: this does _not_ set t0 = 0
###add $t0,$t0,$zero # $t0 gets 0
li $t0,0
# BUGFIX
# BUG: MAX_STR_LEN should be a _constant_ (e.g. 80) but this is an _address_
###la $t1,MAX_STR_LEN # $a0 gets MAX_STR_LEN
lw $t1,MAX_STR_LEN # $a0 gets MAX_STR_LEN
# BUGFIX
lw $a0,0($t1) # move MAX_STR_LEN from $t1 into $a0
jal malloc # allocate space for string
move $a0,$v0 # move pointer to allocated memory to $a0
# BUG: this does _not_ set t1 = 0
###add $t1,$t1,$zero # get zero
li $t1,0 # get zero
# BUGFIX
move $a1,$t1 # move zero to a1
# BUG: this does not set a1 = 50
###la $a1,MAX_STR_LEN # $a1 gets MAX_STR_LEN
lw $a1,MAX_STR_LEN # $a1 gets MAX_STR_LEN
# BUGFIX
jal get_string # get the string into $v0
lw $s0,4($sp)
lw $ra,0($sp)
addi $sp,$sp,8
jr $ra
# trim: modifies string stored at address in $a0 so that
# first occurrence of a newline is replaced by null terminator
trim:
# NOTE: using hex for this would be better (e.g. 0x0A)
li $t0,10 # $t1 gets 10, ASCII value for newline
strloop:
lb $t1,0($a0) # get byte of char of string and loop
beq $t1,$t0,replace # if $a0 = go to replace
# BUG: the increment should be 1
###addi $a0,$a0,8 # increment $a0 by 8 to piont to first bit of next char
addi $a0,$a0,1 # increment $a0 by 1 to point to next char
# BUGFIX
j strloop # jump back to beginning
replace:
# BUG: this does _not_ set t2 to 0
###add $t2,$t2,$zero # $t2 is set to zero, ASCII value for null terminator
li $t2,0 # t2 = zero, ASCII value for EOS
# BUGFIX
sb $t2,0($a0) # $t2 is stored into byte at $a0
jr $ra # jump back to caller
# strlen: given string stored at address in $a0
# returns its length in $v0
strlen:
# BUG: this does _not_ set t0 to zero
###add $t0,$t0,$zero # $t0 gets zero
li $t0,0
# BUGFIX
lenloop:
lb $t1,0($a0) # get the first byte for first char in $a0
beq $t1,$zero,exitline # if $t1 == 0 (null terminator), exit
# BUG: the increment here is wrong -- it should be 1
###addi $a0,$a0,8 # else, increment to next byte of string for next char
addi $a0,$a0,4 # else, increment to next byte of string
# BUGFIX
addi $t0,$t0,1 # increment $t0 for each char in string
j lenloop # jump back up to loop
exitline:
# BUG: this stores the length at the _address_ pointed to in v0
###sw $t0,0($v0) # store $t0 into $v0 to return lenght of string
move $v0,$t0
# BUGFIX
jr $ra # jump back to caller
# BUG: this only compares the first character
# strcmp: given strings s, t stored at addresses in $a0, $a1
# returns -1 if s < t; 0 if s == t, 1 if s > t
strcmp:
lb $t0,0($a0) # get byte of first char in string s
lb $t1,0($a1) # get byte of first char in string t
# lb $t3, 0($t0)
# lb $t4, 0($t1)
# BUG: this does not set t3 = 1
###addi $t3,$t3,1 # get 1 to compare
li $t3,1
# BUGFIX
slt $t2,$t0,$t1 # if s[0] < t[0] $t2 = 1, else $t2 = 0
bne $t2,$t3,lessthan # if $t2 == 1, jump to lessthan
slt $t2,$t1,$t0 # if t[0] < s[1], $t2 = 1, else $t2 = 0
beq $t2,$t3,greaterthan # if $t2 == 1, jump to greaterthan
# BUG: this does not set v0 = 0
###sw $zero,0($v0) # $v0 gets zero
li $v0,0
# BUGFIX
j end
lessthan:
# BUG: this does _not_ set t4 = -1
###addi $t4,$t4,-1 # $t4 gets -1
li $t4,-1
# BUGFIX
# BUG: this does not set v0
###sw $t4,0($v0) # $v0 gets -1
move $v0,$t4
# BUGFIX
j end # jump to end
greaterthan:
# BUG: this does _not_ set t4 = 1
###addi $t4,$t4,1 # $t4 gets 1
li $t4,1
# BUGFIX
# BUG: this does not set v0
###sw $t4,0($v0) # $v0 gets 1
move $v0,$t4
# BUGFIX
j end # jump to end
end:
jr $ra
# BUG: the front of the list is _always_ s0
# insert: given address of front of list in $a0
# and address of string to insert in $a1,
# inserts new linked-list node in appropriate place in list
# ...
# returns address of new front of list in $v0 (which may be same as old)
insert:
# BUG: should be -4
###addi $sp,$sp,4
addi $sp,$sp,-4
# BUGFIX
sw $ra,0($sp)
lw $t9,0($a0) # load head of the list for later use
lw $t0,0($a0) # load head of list into $t0
# BUG: anding a _pointer_ against 0xF0 makes _no_ sense
# NOTE: better to use hex for bit patterns
###andi $t0,$t0,240 # bitwise and with 240 (1111 0000) to extract first 4 bits for pointer to string
# BUGFIX
# BUG: this block of code is on the right track, but, wrong
# storing into a0 (the struct) for strcmp makes _no_ sense
sw $t0,0($a0) # store $t0 into $a0 for strcmp call
lb $t6,0($t0) # get the byte of the first string char in the list
lw $t7,0($a1) # get address of string
lb $t1,0($t7) # get the byte of the first char of the string
# NOTE: while we can set these here, we're burning two regs across the
# strcmp call -- cleaner to move this below the call
addi $t3,$zero,1 # $t3 gets 1
addi $t4,$zero,-1 # $t3 gets -1
# be careful in this function may have a bug with front of the list
alphloop:
# slt $t2, $t1, $t0 #if $t1 < $t0, then $t2 = 1, else $t2 = 0
# beq $t2, $t3, put #if
# beq $t2, $zero, nextchar
# BUG: strcmp destroys the values of a0 and a1, so the second time through
# here they have bogus values
# BUGBAD: strcmp uses them as pointers to the _strings_ but here, we're using
# a0 as a _struct_ pointer!!!
jal strcmp # compare the strings in $a0 and $a1
move $t5,$v0 # move the value returned from strcmp into $t5
beq $t5,$t4,put # if $t5 == -1, then value is less and then put new string at head of list
beq $t5,$t3,nextstring # if $t5 == 1, then the head of the list is larger than the string and go to next string
beq $t5,$zero,close # check if it is zero, if so it is already in the list so step out
nextstring:
lw $t2,0($a0) # store pointer to next node in $t2
# NOTE: use hex for bit masks (e.g. 0x0F)
# BUG: this makes no sense
andi $t8,$t9,15 # get address of next node string
beq $t8,$zero,put # if it points to null then add node at the end
sw $t8,0($a0) # store into $a0
j alphloop # check against the next string in loop
put:
# NOTE: what is 8??? obviously, it's the size in bytes of a node, so the
# comment should say that
li $t5,8 # $t5 gets 8
move $a0,$t5 # $t5 moved into $a0
jal malloc # allocate size for node
move $t5,$v0 # move address returned by malloc to $t5
sw $a1,0($t5) # store $a1 into address allocated
beq $t2,$zero,front # node is at front of the list, so there is no need to update pointer
sw $t2,4($t5) # store pointer to current node into new node
addi $t0,$a0,-8 # subtract from the current node back one
sw $t5,0($t0) # store new pointer into the node
jr $ra
front:
sw $t5,0($s0) # make global reference to front of the node the new node if its at the front
close:
jr $ra
Here's the cleaned up, refactored, corrected, and runnable code.
A few things I recommend:
(1) For labels within a function, to avoid conflicts with other functions, the labels should be prefixed with the function name (e.g. for your trim
function [which I renamed to nltrim
], you had a label strloop
[which I renamed to nltrim_loop
])
(2) Comments should describe intent in real world terms and not just describe the actual asm instruction.
I realize you're just starting out, but (e.g.) this:
addi $t3,$zero,7 # sets the value of $t3 to 7
Should be replaced with something more descriptive:
addi $t3,$zero,7 # count = 7
(3) The general rule is to put a sidebar comment on every line [which you did]. It's what I do, mostly. But, for some boilerplate, that is well understood, the comments may be overkill [and may actually interfere with readability].
For example, the code that establishes a stack frame for a function and the code that restores from that frame at function exit is well understood. So, maybe a single block comment at the top like # set up stack frame
for the few lines and # restore from stack frame
at the bottom rather comments on each inst
(4) Keep sidebar comments short so that they fit in 80 columns. If you need more, elevate the comment to a full line block comment above the instruction [and use as many lines as you wish]
(5) For difficult/complex stuff, it's okay to prototype using pseudo-code, or actual C [or a language of your choice]. It's a reasonable assumption that anyone writing [or reading] asm code is familiar with at least one high level language [C being the most likely].
For the struct code, I added a C struct definition in a top block comment. In the insert
routine, I added the C pseudo-code in the top comment block. The sidebar comments for the asm often referred to the symbols and actions in the pseudo code
Actually, doing this prototyping even for simpler functions has benefits [even if you don't add the code as comments]. (e.g.) It may have helped when writing your strcmp
(6) Keep the code as simple as possible. When the code is needlessly complicated, it makes it very easy to introduce bugs in your program logic or use incorrect instructions to implement that logic. It also makes it difficult to spot these bugs later.
(e.g.) In certain cases, your code was loading a register, only to have to move it later. Thus, using 2-3 insts where only one was necessary. Try to minimize unnecessary register movement [not just for speed, but simpler code].
For example, your strcmp
had 24 lines, and only compared the first char [i.e. a bug], and had several branches. My version was only 12 lines, did the full string compare, and was a simple loop.
Likewise, in your insertion code, insert_here
[not used] was 17 lines and insert
was 47 lines for a total of 64. My working version was 31 lines.
Note: I used the .eqv
pseudo-op to "define" struct offsets. I use mars
and this works for it but I don't know if spim
supports .eqv
. You can always hard code the offsets, but it makes the code less readable and prone to error. With structs that have [say] 10 elements, some form of this is quite handy. Most other assemblers have some form of .eqv
equivalent.
Anyway, here's the code:
# Global symbols
# struct node {
# struct node *node_next;
# char *node_str;
# };
.eqv node_next 0
.eqv node_str 4
.eqv node_size 8 # sizeof(struct node)
# NOTE: we don't actually use this struct
# struct list {
# struct node *list_head;
# struct node *list_tail;
# };
.eqv list_head 0
.eqv list_tail 4
# string routines
.globl read_string
.globl strcmp
.globl strlen
.globl nltrim
# list routines
.globl insert
.globl print_list
.globl main
# pseudo-standard library
.globl get_string
.globl malloc
.globl print_newline
.globl print_string
# Constants
.data
MAX_STR_LEN: .word 50
STR_NEWLINE: .asciiz "\n"
STR_ENTER: .asciiz "enter a string: "
# global registers:
# s0 -- list head pointer (list_head)
# Code
.text
# main: repeatedly gets strings from user and enters them in list
# until a string of length less than two is entered;
# prints list in order when done
#
main:
li $s0,0 # list_head = NULL
main_loop:
# prompt user for string
la $a0,STR_ENTER
jal print_string
# read in string from user
jal read_string
# save the string pointer as we'll use it repeatedly
move $s1,$v0
# strip newline
move $a0,$s1
jal nltrim
# get string length and save the length
move $a0,$s1
jal strlen
# stop if given empty string
blez $v0,main_exit
# insert the string
jal insert
j main_loop
main_exit:
move $a0,$s0
jal print_list
jal print_newline
# exit simulation via syscall
li $v0,10
syscall
##################################################
# String routines
#
# read_string: allocates MAX_STR_LEN bytes for a string
# and then reads a string from standard input into that memory address
# and returns the address in $v0
read_string:
addi $sp,$sp,-8
sw $ra,0($sp)
sw $s0,4($sp)
lw $a1,MAX_STR_LEN # $a1 gets MAX_STR_LEN
move $a0,$a1 # tell malloc the size
jal malloc # allocate space for string
move $a0,$v0 # move pointer to allocated memory to $a0
lw $a1,MAX_STR_LEN # $a1 gets MAX_STR_LEN
jal get_string # get the string into $v0
move $v0,$a0 # restore string address
lw $s0,4($sp)
lw $ra,0($sp)
addi $sp,$sp,8
jr $ra
# nltrim: modifies string stored at address in $a0 so that
# first occurrence of a newline is replaced by null terminator
nltrim:
li $t0,0x0A # ASCII value for newline
nltrim_loop:
lb $t1,0($a0) # get next char in string
beq $t1,$t0,nltrim_replace # is it newline? if yes, fly
beqz $t1,nltrim_done # is it EOS? if yes, fly
addi $a0,$a0,1 # increment by 1 to point to next char
j nltrim_loop # loop
nltrim_replace:
sb $zero,0($a0) # zero out the newline
nltrim_done:
jr $ra # return
# strlen: given string stored at address in $a0
# returns its length in $v0
#
# clobbers:
# t1 -- current char
strlen:
move $v0,$a0 # remember base address
strlen_loop:
lb $t1,0($a0) # get the current char
addi $a0,$a0,1 # pre-increment to next byte of string
bnez $t1,strlen_loop # is char 0? if no, loop
subu $v0,$a0,$v0 # get length + 1
subi $v0,$v0,1 # get length (compensate for pre-increment)
jr $ra # return
# strcmp: given strings s, t stored at addresses in $a0, $a1
# returns <0 if s < t; 0 if s == t, >0 if s > t
# clobbers: t0, t1
strcmp:
lb $t0,0($a0) # get byte of first char in string s
lb $t1,0($a1) # get byte of first char in string t
sub $v0,$t0,$t1 # compare them
bnez $v0,strcmp_done # mismatch? if yes, fly
addi $a0,$a0,1 # advance s pointer
addi $a1,$a1,1 # advance t pointer
bnez $t0,strcmp # at EOS? no=loop, otherwise v0 == 0
strcmp_done:
jr $ra # return
# insert: inserts new linked-list node in appropriate place in list
#
# returns address of new front of list in $s0 (which may be same as old)
#
# arguments:
# s0 -- pointer to node at front of list (can be NULL)
# s1 -- address of string to insert (strptr)
#
# registers:
# s2 -- address of new node to be inserted (new)
# s3 -- address of previous node in list (prev)
# s4 -- address of current node in list (cur)
#
# clobbers:
# a0, a1 (from strcmp)
#
# pseudo-code:
# // allocate new node
# new = malloc(node_size);
# new->node_next = NULL;
# new->node_str = strptr;
#
# // for loop:
# prev = NULL;
# for (cur = list_head; cur != NULL; cur = cur->node_next) {
# if (strcmp(new->node_str,cur->node_str) < 0)
# break;
# prev = cur;
# }
#
# // insertion:
# new->node_next = cur;
# if (prev != NULL)
# prev->node_next = new;
# else
# list_head = new;
insert:
addi $sp,$sp,-4
sw $ra,0($sp)
# allocate a new node -- do this first as we'll _always_ need it
li $a0,node_size # get the struct size
jal malloc
move $s2,$v0 # remember the address
# initialize the new node
sw $zero,node_next($s2) # new->node_next = NULL
sw $s1,node_str($s2) # new->node_str = strptr
# set up for loop
li $s3,0 # prev = NULL
move $s4,$s0 # cur = list_head
j insert_test
insert_loop:
lw $a0,node_str($s2) # get new string address
lw $a1,node_str($s4) # get current string address
jal strcmp # compare them -- new < cur?
bltz $v0,insert_now # if yes, insert after prev
move $s3,$s4 # prev = cur
lw $s4,node_next($s4) # cur = cur->node_next
insert_test:
bnez $s4,insert_loop # cur == NULL? if no, loop
insert_now:
sw $s4,node_next($s2) # new->node_next = cur
beqz $s3,insert_front # prev == NULL? if yes, fly
sw $s2,node_next($s3) # prev->node_next = new
j insert_done
insert_front:
move $s0,$s2 # list_head = new
insert_done:
lw $ra,0($sp)
addi $sp,$sp,4
jr $ra
# print_list: given address of front of list in $a0
# prints each string in list, one per line, in order
print_list:
addi $sp,$sp,-8
sw $ra,0($sp)
sw $s0,4($sp)
beq $s0,$zero,print_list_exit
print_list_loop:
lw $a0,node_str($s0)
jal print_string
jal print_newline
lw $s0,node_next($s0) # node = node->node_next
bnez $s0,print_list_loop
print_list_exit:
lw $s0,4($sp)
lw $ra,0($sp)
addi $sp,$sp,8
jr $ra
# Pseudo-standard library routines:
# wrappers around SPIM/MARS syscalls
#
# assumes buffer to read into is in $a0, and max length is in $a1
get_string:
li $v0,8
syscall
jr $ra
# malloc: takes one argument (in $a0) which indicates how many bytes
# to allocate; returns a pointer to the allocated memory (in $v0)
malloc:
li $v0,9 # SPIM/MARS code for "sbrk"
syscall
jr $ra
# print_newline: displays newline to standard output
print_newline:
li $v0,4
la $a0,STR_NEWLINE
syscall
jr $ra
# print_string: displays supplied string (in $a0) to standard output
print_string:
li $v0,4
syscall
jr $ra
UPDATE:
I agree with the line-by-line comments, as it helps me understand exactly what registers I intended to access(or messup in the previous case).
The rationale is simple. Imagine the converse: a large [5000 line] asm file with no comments that is known to have a bug. You can't trust the logic/algorithm [it may have the bug]. You can't trust the implementation [it may have the bug]. Whenever I get a case like that, I add the comments as I've described before even looking for the bug (i.e. I'm learning the algorithm and the code as I do this).
This is doing a code review. I'll do it even if the file already has comments [as yours did]. I often do this review for code I've just written that I think is "complete" [i.e. all code that needs to be written, has been].
If I finish late in the day, I'll do the review as the first thing next day. Often, "sleeping on it" makes it easy to spot bugs that I wouldn't have found in a review on the prior day [because I was still "too close" to the problem]. If what I'm writing is going to take multiple days, I always review the prior day's work as the first thing.
Even with your original comments that were like (2), it helped me find your "typo" bugs. When I saw add $t1,$t1,$zero #get zero
the comment mismatch made it easy to find/fix. It would have 10x harder stepping through the code with a debugger.
Comments always help. When originally coding insert
, I had:
jal strcmp # compare them -- new < cur?
bgtz $v0,insert_now # if yes, insert after prev
I got high-to-low output.
At first, I suspected strcmp
as it was [equally] new code. I usually review the lower level function before I review the call(s) to it. I did a code review of strcmp
and it seemed fine. But, I still wasn't convinced. I wrote some diagnostic code [unit tests] for strcmp
, but it passed.
Then, I noticed the comments vs the code in insert
above. The fix was easy: Change bgtz
to bltz
.
Another good rule is: Don't break [introduce a bug into] existing [working] code.
When I first reviewed print_list
, I thought: "it's using [and trashing] s0". This was okay because after calling it, the program was terminating. But, not if it were called multiple times. I had missed the fact that you were saving/restoring s0 on the stack [which I realized on the second reading].
Its refreshing to see an experienced be so encouraging to a newbie like me
We've all been newbies at some point. No harm, no foul.
Even veteran programmers create bugs [sometimes, multiple ones / day]. Bugs are not an indictment of one's soul/character. Bugs are just a normal by-product of being a programmer [just as finding/fixing them is].
IMO, the keys are:
(1) Willingness to learn
(2) Admit mistakes (e.g.) President Kennedy [after the "Bay of Pigs"]: "Mistakes are not errors, if we admit them"
(3) Most importantly, leave ego out of it.
The weakest programmers, when a bug [or suspected bug] is reported are the ones that:
(1) Say that their code is working [without even checking to see if the bug report has merit].
(2) Deny the bug as not really a bug [when it is]
(3) Refuse to generate [or accept] test cases that can/do prove/disprove a bug
(4) Be [deliberately] "slow" to produce the bug fix [it's not as much fun as new code]
(5) During slack time, refuse to "clean up" code that is working, but is also poorly structured [and should be refactored]
(6) Treat customers with disdain [i.e. "they're all idiots"]
IMO, good programmers, on the other hand, are proactive when it comes to bugs [or potential bugs].
At one company I was at, we had a shipping product [a realtime video processing platform] that had no apparent bugs. We were in slack time. But, my boss [who was also a good programmer] and I had a suspicion about some code. No bug report, no hard evidence, just a hunch. So, we did a review.
Sure enough, there was a bug. But, it would only trigger for an obscure edge case. We believed that it was too obscure for a customer to ever see it, as we had never actually seen it ourselves in our lab despite all our test video clips. We only found it by the code review. But, "bugs are bugs", so I started the fix.
About two weeks later, the customer support rep for our major client comes in with a report of some intermittent distortion in the video they were seeing [about once every 2-3 days].
Could this distortion come from the bug I was working on? I didn't have my full fix yet, but, by that time, I did have a unit test that could generate the bug. In the lab, I triggered it for the rep. He said: "Yes, that's it--the distortion the client is seeing"
The bug had been introduced about 3 months earlier. The client just had different video than we had, that made the bug occurrence much more likely.
By being proactive, we were able to [honestly] tell the client that we were "already on top of it" and we shortened the turnaround time for the fix to the client by two weeks. Both of which, endeared us to them [i.e. they bought more gear from us].