ARM Assembly

ARM Functions and User Input
It's time to learn more about stack management in ARM assembly by creating a program with function calls and user input. Examine the following source code:
.section .rodata
	// Linux syscall numbers and the standard file descriptors they operate on
	.equ STDIN,  0x00
	.equ STDOUT, 0x01
	.equ EXIT,   0x01
	.equ READ,   0x03
	.equ WRITE,  0x04

	// Process exit codes used by the two error paths
	.equ ERR_INVALID_INPUT, 0x01
	.equ ERR_BUFF_OVERFLOW, 0x02

	// Inclusive range of ASCII codes for the decimal digits '0'..'9'
	.equ b_MIN_ASCII, 0x30
	.equ b_MAX_ASCII, 0x39

	// Character that terminates a number typed by the user
	.equ b_NEWLINE, 0x0a

	// Size in bytes of each input buffer
	.equ b_BUFFER_SIZE, 0x08

	.section .data

	// Prompt, result, and error strings.
	// Each len_* symbol is the distance from the string's label to the
	// location counter right after it, i.e. the string length in bytes.
first_num_msg:
	.ascii "Enter the first number to add: "
	.equ len_first_num_msg, . - first_num_msg

second_num_msg:
	.ascii "Enter the second number to add: "
	.equ len_second_num_msg, . - second_num_msg

sum_msg:
	.ascii "The sum is: "
	.equ len_sum_msg, . - sum_msg

invalid_msg:
	.ascii "ERROR: Invalid input detected\n"
	.equ len_invalid_msg, . - invalid_msg

overflow_msg:
	.ascii "ERROR: Buffer overflow detected\n"
	.equ len_overflow_msg, . - overflow_msg

	.section .bss
/* The bss (block started by symbol) section holds uninitialized variables.
					Only space is reserved here; no initial contents are stored in the object file. */
first_number_buffer:
	.space b_BUFFER_SIZE  // Raw ASCII input for the first number

second_number_buffer:
	.space b_BUFFER_SIZE  // Raw ASCII input for the second number

	.section .text
	.global  _start

_start:
/*
	Program entry point.
	Prompts for two natural numbers, reads each with get_number, prints the
	"sum" label, then passes both numbers to print_sum and exits with status 0.
*/

/*
	Seed the variable registers r4-r10 with recognisable marker words.
	This is for demonstration only: the markers are easy to spot on the stack
	in a debugger once the called functions push r4-r10.
	Each 32-bit marker is built from two 16-bit halves: movw writes the lower
	half, movt writes the upper half.
*/
	movw r4, #0xbeef  // r4 lower half
	movt r4, #0xdead  // r4 upper half
	movw r5, #0xbabe
	movt r5, #0xdeed
	movw r6, #0xface
	movt r6, #0xcafe
	movw r7, #0xdeaf
	movt r7, #0xfade
	movw r8, #0xbabe
	movt r8, #0xbead
	movw r9, #0xface
	movt r9, #0xdeaf
	movw r10, #0xbade
	movt r10, #0xcade

prompt_for_first_number:
	// write(STDOUT, first_num_msg, len_first_num_msg)
	mov r0, #STDOUT
	ldr r1, =first_num_msg
	ldr r2, =len_first_num_msg
	mov r7, #WRITE
	svc #0

get_first_number:
	ldr  r1, =b_BUFFER_SIZE
	ldr  r0, =first_number_buffer
	bl   get_number               // r0: buffer address, r1: buffer length --> r0: unsigned integer
	push {r0}                     // Park the first number on the stack across the next calls

prompt_for_second_number:
	// write(STDOUT, second_num_msg, len_second_num_msg)
	mov r0, #STDOUT
	ldr r1, =second_num_msg
	ldr r2, =len_second_num_msg
	mov r7, #WRITE
	svc #0

get_second_number:
	ldr  r1, =b_BUFFER_SIZE
	ldr  r0, =second_number_buffer
	bl   get_number
	push {r0}                     // Park the second number on top of the first

print_sum_msg:
	// write(STDOUT, sum_msg, len_sum_msg)
	mov r0, #STDOUT
	ldr r1, =sum_msg
	ldr r2, =len_sum_msg
	mov r7, #WRITE
	svc #0

	pop {r0, r1}  // r0 = second number (pushed last), r1 = first number
	bl  print_sum // r0: unsigned integer, r1: unsigned integer --> void

exit_normally:
	// exit(0)
	mov r0, #0
	mov r7, #EXIT
	svc #0

exit_with_invalid_error:
	// Report the invalid-input message on STDOUT: write(STDOUT, invalid_msg, len_invalid_msg)
	mov r0, #STDOUT
	ldr r1, =invalid_msg
	ldr r2, =len_invalid_msg
	mov r7, #WRITE
	svc #0

	// Terminate the process: exit(ERR_INVALID_INPUT)
	mov r0, #ERR_INVALID_INPUT
	mov r7, #EXIT
	svc #0

exit_with_overflow_error:
	// Report the overflow message on STDOUT: write(STDOUT, overflow_msg, len_overflow_msg)
	mov r0, #STDOUT
	ldr r1, =overflow_msg
	ldr r2, =len_overflow_msg
	mov r7, #WRITE
	svc #0

	// Terminate the process: exit(ERR_BUFF_OVERFLOW)
	mov r0, #ERR_BUFF_OVERFLOW
	mov r7, #EXIT
	svc #0

get_number:
/*
	purpose:
	Read one line from STDIN into the caller's buffer and convert it to a
	natural number.

	usage:
	arg0 (r0) the memory address to store ASCII input from STDIN
	arg1 (r1) the size of the memory buffer to store the input

	returns:
	r0: the number produced by validate_input

	error handling (none of these paths return to the caller):
	- read returns 0 or a negative value (end of input or failure):
	  branches to exit_with_invalid_error
	- the buffer was filled completely and its last byte is not a newline:
	  branches to exit_with_overflow_error
	- validate_input reports an invalid character (anything other than
	  0123456789 before the terminating newline):
	  branches to exit_with_invalid_error
*/

	push {fp, lr} // Preserve the caller's frame pointer and link register (return address)
	mov  fp, sp   // Set the frame pointer to the current stack pointer
	push {r4-r10} // Preserve the caller's variable registers

	// read(STDIN, buffer, size): shift our two arguments into the syscall's slots
	mov r2, r1    // r2 = buffer size
	mov r1, r0    // r1 = buffer address
	mov r0, #STDIN
	mov r7, #READ
	svc #0        // r0 = number of bytes read, or a negative value on failure

	// Nothing was read: do not validate whatever the buffer held before this call
	cmp r0, #0
	ble invalid_number

	// From here on the code relies on r1 (address) and r2 (size) still holding
	// the values loaded before the syscall.
	cmp r0, r2              // Compare the number of bytes read (r0) to the size of the buffer (r2)
	blt endif_newline_check // Buffer not full, so the input fit

if_newline_check:
	// A completely full buffer must end with a newline, otherwise more input
	// was typed than the buffer can hold
	sub  r6, r2, #0x1             // Offset of the last byte is one less than the size
	ldrb r4, [r1, r6]             // Load the last byte from the buffer
	cmp  r4, #b_NEWLINE
	bne  exit_with_overflow_error // Does not return

endif_newline_check:
	mov r0, r1        // arg0: buffer address
	mov r1, r2        // arg1: buffer length
	bl  validate_input // r0: buffer address, r1: buffer length --> r0: 0x0 is valid 0x1 is invalid, r1: value

	cmp r0, #0x1       // Test for the invalid flag
	beq invalid_number

valid_number:
	mov r0, r1   // validate_input hands the number back in r1; this function returns it in r0
	pop {r4-r10} // Restore the caller's r4-r10
	pop {fp, pc} // Restore the caller's fp and return by loading the saved lr into pc

invalid_number:
	pop {r4-r10, fp}            // Restore the caller's registers; the saved lr stays on the stack because we never return
	b   exit_with_invalid_error // Does not return

validate_input:
/*
	purpose:
	Check that a buffer holds only ASCII decimal digits, optionally ended by a
	newline, and compute the number those digits spell.

	parameters:
	arg0 (r0) the memory address to validate ASCII decimal input from
	arg1 (r1) the size of the input buffer

	returns:
	r0: 0x0 for valid decimal number,
	0x1 for invalid decimal number

	r1: Unchanged if number was invalid,
	the value of the number if the number was valid
	(0 when the first character is already the newline)

	The scan stops at the first newline, or after r1 characters when no newline
	is present. Validation and conversion happen in one left-to-right pass:
	total = total * 10 + digit. The result wraps modulo 2^32 if the digits
	spell a larger value.

	Register use:
	r0: address of the next character to read
	r1: buffer length (scan limit)
	r3: number of digits accepted so far
	r4: current character, then its digit value
	r6: running total
	r8: base (10)
*/

	push {fp, lr} // Preserve the caller's fp and return address
	mov  fp, sp   // Set the frame pointer to the current stack pointer
	push {r4-r10} // Preserve the caller's variable registers

	mov r3, #0x0  // No digits accepted yet
	mov r6, #0x0  // Running total starts at zero
	mov r8, #10   // Decimal base

validate_loop:
	ldrb r4, [r0], #1      // Load one character and advance to the next

	cmp r4, #b_NEWLINE     // Newline is the valid terminator
	beq end_validate_loop

	cmp r4, #b_MAX_ASCII   // Above '9' is invalid
	bhi invalid
	cmp r4, #b_MIN_ASCII   // Below '0' is invalid
	blo invalid

	sub r4, r4, #b_MIN_ASCII // Convert the ASCII digit to its value 0-9
	mla r6, r6, r8, r4       // total = total * 10 + digit

	add r3, r3, #0x01      // One more digit accepted
	cmp r3, r1             // Stop once the whole buffer has been scanned
	blt validate_loop

end_validate_loop:
	mov r0, #0x0 // Return code of 0 indicates a valid number
	mov r1, r6   // The running total is passed back as the number

	pop {r4-r10} // Restore variable registers
	pop {fp, pc} // Restore fp and resume execution at the saved return address

invalid:
	mov r0, #0x1 // Return code of 1 indicates an invalid number; r1 is left untouched
	pop {r4-r10} // Restore variable registers
	pop {fp, pc} // Restore fp and resume execution at the saved return address

print_sum:
/*
	purpose:
	Add two unsigned integers and write the sum to STDOUT as decimal text
	followed by a newline.

	parameters:
	arg0 (r0) the first number to add
	arg1 (r1) the second number to add

	returns:
	void

	Only the digits and the newline are written (no padding bytes), and a sum
	of zero is printed as "0". The addition wraps modulo 2^32.

	Variable registers:
	r4: address of the most recently stored character (moves towards lower addresses)
	r5: b_NEWLINE
	r6: divisor (10)
	r7: quotient, later the syscall number
	r8: divisor * quotient product
	r9: remainder, then its ASCII digit
*/

	push {fp, lr} // Preserve the caller's fp and return address
	mov  fp, sp   // Set the frame pointer to the current stack pointer
	push {r4-r10} // Preserve the caller's variable registers

	add r0, r0, r1 // r0 = sum of both numbers

	// 12 bytes of scratch space on the stack: a 32-bit value needs at most
	// 10 digits, plus the newline in the last byte (the first byte stays unused)
	sub sp, sp, #0x0c
	add r4, sp, #0x0b  // r4 = address of the last scratch byte
	mov r5, #b_NEWLINE
	strb r5, [r4]      // The newline goes last in memory so it is printed last
	mov r6, #10        // Set the divisor to 10

digit_to_ASCII_loop:
	// Peel off the least significant digit and store it in front of what is
	// already there. Running the body before the test yields "0" for a zero sum.
	udiv r7, r0, r6          // quotient = value / 10 (unsigned)
	mul  r8, r7, r6          // quotient * 10
	sub  r9, r0, r8          // remainder = value - quotient * 10
	add  r9, r9, #b_MIN_ASCII // Convert the digit to its ASCII character
	strb r9, [r4, #-1]!      // Step r4 back one byte and store the character there
	movs r0, r7              // Continue with the quotient; sets Z when no digits remain
	bne  digit_to_ASCII_loop

print_sum_syscall:
	// write(STDOUT, first digit, bytes from first digit through newline)
	add r2, sp, #0x0c // r2 = address one past the newline
	sub r2, r2, r4    // r2 = length of digits plus newline
	mov r1, r4        // r1 = address of the first digit
	mov r0, #STDOUT
	mov r7, #WRITE
	svc #0

	add sp, sp, #0x0c // Release the scratch space

	pop {r4-r10, fp, pc} // Restore the caller's registers and return