/* foo: takes seven int parameters and returns
 * boo (a7, a6, a5, a3, a4) + coo (a2, a3) + a1.
 * Note that a3 is passed to both boo and coo, and a1 is only added to the result. */
int foo (int a1, int a2, int a3, int a4, int a5, int a6, int a7)
{
return boo (a7, a6, a5, a3, a4) + coo (a2, a3) + a1;
}

When foo gets called the stack looks as follows:

+0	a5
+4	a6
+8	a7

First we look whether foo() calls any other functions. Since it does we need to save and restore ra at the prologue and epilogue respectively:

# Prologue: foo calls other functions, so ra is saved on the stack first.
foo: addi sp, sp, -4 # make room for one word on the stack
stw ra, 0(sp) # store ra in that word

…

# Epilogue: reload ra, release the word, and return.
epi:        ldw         ra, 0(sp)
                addi       sp, sp, 4
                ret

Now it’s time to look at foo’s main body. This can be rewritten as follows to illustrate the sequence in which the various actions should take place:

/* Same computation as the one-line version of foo, split into steps to make
 * the order of the actions explicit: call boo, then call coo, then add a1. */
int foo (int a1, int a2, int a3, int a4, int a5, int a6, int a7)
{
    int tmp1, tmp2;                     /* return values of boo and coo */

    tmp1 = boo (a7, a6, a5, a3, a4);
    tmp2 = coo (a2, a3);
    return tmp1 + tmp2 + a1;
}

Notice that the value for parameter a1 has to “survive” the calls to boo and coo. So a1 has to be pushed on the stack before the call to boo and be restored from the stack after the call to coo returns. So the structure of foo is as follows:

foo:        push ra
                push a1
                code to call boo
                code to call coo
                pop a1
                calculate return value (tmp1 + tmp2 + a1)
                pop ra
                ret

Here’s the code for foo with the saving and restoring of a1:

# foo with ra and a1 saved around the (still omitted) calls to boo and coo.
foo: addi sp, sp, -4
stw ra, 0(sp) # push ra

addi sp, sp, -4
stw r4, 0(sp) # r4 contains a1

…

# (placeholder) tmp1 = tmp1 + tmp2 goes here

# pop a1 back into r4 now that both calls have returned
ldw         r4, 0(sp)               # restore r4’s original value (a1) from the stack
addi       sp, sp, 4

# (placeholder) calculate tmp1 + a1

epi:        ldw         ra, 0(sp)
                addi       sp, sp, 4
                ret

Now let’s look at the two calls:

tmp1 = boo (a7, a6, a5, a3, a4);
tmp2 = coo (a2, a3) ;

Notice that a2 and a3 are used *after* the call to boo. These two values are, upon entry to foo, in r5 and r6. So, we must explicitly preserve them as they are in *caller-saved* registers. This is what we did before for a1 which was stored in r4.

Here’s the code for foo with the saving and restoring of a2 and a3:

# foo with ra, a1, a2 and a3 saved; the two call sequences are still placeholders.
foo:    addi    sp, sp, -4
        stw     ra, 0(sp)               # push ra

        addi    sp, sp, -4
        stw     r4, 0(sp)               # r4 contains a1

        addi    sp, sp, -8              # save r5 and r6 (a2 and a3) on the stack
        stw     r5, 0(sp)
        stw     r6, 4(sp)

stack1:                                 # a2 and a3 have just been pushed; the stack layout
                                        # shown after this listing describes this point

        # code for tmp1 = boo (a7, a6, a5, a3, a4);

        ldw     r5, 0(sp)               # restore r5 and r6 from the stack
        ldw     r6, 4(sp)
        addi    sp, sp, 8

        # code for tmp2 = coo (a2, a3);

        # tmp1 = tmp1 + tmp2

        ldw     r4, 0(sp)               # restore r4’s original value (a1) from the stack
        addi    sp, sp, 4

        # calculate tmp1 + a1

epi:    ldw     ra, 0(sp)               # pop ra
        addi    sp, sp, 4
        ret

Just after the push of r5 and r6 (label stack1:) the stack looks like this:

+0	a2
+4	a3
+8	a1
+12	ra
+16	a5
+20	a6
+24	a7

Let’s implement the call to boo (a7, a6, a5, a3, a4). We need to pass the first four parameters in registers r4 through r7 and the fifth on the stack. At this point r4=a1, r5=a2, r6=a3, and r7=a4. The rest of the parameters to foo are on the stack as shown before. To call boo, we need to have r4=a7, r5=a6, r6=a5, r7=a3 and on the top of the stack we should have a4. Here’s the code:

# tmp1 = boo (a7, a6, a5, a3, a4)
# On entry: r4=a1, r5=a2, r6=a3, r7=a4; a2, a3, a1, ra are on the stack below a5..a7.
addi    sp, sp, -4
stw     r7, 0(sp)               # pass a4 as the fifth parameter to boo
ldw     r4, 28(sp)              # r4 = a7, which is now at distance 24+4 from the top of the stack
ldw     r5, 24(sp)              # r5 = a6, which is now at distance 20+4 from the top of the stack
add     r7, r6, r0              # r7 = a3, which is in r6; this has to happen before the next
                                # instruction, otherwise we lose a3
ldw     r6, 20(sp)              # r6 = a5
call    boo
addi    sp, sp, 4               # pop the fifth argument – not needed anymore
                                # (addi, not add: the 4 is an immediate operand)

Now we need to call coo (a2, a3). At this point the stack looks as shown before: a2 is at the top of the stack and a3 is at distance +4. We have one more thing to take care of. We just got a return value from the call to boo in r2, and r2 will be overwritten by the call to coo. Hence we have to preserve it. We do that around the call to coo:

# tmp2 = coo (a2, a3), with boo's return value (tmp1) kept on the stack across the call.
# The moves below read a2 from r5 and a3 from r6.
addi       sp, sp, -4
stw         r2, 0(sp)               # push the return value of boo onto the stack
                                                # that’s tmp1
add        r4, r5, r0               # r4 = a2, the first parameter to pass to coo
add        r5, r6, r0               # r5 = a3, the second parameter
call          coo
# calculate tmp1 += tmp2; we’ll keep the result in r2
ldw         r4, 0(sp)               # reload tmp1 into r4
add        r2, r2, r4               # r2 = tmp2 + tmp1
addi       sp, sp, 4                # pop the return value of boo from the stack

Here’s the complete code for foo:

# int foo (int a1, int a2, int a3, int a4, int a5, int a6, int a7)
#   returns boo (a7, a6, a5, a3, a4) + coo (a2, a3) + a1
# In:  r4=a1, r5=a2, r6=a3, r7=a4; a5, a6, a7 at 0(sp), 4(sp), 8(sp)
# Out: r2
# This version adjusts sp step by step at every push and pop.
foo:    addi    sp, sp, -4
        stw     ra, 0(sp)               # push ra

        addi    sp, sp, -4
        stw     r4, 0(sp)               # push a1 (r4); it is needed after both calls

        addi    sp, sp, -8              # save r5 and r6 (a2 and a3) on the stack
        stw     r5, 0(sp)
        stw     r6, 4(sp)

        # tmp1 = boo (a7, a6, a5, a3, a4)
        addi    sp, sp, -4
        stw     r7, 0(sp)               # pass a4 as the fifth parameter to boo
        ldw     r4, 28(sp)              # r4 = a7, now at distance 24+4 from the top of the stack
        ldw     r5, 24(sp)              # r5 = a6, now at distance 20+4 from the top of the stack
        add     r7, r6, r0              # r7 = a3, which is in r6; must happen before the next
                                        # instruction, otherwise we lose a3
        ldw     r6, 20(sp)              # r6 = a5
        call    boo
        addi    sp, sp, 4               # pop the fifth argument – not needed anymore
                                        # (addi, not add: the 4 is an immediate operand)

        ldw     r5, 0(sp)               # restore r5 and r6 (a2 and a3) from the stack
        ldw     r6, 4(sp)
        addi    sp, sp, 8

        # tmp2 = coo (a2, a3)
        addi    sp, sp, -4
        stw     r2, 0(sp)               # push boo's return value (tmp1); coo overwrites r2
        add     r4, r5, r0              # r4 = a2, the first parameter to pass to coo
        add     r5, r6, r0              # r5 = a3, the second parameter
        call    coo

        # tmp1 = tmp1 + tmp2, kept in r2
        ldw     r4, 0(sp)               # reload tmp1
        add     r2, r2, r4              # r2 = tmp2 + tmp1
        addi    sp, sp, 4               # pop tmp1

        # calculate tmp1 + a1
        ldw     r4, 0(sp)               # restore r4’s original value (a1) from the stack
        addi    sp, sp, 4
        add     r2, r2, r4              # r2 += a1

epi:    ldw     ra, 0(sp)               # pop ra
        addi    sp, sp, 4
        ret

This code does step-by-step adjustments on the stack any time it needs to push or pop values. Now that we wrote foo we can determine the maximum number of elements we push on the stack and aggregate these changes. We needed space for saving ra, a1, a2, a3, passing a fifth argument to boo, and saving the return value of boo. We can accommodate all these by allocating space for six words on the stack. Here’s one possible allocation for the stack frame:

+0	5^th arg to boo
+4	ret value of boo
+8	a3
+12	a2
+16	a1
+20	ra
+24	a5	(sp points here when foo gets called)
+28	a6
+32	a7

Here’s the code for foo assuming this stack frame. It’s much easier to follow:

# int foo (int a1, int a2, int a3, int a4, int a5, int a6, int a7)
#   returns boo (a7, a6, a5, a3, a4) + coo (a2, a3) + a1
# In:  r4=a1, r5=a2, r6=a3, r7=a4; a5, a6, a7 at 0(sp), 4(sp), 8(sp) on entry
# Out: r2
# Frame (six words, allocated once):
#   0(sp) 5th arg to boo   4(sp) ret value of boo   8(sp) a3
#  12(sp) a2              16(sp) a1                20(sp) ra
#  24(sp) a5              28(sp) a6                32(sp) a7   (caller's area)
foo:    addi    sp, sp, -24             # allocate the frame: sp moves down by six words
        stw     ra, 20(sp)              # save ra
        stw     r4, 16(sp)              # save a1
        stw     r5, 12(sp)              # save a2
        stw     r6, 8(sp)               # save a3

        # call boo (a7, a6, a5, a3, a4)
        stw     r7, 0(sp)               # pass a4 as the fifth argument
        ldw     r4, 32(sp)              # a7 is the first argument
        ldw     r5, 28(sp)              # a6 is the second
        ldw     r6, 24(sp)              # a5 is the third
        ldw     r7, 8(sp)               # a3 is the fourth (we could have moved it from r6
                                        # just before the previous load)
        call    boo
        stw     r2, 4(sp)               # save boo’s return value

        # call coo (a2, a3)
        ldw     r4, 12(sp)              # a2 from its save slot
        ldw     r5, 8(sp)               # a3 from its save slot
        call    coo

        # calculate return value
        ldw     r4, 4(sp)               # return value of boo; r4 is used as a temporary,
                                        # it’s caller-saved so no need to do anything
        add     r2, r2, r4              # add to return value of coo
        ldw     r4, 16(sp)              # get a1
        add     r2, r2, r4

epi:    ldw     ra, 20(sp)              # reload the saved ra
        addi    sp, sp, 24              # release the frame: sp back to its value on entry
        ret

Further reduction in instruction count is possible. For example, we could use callee-saved registers to preserve a1, a2, and a3. While that would reduce instruction count, it could also make the code harder to follow. So, it’s debatable whether that would be good programming practice (assuming that performance is satisfactory to start with).