| /* Overlay manager for SPU. |
| |
| Copyright 2006, 2007 Free Software Foundation, Inc. |
| |
| This file is part of the GNU Binutils. |
| |
| This program is free software; you can redistribute it and/or modify |
| it under the terms of the GNU General Public License as published by |
| the Free Software Foundation; either version 3 of the License, or |
| (at your option) any later version. |
| |
| This program is distributed in the hope that it will be useful, |
| but WITHOUT ANY WARRANTY; without even the implied warranty of |
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| GNU General Public License for more details. |
| |
| You should have received a copy of the GNU General Public License |
| along with this program; if not, write to the Free Software |
| Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, |
| MA 02110-1301, USA. */ |
| |
| /** |
| * MFC DMA definitions used by the overlay loader (__ovly_load). |
| */ |
| #define MFC_GET_CMD 0x40 /* command value written to $MFC_Cmd for each transfer */ |
| #define MFC_MAX_DMA_SIZE 0x4000 /* cap on the size of a single DMA request, in bytes */ |
| #define MFC_TAG_UPDATE_ALL 2 /* value written to $MFC_WrTagUpdate before reading $MFC_RdTagStat */ |
| #define MFC_TAG_ID 0 /* tag written to $MFC_TagId; also selects the tag-mask bit */ |
| |
| |
| /** |
| * Temporary register allocations. |
| * Several names alias the same register; each name is only meaningful in |
| * the part of __ovly_load that uses it. |
| * Of the registers named here, __ovly_load spills and reloads only $6, |
| * $7 and $8 (plus $9 when OVLY_IRQ_SAVE is defined). $75-$78 are |
| * written without being saved. |
| */ |
| #define tab $75 |
| #define cgbits $75 |
| #define add64 $75 |
| #define ealo $75 |
| #define newmask $75 |
| #define tagstat $75 |
| #define bchn $75 |
| #define rv1 $75 |
| |
| #define off $76 |
| #define off64 $76 |
| #define maxsize $76 |
| #define oldmask $76 |
| #define sz $76 |
| #define lnkr $76 |
| #define rv2 $76 |
| |
| #define cur $77 |
| #define cmp $77 |
| #define buf $77 |
| #define genwi $77 |
| #define tagid $77 |
| #define cmd $77 |
| #define rv3 $77 |
| |
| #define cgshuf $78 /* $78 is also the partition-number input of __ovly_load */ |
| |
| #define vma $6 /* first loaded with a whole _ovly_table entry, then used as the DMA local-store address */ |
| |
| #define map $7 |
| #define osize $7 |
| #define cmp2 $7 |
| |
| #define ea64 $8 |
| #define retval $8 /* holds the address of __ovly_return */ |
| |
| #ifdef OVLY_IRQ_SAVE |
| #define irqtmp $8 /* shares $8 with ea64 and retval */ |
| #define irq_stat $9 /* value read from $SPU_RdMachStat on entry */ |
| #endif |
| |
| # Stack quadword minus N |
| #define SQWM1 -16*1 /* SQWMn = -16*n: byte displacement of the n-th 16-byte slot below $sp */ |
| #define SQWM2 -16*2 |
| #define SQWM3 -16*3 |
| #define SQWM4 -16*4 |
| #define SQWM5 -16*5 /* SQWM5 and below are only referenced inside OVLY_PRINTFS code */ |
| #define SQWM6 -16*6 |
| #define SQWM7 -16*7 |
| #define SQWM8 -16*8 |
| #define SQWM9 -16*9 |
| #define SQWM10 -16*10 |
| #define SQWM11 -16*11 |
| #define SQWM12 -16*12 |
| #define SQWM13 -16*13 |
| #define SQWM14 -16*14 |
| #define SQWM15 -16*15 |
| #define SQWM16 -16*16 |
| |
| .extern _ovly_table /* 16-byte entries {vma, size, file_offset, buf}, indexed from 1 by partition number */ |
| .extern _ovly_buf_table /* 4-byte entries, indexed from 1 by buffer number: partition currently mapped */ |
| |
| #ifdef OVLY_PRINTFS /* optional tracing via stop-and-signal printf requests */ |
| #define SPE_C99_VPRINTF 37 /* opcode placed in the top byte of the message word that follows each "stop" */ |
| __entry_event_format: /* printed on entry to __ovly_load */ |
| .string "In entry_event_hook segment=0x%08x entry-address=0x%08x\n" |
| __debug_event_format: /* printed after the DMA wait, with the four words of $lr */ |
| .string "In debug_event_hook link-register=0x%08x %08x %08x %08x\n" |
| __dma_event_format: /* printed once per DMA request */ |
| .string "In dma_event_hook vma=0x%08x ea=%08x%08x sz=%08x\n" |
| __ovly_buf_table_format: /* printed once per _ovly_buf_table entry */ |
| .string "_ovly_buf_table[%08x]=%08x\n" |
| #endif |
| |
| .text |
| .align 4 |
| .type __rv_pattern, @object |
| .size __rv_pattern, 16 |
| __rv_pattern: /* shufb control used by __ovly_load when it builds the new $lr */ |
| .word 0x00010203, 0x1c1d1e1f, 0x00010203, 0x10111213 /* result words: A.w0, B.w3, A.w0, B.w0 (A, B = first, second shufb source) */ |
| .type __cg_pattern, @object |
| .size __cg_pattern, 16 |
| __cg_pattern: /* shufb control used in the 64-bit effective-address add of the DMA loop */ |
| .word 0x04050607, 0x80808080, 0x80808080, 0x80808080 /* result words: A.w1, 0, 0, 0 (selector 0x80 yields a zero byte) */ |
| |
| /** |
| * __ovly_return - stub for returning from overlay functions. |
| * |
| * inputs: |
| * $lr link register |
| * |
| * outputs: |
| * $78 old partition number, to be reloaded |
| * $79 return address in old partition number |
| * |
| * When the old partition number is zero the stub branches straight to |
| * $79. Otherwise execution falls off the end of the stub into |
| * __ovly_load, which follows it, with $78/$79 set up as its inputs. |
| */ |
| .global __ovly_return |
| .type __ovly_return, @function |
| |
| .word 0 /* NOTE(review): pad word ahead of the entry label; the "+4" in the pattern displacements used by __ovly_load looks tied to it -- confirm */ |
| __ovly_return: |
| shlqbyi $78, $lr, 4 /* $78 word 0 = $lr word 1 */ |
| shlqbyi $79, $lr, 8 /* $79 word 0 = $lr word 2 */ |
| biz $78, $79 /* partition 0: go directly to the return address */ |
| .size __ovly_return, . - __ovly_return |
| |
| /** |
| * __ovly_load - copy an overlay partition to local store. |
| * |
| * inputs: |
| * $78 partition number to be loaded. |
| * $79 branch target in new partition. |
| * $lr link register, containing return addr. |
| * |
| * outputs: |
| * $lr new link register, returning through __ovly_return. |
| * |
| * Copy a new overlay partition into local store, or return |
| * immediately if the partition is already resident. |
| * |
| * Scratch: $6-$8 (and $9 with OVLY_IRQ_SAVE) are spilled below $sp and |
| * reloaded before the final branch. $75-$77 are overwritten and not |
| * restored, and so is $78 when a partition actually has to be loaded. |
| * Every path leaves through the "bi $79" at __ovly_load_ret. |
| */ |
| .global __ovly_load |
| .type __ovly_load, @function |
| |
| __ovly_load: |
| /* Save temporary registers to stack. |
| The slots lie below $sp; $sp itself is not adjusted. */ |
| stqd $6, -16($sp) |
| stqd $7, -32($sp) |
| stqd $8, -48($sp) |
| |
| #ifdef OVLY_IRQ_SAVE |
| /* Save irq state, then disable interrupts. */ |
| stqd $9, -64($sp) |
| ila irqtmp, __ovly_irq_save |
| rdch irq_stat, $SPU_RdMachStat |
| bid irqtmp /* branch to the next instruction with interrupts disabled */ |
| __ovly_irq_save: |
| #endif |
| |
| #ifdef OVLY_PRINTFS /* optional tracing: print $78 and $79, then dump _ovly_buf_table */ |
| //============================================== |
| // In entry_event_hook segment=0x%08x entry-address=0x%08x |
| //============================================== |
| # save registers |
| stqd $10, SQWM5($sp) |
| stqd $11, SQWM6($sp) |
| stqd $12, SQWM7($sp) |
| # Place input parameters onto the stack to form the |
| # local storage memory image. |
| ila $10, __entry_event_format |
| stqd $10, SQWM12($sp) |
| ai $10, $sp, SQWM9 |
| stqd $10, SQWM11($sp) |
| stqd $sp, SQWM10($sp) |
| stqd $78, SQWM9($sp) |
| stqd $79, SQWM8($sp) |
| # Construct a message consisting of the 8-bit opcode |
| # and 24-bit local store pointer to the input |
| # parameters and place it forllowing the stop and signal |
| ila $10, 0x3ffff # address mask |
| ilhu $11, SPE_C99_VPRINTF << 8 |
| ai $12, $sp, SQWM12 # parameter pointer |
| selb $11, $11, $12, $10 # combine command & address ptr |
| brsl $10, next1a /* $10 = address of next1a */ |
| next1a: |
| .type next1a, @function |
| lqr $12, message1a |
| cwd $10, message1a-next1a($10) |
| shufb $11, $11, $12, $10 # insert msg into inst word |
| stqr $11, message1a # store cmd/ptr into msg word |
| dsync |
| # Notify the PPE to perform the assisted call request |
| # by issing a stop and signal with a signal code |
| # of 0x2100 (C99 class) |
| stop 0x2100 |
| message1a: |
| .word 0 /* placeholder, overwritten at run time by the stqr above */ |
| |
| # save registers |
| stqd $13, SQWM8($sp) |
| stqd $14, SQWM9($sp) |
| stqd $15, SQWM10($sp) |
| stqd $16, SQWM11($sp) |
| |
| # initialize loop |
| il $13, 1 |
| ila $14, _ovly_buf_table |
| ila $15, _ovly_buf_table_end |
| |
| loop_start1: /* one printf per table word: $13 = index starting at 1, $14 = entry address */ |
| # Place input parameters onto the stack to form the |
| # local storage memory image. |
| ila $10, __ovly_buf_table_format |
| stqd $10, SQWM16($sp) |
| ai $10, $sp, SQWM13 |
| stqd $10, SQWM15($sp) |
| stqd $sp, SQWM14($sp) |
| stqd $13, SQWM13($sp) |
| lqd $16, 0($14) |
| rotqby $16, $16, $14 /* rotate the addressed word into word 0 */ |
| stqd $16, SQWM12($sp) |
| # Construct a message consisting of the 8-bit opcode |
| # and 24-bit local store pointer to the input |
| # parameters and place it forllowing the stop and signal |
| ila $10, 0x3ffff # address mask |
| ilhu $11, SPE_C99_VPRINTF << 8 |
| ai $12, $sp, SQWM16 # parameter pointer |
| selb $11, $11, $12, $10 # combine command & address ptr |
| brsl $10, next1b |
| next1b: |
| .type next1b, @function |
| lqr $12, message1b |
| cwd $10, message1b-next1b($10) |
| shufb $11, $11, $12, $10 # insert msg into inst word |
| stqr $11, message1b # store cmd/ptr into msg word |
| dsync |
| # Notify the PPE to perform the assisted call request |
| # by issing a stop and signal with a signal code |
| # of 0x2100 (C99 class) |
| stop 0x2100 |
| message1b: |
| .word 0 /* placeholder, overwritten at run time by the stqr above */ |
| |
| # move to next entry |
| ai $13, $13, 1 |
| ai $14, $14, 4 |
| clgt $16, $15, $14 /* continue while _ovly_buf_table_end > $14 (unsigned) */ |
| brnz $16, loop_start1 |
| |
| # restore registers |
| lqd $16, SQWM11($sp) |
| lqd $15, SQWM10($sp) |
| lqd $14, SQWM9($sp) |
| lqd $13, SQWM8($sp) |
| lqd $12, SQWM7($sp) |
| lqd $11, SQWM6($sp) |
| lqd $10, SQWM5($sp) |
| //============================================== |
| #endif |
| |
| /* Set branch hint to overlay target. */ |
| hbr __ovly_load_ret, $79 |
| |
| /* Get caller's overlay index by back chaining through stack frames. |
| * Loop until end of stack (back chain all-zeros) or |
| * encountered a link register we set here. */ |
| lqd bchn, 0($sp) /* bchn = back chain stored at 0($sp) */ |
| ila retval, __ovly_return |
| |
| __ovly_backchain_loop: |
| lqd lnkr, 16(bchn) /* quadword at offset 16 of that frame (the link register compared below) */ |
| lqd bchn, 0(bchn) /* step to the next older frame */ |
| ceq cmp, lnkr, retval /* lnkr == __ovly_return ? */ |
| ceqi cmp2, bchn, 0 /* back chain == 0 ? */ |
| or cmp, cmp, cmp2 |
| brz cmp, __ovly_backchain_loop /* keep walking while neither test hit in word 0 */ |
| |
| /* If we reached the zero back-chain, then lnkr is bogus. Clear the |
| * part of lnkr that we use later (slot 3). */ |
| rotqbyi cmp2, cmp2, 4 /* word 0 "end of stack" flag moves to word 3 */ |
| andc lnkr, lnkr, cmp2 |
| |
| /* Set lr = {__ovly_return, prev ovl ndx, caller return adr, callee ovl ndx}. */ |
| lqd rv1, (__rv_pattern-__ovly_return+4)(retval) /* rv1 = __rv_pattern, addressed relative to __ovly_return */ |
| shufb rv2, retval, lnkr, rv1 /* words 0-1 wanted: __ovly_return, word 3 of lnkr */ |
| shufb rv3, $lr, $78, rv1 /* words 2-3 wanted: word 0 of $lr, word 0 of $78 */ |
| fsmbi rv1, 0xff /* byte mask covering the low 8 bytes (words 2-3) */ |
| selb rv2, rv2, rv3, rv1 /* words 0-1 from rv2, words 2-3 from rv3 */ |
| /* If we have a tail call from one overlay function to another overlay, |
| then lr is already set up. Don't change it. */ |
| ceq rv1, $lr, retval |
| fsmb rv1, rv1 |
| selb $lr, rv2, $lr, rv1 /* keep $lr when its word 0 is __ovly_return, else install rv2 */ |
| |
| /* Branch to $79 if non-overlay */ |
| brz $78, __ovly_load_restore |
| |
| /* Load values from _ovly_table[$78]. |
| * extern struct { |
| * u32 vma; |
| * u32 size; |
| * u32 file_offset; |
| * u32 buf; |
| * } _ovly_table[]; |
| */ |
| shli off, $78, 4 /* 16 bytes per entry */ |
| ila tab, _ovly_table - 16 /* bias by one entry: partition numbers start at 1 */ |
| lqx vma, tab, off /* whole entry: {vma, size, file_offset, buf} */ |
| rotqbyi buf, vma, 12 /* bring the buf field (word 3) into word 0 */ |
| |
| /* Load values from _ovly_buf_table[buf]. |
| * extern struct { |
| * u32 mapped; |
| * } _ovly_buf_table[]; |
| */ |
| ila tab, _ovly_buf_table |
| ai off, buf, -1 /* buffer numbers start at 1 */ |
| shli off, off, 2 /* 4 bytes per entry */ |
| lqx map, tab, off /* quadword containing the entry */ |
| rotqby cur, map, off /* rotate the entry into word 0 */ |
| |
| /* Branch to $79 now if overlay is already mapped. */ |
| ceq cmp, $78, cur |
| brnz cmp, __ovly_load_restore |
| |
| /* Marker for profiling code. If we get here, we are about to load |
| * a new overlay. |
| */ |
| .global __ovly_load_event |
| .type __ovly_load_event, @function |
| __ovly_load_event: |
| |
| /* Set _ovly_buf_table[buf].mapped = $78. */ |
| cwx genwi, tab, off /* insertion control for the word at tab + off */ |
| shufb map, $78, map, genwi |
| stqx map, tab, off |
| |
| /* A new partition needs to be loaded. Prepare for DMA loop. |
| * _EAR_ is the 64b base EA, filled in at run time by the |
| * loader, and indicating the value for SPU executable image start. |
| */ |
| lqd cgshuf, (__cg_pattern-__ovly_return+4)(retval) /* cgshuf is $78: the partition number is overwritten here and not read again */ |
| rotqbyi osize, vma, 4 /* osize = size field */ |
| rotqbyi sz, vma, 8 /* sz = file_offset field; it is the first increment added to ea64 */ |
| lqa ea64, _EAR_ /* overwrites $8, which held retval */ |
| |
| __ovly_xfer_loop: |
| /* 64b add to compute next ea64. |
| The addend is the previous value of sz moved into the low word of |
| the 64-bit pair: the file offset on the first pass, the size of the |
| preceding transfer on later passes. */ |
| rotqmbyi off64, sz, -4 |
| cg cgbits, ea64, off64 /* per-word carry out */ |
| shufb add64, cgbits, cgbits, cgshuf /* carry of word 1 moved to word 0, other words zero */ |
| addx add64, ea64, off64 /* add, taking that carry in */ |
| ori ea64, add64, 0 /* ea64 = add64 */ |
| |
| /* Setup DMA parameters, then issue DMA request. */ |
| rotqbyi ealo, add64, 4 /* low word of the EA into word 0 */ |
| ila maxsize, MFC_MAX_DMA_SIZE |
| cgt cmp, osize, maxsize |
| selb sz, osize, maxsize, cmp /* sz = osize > maxsize ? maxsize : osize */ |
| ila tagid, MFC_TAG_ID |
| wrch $MFC_LSA, vma |
| wrch $MFC_EAH, ea64 |
| wrch $MFC_EAL, ealo |
| wrch $MFC_Size, sz |
| wrch $MFC_TagId, tagid |
| ila cmd, MFC_GET_CMD |
| wrch $MFC_Cmd, cmd /* command channel is written last */ |
| |
| #ifdef OVLY_PRINTFS /* optional tracing: print vma, ea and sz of the request just issued */ |
| //============================================== |
| // In dma_event_hook vma=0x%08x ea=%08x%08x sz=%08x |
| //============================================== |
| # save registers |
| stqd $10, SQWM5($sp) |
| stqd $11, SQWM6($sp) |
| stqd $12, SQWM7($sp) |
| # Place input parameters onto the stack to form the |
| # local storage memory image. |
| ila $10, __dma_event_format |
| stqd $10, SQWM14($sp) |
| ai $10, $sp, SQWM11 |
| stqd $10, SQWM13($sp) |
| stqd $sp, SQWM12($sp) |
| stqd vma, SQWM11($sp) |
| stqd ea64, SQWM10($sp) |
| stqd ealo, SQWM9($sp) |
| stqd sz, SQWM8($sp) |
| # Construct a message consisting of the 8-bit opcode |
| # and 24-bit local store pointer to the input |
| # parameters and place it forllowing the stop and signal |
| ila $10, 0x3ffff # address mask |
| ilhu $11, SPE_C99_VPRINTF << 8 |
| ai $12, $sp, SQWM14 # parameter pointer |
| selb $11, $11, $12, $10 # combine command & address ptr |
| brsl $10, next3a |
| next3a: |
| .type next3a, @function |
| lqr $12, message3a |
| cwd $10, message3a-next3a($10) |
| shufb $11, $11, $12, $10 # insert msg into inst word |
| stqr $11, message3a # store cmd/ptr into msg word |
| dsync |
| # Notify the PPE to perform the assisted call request |
| # by issing a stop and signal with a signal code |
| # of 0x2100 (C99 class) |
| stop 0x2100 |
| message3a: |
| .word 0 /* placeholder, overwritten at run time by the stqr above */ |
| |
| # restore registers |
| lqd $12, SQWM7($sp) |
| lqd $11, SQWM6($sp) |
| lqd $10, SQWM5($sp) |
| //============================================== |
| #endif |
| |
| /* Increment vma, decrement size, branch back as needed. */ |
| a vma, vma, sz |
| sf osize, sz, osize /* osize = osize - sz */ |
| brnz osize, __ovly_xfer_loop |
| |
| /* Save app's tagmask, wait for DMA complete, restore mask. */ |
| rdch oldmask, $MFC_RdTagMask |
| #if MFC_TAG_ID < 16 |
| ilh newmask, 1 << MFC_TAG_ID /* NOTE(review): ilh fills every halfword, so bit (MFC_TAG_ID + 16) is set in the mask too -- confirm that is acceptable */ |
| #else |
| ilhu newmask, 1 << (MFC_TAG_ID - 16) |
| #endif |
| wrch $MFC_WrTagMask, newmask |
| ila tagstat, MFC_TAG_UPDATE_ALL |
| wrch $MFC_WrTagUpdate, tagstat |
| rdch tagstat, $MFC_RdTagStat /* the value read is not used afterwards; the read is the wait */ |
| sync |
| wrch $MFC_WrTagMask, oldmask |
| |
| #ifdef OVLY_PRINTFS /* optional tracing: print the four words of $lr, then dump _ovly_buf_table */ |
| //============================================== |
| // In debug_event_hook link-register=0x%08x %08x %08x %08x |
| //============================================== |
| # save registers |
| stqd $10, SQWM5($sp) |
| stqd $11, SQWM6($sp) |
| stqd $12, SQWM7($sp) |
| # Place input parameters onto the stack to form the |
| # local storage memory image. |
| ila $10, __debug_event_format |
| stqd $10, SQWM14($sp) |
| ai $10, $sp, SQWM11 |
| stqd $10, SQWM13($sp) |
| stqd $sp, SQWM12($sp) |
| stqd $lr, SQWM11($sp) |
| rotqbyi $10, $lr, 4 /* successive rotations bring words 1, 2, 3 of $lr into word 0 */ |
| stqd $10, SQWM10($sp) |
| rotqbyi $10, $10, 4 |
| stqd $10, SQWM9($sp) |
| rotqbyi $10, $10, 4 |
| stqd $10, SQWM8($sp) |
| # Construct a message consisting of the 8-bit opcode |
| # and 24-bit local store pointer to the input |
| # parameters and place it forllowing the stop and signal |
| ila $10, 0x3ffff # address mask |
| ilhu $11, SPE_C99_VPRINTF << 8 |
| ai $12, $sp, SQWM14 # parameter pointer |
| selb $11, $11, $12, $10 # combine command & address ptr |
| brsl $10, next2a |
| next2a: |
| .type next2a, @function |
| lqr $12, message2a |
| cwd $10, message2a-next2a($10) |
| shufb $11, $11, $12, $10 # insert msg into inst word |
| stqr $11, message2a # store cmd/ptr into msg word |
| dsync |
| # Notify the PPE to perform the assisted call request |
| # by issing a stop and signal with a signal code |
| # of 0x2100 (C99 class) |
| stop 0x2100 |
| message2a: |
| .word 0 /* placeholder, overwritten at run time by the stqr above */ |
| |
| # save registers |
| stqd $13, SQWM8($sp) |
| stqd $14, SQWM9($sp) |
| stqd $15, SQWM10($sp) |
| stqd $16, SQWM11($sp) |
| |
| # initialize loop |
| il $13, 1 |
| ila $14, _ovly_buf_table |
| ila $15, _ovly_buf_table_end |
| |
| loop_start2: /* one printf per table word: $13 = index starting at 1, $14 = entry address */ |
| # Place input parameters onto the stack to form the |
| # local storage memory image. |
| ila $10, __ovly_buf_table_format |
| stqd $10, SQWM16($sp) |
| ai $10, $sp, SQWM13 |
| stqd $10, SQWM15($sp) |
| stqd $sp, SQWM14($sp) |
| stqd $13, SQWM13($sp) |
| lqd $16, 0($14) |
| rotqby $16, $16, $14 /* rotate the addressed word into word 0 */ |
| stqd $16, SQWM12($sp) |
| # Construct a message consisting of the 8-bit opcode |
| # and 24-bit local store pointer to the input |
| # parameters and place it forllowing the stop and signal |
| ila $10, 0x3ffff # address mask |
| ilhu $11, SPE_C99_VPRINTF << 8 |
| ai $12, $sp, SQWM16 # parameter pointer |
| selb $11, $11, $12, $10 # combine command & address ptr |
| brsl $10, next2b |
| next2b: |
| .type next2b, @function |
| lqr $12, message2b |
| cwd $10, message2b-next2b($10) |
| shufb $11, $11, $12, $10 # insert msg into inst word |
| stqr $11, message2b # store cmd/ptr into msg word |
| dsync |
| # Notify the PPE to perform the assisted call request |
| # by issing a stop and signal with a signal code |
| # of 0x2100 (C99 class) |
| stop 0x2100 |
| message2b: |
| .word 0 /* placeholder, overwritten at run time by the stqr above */ |
| |
| # move to next entry |
| ai $13, $13, 1 |
| ai $14, $14, 4 |
| clgt $16, $15, $14 /* continue while _ovly_buf_table_end > $14 (unsigned) */ |
| brnz $16, loop_start2 |
| |
| # restore registers |
| lqd $16, SQWM11($sp) |
| lqd $15, SQWM10($sp) |
| lqd $14, SQWM9($sp) |
| lqd $13, SQWM8($sp) |
| lqd $12, SQWM7($sp) |
| lqd $11, SQWM6($sp) |
| lqd $10, SQWM5($sp) |
| //============================================== |
| #endif |
| .global _ovly_debug_event |
| .type _ovly_debug_event, @function |
| _ovly_debug_event: |
| /* GDB inserts debugger trap here. |
| This instruction is only reached after a partition has been loaded; |
| the non-overlay and already-mapped paths branch past it to |
| __ovly_load_restore. */ |
| nop |
| |
| __ovly_load_restore: |
| #ifdef OVLY_IRQ_SAVE |
| /* Conditionally re-enable interrupts. */ |
| andi irq_stat, irq_stat, 1 /* keep only bit 0 of the machine status read at entry */ |
| ila irqtmp, __ovly_irq_restore |
| binze irq_stat, irqtmp /* bit set: branch to the next instruction and enable interrupts; else fall through */ |
| __ovly_irq_restore: |
| lqd $9, -64($sp) |
| #endif |
| |
| /* Restore saved registers. */ |
| lqd $8, -48($sp) |
| lqd $7, -32($sp) |
| lqd $6, -16($sp) |
| |
| __ovly_load_ret: |
| /* Branch to target address. |
| This is the branch named by the hbr hint issued near the top of |
| __ovly_load. */ |
| bi $79 |
| |
| .size __ovly_load, . - __ovly_load |