Diffstat (limited to 'x86_64/locore.S')
-rw-r--r--	x86_64/locore.S	1640
1 file changed, 1640 insertions(+), 0 deletions(-)
diff --git a/x86_64/locore.S b/x86_64/locore.S
new file mode 100644
index 0000000..25dc15d
--- /dev/null
+++ b/x86_64/locore.S
@@ -0,0 +1,1640 @@
+/*
+ * Mach Operating System
+ * Copyright (c) 1993,1992,1991,1990 Carnegie Mellon University
+ * Copyright (c) 1991 IBM Corporation
+ * All Rights Reserved.
+ *
+ * Permission to use, copy, modify and distribute this software and its
+ * documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation,
+ * and that the name IBM not be used in advertising or publicity
+ * pertaining to distribution of the software without specific, written
+ * prior permission.
+ *
+ * CARNEGIE MELLON AND IBM ALLOW FREE USE OF THIS SOFTWARE IN ITS "AS IS"
+ * CONDITION. CARNEGIE MELLON AND IBM DISCLAIM ANY LIABILITY OF ANY KIND FOR
+ * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ *
+ * Carnegie Mellon requests users of this software to return to
+ *
+ * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
+ * School of Computer Science
+ * Carnegie Mellon University
+ * Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie Mellon
+ * the rights to redistribute these changes.
+ */
+
+#include <mach/machine/asm.h>
+#include <mach/machine/eflags.h>
+#include <i386/i386/proc_reg.h>
+#include <i386/i386/trap.h>
+#include <i386/i386/seg.h>
+#include <i386/i386/gdt.h>
+#include <i386/i386/ldt.h>
+#include <i386/i386/msr.h>
+#include <i386/i386/i386asm.h>
+#include <i386/i386/cpu_number.h>
+#include <i386/i386/xen.h>
+
+
+/*
+ * Helpers for thread state as saved in the pcb area, during trap or irq handling
+ */
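+/*
+ * Layout note (a sketch of the intent): the push order below matches
+ * the field order of struct i386_saved_state; the 8-byte gap left by
+ * the subq/addq pair is the cr2 slot (where the 32-bit PUSHA used to
+ * store %esp), which t_page_fault and t_dbl_fault fill in by hand.
+ */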
+#define pusha \
+ pushq %rax ;\
+ pushq %rcx ;\
+ pushq %rdx ;\
+ pushq %rbx ;\
+ subq $8,%rsp ;\
+ pushq %rbp ;\
+ pushq %rsi ;\
+ pushq %rdi ;\
+ pushq %r8 ;\
+ pushq %r9 ;\
+ pushq %r10 ;\
+ pushq %r11 ;\
+ pushq %r12 ;\
+ pushq %r13 ;\
+ pushq %r14 ;\
+ pushq %r15
+
+#define popa \
+ popq %r15 ;\
+ popq %r14 ;\
+ popq %r13 ;\
+ popq %r12 ;\
+ popq %r11 ;\
+ popq %r10 ;\
+ popq %r9 ;\
+ popq %r8 ;\
+ popq %rdi ;\
+ popq %rsi ;\
+ popq %rbp ;\
+ addq $8,%rsp ;\
+ popq %rbx ;\
+ popq %rdx ;\
+ popq %rcx ;\
+ popq %rax
+
+#define PUSH_REGS_ISR \
+ pushq %rcx ;\
+ pushq %rdx ;\
+ pushq %rsi ;\
+ pushq %rdi ;\
+ pushq %r8 ;\
+ pushq %r9 ;\
+ pushq %r10 ;\
+ pushq %r11
+
+#define PUSH_AREGS_ISR \
+ pushq %rax ;\
+ PUSH_REGS_ISR
+
+
+#define POP_REGS_ISR \
+ popq %r11 ;\
+ popq %r10 ;\
+ popq %r9 ;\
+ popq %r8 ;\
+ popq %rdi ;\
+ popq %rsi ;\
+ popq %rdx ;\
+ popq %rcx
+
+#define POP_AREGS_ISR \
+ POP_REGS_ISR ;\
+ popq %rax
+
+/*
+ * Note that we have to load the kernel segment registers even if this
+ * is a trap from the kernel, because the kernel uses user segment
+ * registers for copyin/copyout.
+ * (XXX Would it be smarter just to use fs or gs for that?)
+ */
+#ifdef USER32
+#define PUSH_SEGMENTS(reg) \
+ movq %ds,reg ;\
+ pushq reg ;\
+ movq %es,reg ;\
+ pushq reg ;\
+ pushq %fs ;\
+ pushq %gs
+#else
+#define PUSH_SEGMENTS(reg)
+#endif
+
+#ifdef USER32
+#define POP_SEGMENTS(reg) \
+ popq %gs ;\
+ popq %fs ;\
+ popq reg ;\
+ movq reg,%es ;\
+ popq reg ;\
+ movq reg,%ds
+#else
+#define POP_SEGMENTS(reg)
+#endif
+
+#ifdef USER32
+#define PUSH_SEGMENTS_ISR(reg) \
+ movq %ds,reg ;\
+ pushq reg ;\
+ movq %es,reg ;\
+ pushq reg ;\
+ pushq %fs ;\
+ pushq %gs
+#else
+#define PUSH_SEGMENTS_ISR(reg)
+#endif
+
+#ifdef USER32
+#define POP_SEGMENTS_ISR(reg) \
+ popq %gs ;\
+ popq %fs ;\
+ popq reg ;\
+ movq reg,%es ;\
+ popq reg ;\
+ movq reg,%ds
+#else
+#define POP_SEGMENTS_ISR(reg)
+#endif
+
+#ifdef USER32
+#define SET_KERNEL_SEGMENTS(reg) \
+ mov %ss,reg /* switch to kernel segments */ ;\
+ mov reg,%ds /* (same as kernel stack segment) */ ;\
+ mov reg,%es ;\
+ mov reg,%fs ;\
+ mov $(PERCPU_DS),reg ;\
+ mov reg,%gs
+#else
+#define SET_KERNEL_SEGMENTS(reg)
+#endif
+
+/*
+ * Fault recovery.
+ */
+#define RECOVER_TABLE_START \
+ .text 2 ;\
+DATA(recover_table) ;\
+ .text
+
+#define RECOVER(addr) \
+ .text 2 ;\
+ .quad 9f ;\
+ .quad addr ;\
+ .text ;\
+9:
+
+#define RECOVER_TABLE_END \
+ .text 2 ;\
+ .globl EXT(recover_table_end) ;\
+LEXT(recover_table_end) ;\
+ .text
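+/*
+ * Usage sketch: an instruction that may fault on a user address is
+ * preceded by RECOVER(label), which records the pair (address of the
+ * following instruction, recovery label) in recover_table; the trap
+ * handler is expected to search this table for the faulting RIP and
+ * resume at the recovery label instead of panicking, e.g.:
+ *
+ *	RECOVER(copyin_fail)
+ *	rep
+ *	movsq
+ */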
+
+/*
+ * Retry table for certain successful faults.
+ */
+#define RETRY_TABLE_START \
+ .text 3 ;\
+DATA(retry_table) ;\
+ .text
+
+#define RETRY(addr) \
+ .text 3 ;\
+ .quad 9f ;\
+ .quad addr ;\
+ .text ;\
+9:
+
+#define RETRY_TABLE_END \
+ .text 3 ;\
+ .globl EXT(retry_table_end) ;\
+LEXT(retry_table_end) ;\
+ .text
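+/*
+ * RETRY works the same way, except that the recorded address is a
+ * point to re-execute once the handler has repaired the fault; see
+ * inst_fetch below, which retries from its entry so that %fs gets
+ * reloaded.
+ */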
+
+/*
+ * Allocate recovery and retry tables.
+ */
+ RECOVER_TABLE_START
+ RETRY_TABLE_START
+
+/*
+ * Timing routines.
+ */
+#if STAT_TIME
+
+#define TIME_TRAP_UENTRY
+#define TIME_TRAP_SENTRY
+#define TIME_TRAP_UEXIT
+#define TIME_INT_ENTRY
+#define TIME_INT_EXIT
+
+#else /* microsecond timing */
+
+/*
+ * Microsecond timing.
+ * Assumes a free-running microsecond counter.
+ * No TIMER_MAX check needed.
+ */
+
+/*
+ * There is only one current time-stamp per CPU, since only
+ * the time-stamp in the current timer is used.
+ * To save time, we allocate the current time-stamps here.
+ */
+ .comm EXT(current_tstamp), 4*NCPUS
+
+/*
+ * Update time on user trap entry.
+ * 11 instructions (including cli on entry)
+ * Assumes CPU number in %edx.
+ * Uses %eax, %ebx, %ecx.
+ */
+#define TIME_TRAP_UENTRY \
+ pushf /* Save flags */ ;\
+ cli /* block interrupts */ ;\
+ movl VA_ETC,%ebx /* get timer value */ ;\
+ movl CX(EXT(current_tstamp),%rdx),%ecx /* get old time stamp */;\
+ movl %ebx,CX(EXT(current_tstamp),%rdx) /* set new time stamp */;\
+ subl %ecx,%ebx /* elapsed = new-old */ ;\
+ movl CX(EXT(current_timer),%rdx),%ecx /* get current timer */ ;\
+ addl %ebx,LOW_BITS(%ecx) /* add to low bits */ ;\
+ jns 0f /* if overflow, */ ;\
+ call timer_normalize /* normalize timer */ ;\
+0: addl $(TH_SYSTEM_TIMER-TH_USER_TIMER),%ecx ;\
+ /* switch to sys timer */;\
+ movl %ecx,CX(EXT(current_timer),%rdx) /* make it current */ ;\
+ popf /* allow interrupts */
+
+/*
+ * Update time on system call entry.
+ * 11 instructions (including cli on entry)
+ * Assumes CPU number in %edx.
+ * Uses %ebx, %ecx.
+ * Same as TIME_TRAP_UENTRY, but preserves %eax.
+ */
+#define TIME_TRAP_SENTRY \
+ pushf /* Save flags */ ;\
+ cli /* block interrupts */ ;\
+ movl VA_ETC,%ebx /* get timer value */ ;\
+ movl CX(EXT(current_tstamp),%rdx),%ecx /* get old time stamp */;\
+ movl %ebx,CX(EXT(current_tstamp),%rdx) /* set new time stamp */;\
+ subl %ecx,%ebx /* elapsed = new-old */ ;\
+ movl CX(EXT(current_timer),%rdx),%ecx /* get current timer */ ;\
+ addl %ebx,LOW_BITS(%ecx) /* add to low bits */ ;\
+ jns 0f /* if overflow, */ ;\
+ pushq %rax /* save %rax */ ;\
+ call timer_normalize /* normalize timer */ ;\
+ popq %rax /* restore %rax */ ;\
+0: addl $(TH_SYSTEM_TIMER-TH_USER_TIMER),%ecx ;\
+ /* switch to sys timer */;\
+ movl %ecx,CX(EXT(current_timer),%rdx) /* make it current */ ;\
+ popf /* allow interrupts */
+
+/*
+ * update time on user trap exit.
+ * 10 instructions.
+ * Assumes CPU number in %edx.
+ * Uses %ebx, %ecx.
+ */
+#define TIME_TRAP_UEXIT \
+ cli /* block interrupts */ ;\
+ movl VA_ETC,%ebx /* get timer */ ;\
+ movl CX(EXT(current_tstamp),%rdx),%ecx /* get old time stamp */;\
+ movl %ebx,CX(EXT(current_tstamp),%rdx) /* set new time stamp */;\
+ subl %ecx,%ebx /* elapsed = new-old */ ;\
+ movl CX(EXT(current_timer),%rdx),%ecx /* get current timer */ ;\
+ addl %ebx,LOW_BITS(%ecx) /* add to low bits */ ;\
+ jns 0f /* if overflow, */ ;\
+ call timer_normalize /* normalize timer */ ;\
+0: addl $(TH_USER_TIMER-TH_SYSTEM_TIMER),%ecx ;\
+ /* switch to user timer */;\
+ movl %ecx,CX(EXT(current_timer),%rdx) /* make it current */
+
+/*
+ * update time on interrupt entry.
+ * 9 instructions.
+ * Assumes CPU number in %edx.
+ * Leaves old timer in %ebx.
+ * Uses %ecx.
+ */
+#define TIME_INT_ENTRY \
+ movl VA_ETC,%ecx /* get timer */ ;\
+ movl CX(EXT(current_tstamp),%rdx),%ebx /* get old time stamp */;\
+ movl %ecx,CX(EXT(current_tstamp),%rdx) /* set new time stamp */;\
+ subl %ebx,%ecx /* elapsed = new-old */ ;\
+ movl CX(EXT(current_timer),%rdx),%ebx /* get current timer */ ;\
+ addl %ecx,LOW_BITS(%ebx) /* add to low bits */ ;\
+ leal CX(0,%rdx),%ecx /* timer is 16 bytes */ ;\
+ lea CX(EXT(kernel_timer),%rdx),%ecx /* get interrupt timer*/;\
+ movl %ecx,CX(EXT(current_timer),%rdx) /* set timer */
+
+/*
+ * update time on interrupt exit.
+ * 11 instructions
+ * Assumes CPU number in %edx, old timer in %ebx.
+ * Uses %eax, %ecx.
+ */
+#define TIME_INT_EXIT \
+ movl VA_ETC,%eax /* get timer */ ;\
+ movl CX(EXT(current_tstamp),%rdx),%ecx /* get old time stamp */;\
+ movl %eax,CX(EXT(current_tstamp),%rdx) /* set new time stamp */;\
+ subl %ecx,%eax /* elapsed = new-old */ ;\
+ movl CX(EXT(current_timer),%rdx),%ecx /* get current timer */ ;\
+ addl %eax,LOW_BITS(%ecx) /* add to low bits */ ;\
+ jns 0f /* if overflow, */ ;\
+ call timer_normalize /* normalize timer */ ;\
+0: testb $0x80,LOW_BITS+3(%ebx) /* old timer overflow? */;\
+ jz 0f /* if overflow, */ ;\
+ movl %ebx,%ecx /* get old timer */ ;\
+ call timer_normalize /* normalize timer */ ;\
+0: movl %ebx,CX(EXT(current_timer),%rdx) /* set timer */
+
+
+/*
+ * Normalize timer in ecx.
+ * Preserves edx; clobbers eax.
+ */
+ .align 2
+timer_high_unit:
+ .long TIMER_HIGH_UNIT /* div has no immediate opnd */
+
+timer_normalize:
+ pushq %rdx /* save register */
+ xorl %edx,%edx /* clear divisor high */
+ movl LOW_BITS(%ecx),%eax /* get divisor low */
+ divl timer_high_unit,%eax /* quotient in eax */
+ /* remainder in edx */
+ addl %eax,HIGH_BITS_CHECK(%ecx) /* add high_inc to check */
+ movl %edx,LOW_BITS(%ecx) /* remainder to low_bits */
+ addl %eax,HIGH_BITS(%ecx) /* add high_inc to high bits */
+ popq %rdx /* restore register */
+ ret
+
+/*
+ * Switch to a new timer.
+ */
+ENTRY(timer_switch)
+ CPU_NUMBER(%edx) /* get this CPU */
+ movl VA_ETC,%ecx /* get timer */
+ movl CX(EXT(current_tstamp),%rdx),%eax /* get old time stamp */
+ movl %ecx,CX(EXT(current_tstamp),%rdx) /* set new time stamp */
+ subl %ecx,%eax /* elapsed = new - old */
+ movl CX(EXT(current_timer),%rdx),%ecx /* get current timer */
+ addl %eax,LOW_BITS(%ecx) /* add to low bits */
+ jns 0f /* if overflow, */
+ call timer_normalize /* normalize timer */
+0:
+ movl S_ARG0,%ecx /* get new timer */
+ movl %ecx,CX(EXT(current_timer),%rdx) /* set timer */
+ ret
+
+/*
+ * Initialize the first timer for a CPU.
+ */
+ENTRY(start_timer)
+ CPU_NUMBER(%edx) /* get this CPU */
+ movl VA_ETC,%ecx /* get timer */
+ movl %ecx,CX(EXT(current_tstamp),%rdx) /* set initial time stamp */
+ movl S_ARG0,%ecx /* get timer */
+ movl %ecx,CX(EXT(current_timer),%rdx) /* set initial timer */
+ ret
+
+#endif /* accurate timing */
+
+/* */
+
+/*
+ * Trap/interrupt entry points.
+ *
+ * All traps must create the i386_saved_state struct on the stack on
+ * entry. Note that:
+ * - CR2 is only used if the trap is a page fault
+ * - user_rsp/user_ss are only used if entering from user space
+ * - v86_regs are used only from V86 threads
+ * (TODO check if V86 is still used with USER32)
+ *
+ * Depending on the CPL before entry, the stack might be switched or not;
+ * if entering from user space the CPU loads TSS->RSP0 into RSP,
+ * otherwise RSP is unchanged. After this, the CPU pushes
+ * SS/RSP/RFLAGS/CS/RIP and optionally an error code, then executes the handler.
+ */
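+/*
+ * For reference, the frame the CPU itself builds on entry (64-bit
+ * mode, pushed in this order):
+ *
+ *	SS
+ *	RSP
+ *	RFLAGS
+ *	CS
+ *	RIP
+ *	[error code, only for some exceptions]
+ */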
+
+/* Try to save/show some information when a double fault happens.
+ * We can't recover to a working state, so if we have a debugger, wait for it;
+ * otherwise reset. */
+ENTRY(t_dbl_fault)
+ INT_FIX
+ cli /* disable interrupts that might corrupt the state*/
+ pusha
+ movq %cr2,%rax
+ movq %rax,R_CR2-R_R15(%rsp) /* CR2 might contain the faulting address */
+ subq $48,%rsp // FIXME remove when segments are cleaned up
+ movq %rsp,%rdi /* pass the saved state */
+ call handle_double_fault
+ jmp cpu_shutdown /* reset */
+END(t_dbl_fault)
+
+/*
+ * General protection or segment-not-present fault.
+ * Check for a GP/NP fault in the kernel_return
+ * sequence; if there, report it as a GP/NP fault on the user's instruction.
+ *
+ * rsp->	 0:	trap code (NP or GP)
+ *		 8:	segment number in error
+ *		16:	eip
+ *		24:	cs
+ *		32:	eflags
+ *		40:	old registers (trap is from kernel)
+ */
+ENTRY(t_gen_prot)
+ INT_FIX
+ pushq $(T_GENERAL_PROTECTION) /* indicate fault type */
+ jmp trap_check_kernel_exit /* check for kernel exit sequence */
+
+ENTRY(t_segnp)
+ INT_FIX
+ pushq $(T_SEGMENT_NOT_PRESENT)
+ /* indicate fault type */
+
+trap_check_kernel_exit:
+#ifdef USER32
+ testq $(EFL_VM),32(%rsp) /* is trap from V86 mode? */
+ jnz EXT(alltraps) /* isn`t kernel trap if so */
+#endif
+ /* Note: handling KERNEL_RING value by hand */
+ testq $2,24(%rsp) /* is trap from kernel mode? */
+ jnz EXT(alltraps) /* if so: */
+ /* check for the kernel exit sequence */
+ cmpq $_kret_iret,16(%rsp) /* on IRET? */
+ je fault_iret
+#ifdef USER32
+ cmpq $_kret_popl_ds,16(%rsp) /* popping DS? */
+ je fault_popl_ds
+ cmpq $_kret_popl_es,16(%rsp) /* popping ES? */
+ je fault_popl_es
+ cmpq $_kret_popl_fs,16(%rsp) /* popping FS? */
+ je fault_popl_fs
+ cmpq $_kret_popl_gs,16(%rsp) /* popping GS? */
+ je fault_popl_gs
+#endif
+take_fault: /* if none of the above: */
+ jmp EXT(alltraps) /* treat as normal trap. */
+
+/*
+ * GP/NP fault on IRET: CS or SS is in error.
+ * All registers contain the user's values.
+ *
+ * on SP is
+ * 0 trap number
+ * 8 errcode
+ * 16 eip
+ * 24 cs --> trapno
+ * 32 efl --> errcode
+ * 40 user eip
+ * 48 user cs
+ * 56 user eflags
+ * 64 user rsp
+ * 72 user ss
+ */
+fault_iret:
+ movq %rax,16(%rsp) /* save eax (we don`t need saved eip) */
+ popq %rax /* get trap number */
+ movq %rax,24-8(%rsp) /* put in user trap number */
+ popq %rax /* get error code */
+ movq %rax,32-16(%rsp) /* put in user errcode */
+ popq %rax /* restore eax */
+ jmp EXT(alltraps) /* take fault */
+
+#ifdef USER32
+/*
+ * Fault restoring a segment register. The user's registers are still
+ * saved on the stack. The offending segment register has not been
+ * popped.
+ */
+fault_popl_ds:
+ popq %rax /* get trap number */
+ popq %rdx /* get error code */
+ addq $24,%rsp /* pop stack to user regs */
+ jmp push_es /* (DS on top of stack) */
+fault_popl_es:
+ popq %rax /* get trap number */
+ popq %rdx /* get error code */
+ addq $24,%rsp /* pop stack to user regs */
+ jmp push_fs /* (ES on top of stack) */
+fault_popl_fs:
+ popq %rax /* get trap number */
+ popq %rdx /* get error code */
+ addq $24,%rsp /* pop stack to user regs */
+ jmp push_gs /* (FS on top of stack) */
+fault_popl_gs:
+ popq %rax /* get trap number */
+ popq %rdx /* get error code */
+ addq $24,%rsp /* pop stack to user regs */
+ jmp push_segregs /* (GS on top of stack) */
+
+push_es:
+ movq %es,%rcx
+ pushq %rcx /* restore es, */
+push_fs:
+ pushq %fs /* restore fs, */
+push_gs:
+ pushq %gs /* restore gs. */
+push_gsbase:
+ pushq $0
+ pushq $0
+#endif
+push_segregs:
+ movq %rax,R_TRAPNO(%rsp) /* set trap number */
+ movq %rdx,R_ERR(%rsp) /* set error code */
+ jmp trap_set_segs /* take trap */
+
+/*
+ * Debug trap. Check for single-stepping across system call into
+ * kernel. If this is the case, taking the debug trap has turned
+ * off single-stepping - save the flags register with the trace
+ * bit set.
+ */
+ENTRY(t_debug)
+ INT_FIX
+#ifdef USER32
+ testq $(EFL_VM),16(%rsp) /* is trap from V86 mode? */
+ jnz 0f /* isn`t kernel trap if so */
+#endif
+ /* Note: handling KERNEL_RING value by hand */
+ testq $2,8(%rsp) /* is trap from kernel mode? */
+ jnz 0f /* if so: */
+#ifdef USER32
+ cmpq $syscall_entry,(%rsp) /* system call entry? */
+ jne 0f /* if so: */
+ /* flags are sitting where syscall */
+ /* wants them */
+ addq $32,%rsp /* remove eip/cs */
+ jmp syscall_entry_2 /* continue system call entry */
+#else
+ // TODO: implement the 64-bit case
+ ud2
+#endif
+0: pushq $0 /* otherwise: */
+ pushq $(T_DEBUG) /* handle as normal */
+ jmp EXT(alltraps) /* debug fault */
+
+/*
+ * Page fault traps save cr2.
+ */
+ENTRY(t_page_fault)
+ INT_FIX
+ pushq $(T_PAGE_FAULT) /* mark a page fault trap */
+ pusha /* save the general registers */
+#ifdef MACH_XEN
+ movq %ss:hyp_shared_info+CR2,%rax
+#else /* MACH_XEN */
+ movq %cr2,%rax /* get the faulting address */
+#endif /* MACH_XEN */
+ movq %rax,R_CR2-R_R15(%rsp) /* save in rsp save slot */
+ jmp trap_push_segs /* continue fault */
+
+/*
+ * All 'exceptions' enter here with:
+ * rsp-> trap number
+ * error code
+ * old eip
+ * old cs
+ * old eflags
+ * old rsp if trapped from user
+ * old ss if trapped from user
+ */
+ENTRY(alltraps)
+ pusha /* save the general registers */
+trap_push_segs:
+ PUSH_SEGMENTS(%rax) /* and the segment registers */
+ SET_KERNEL_SEGMENTS(%rax) /* switch to kernel data segment */
+trap_set_segs:
+ cld /* clear direction flag */
+#ifdef USER32
+ testl $(EFL_VM),R_EFLAGS(%rsp) /* in V86 mode? */
+ jnz trap_from_user /* user mode trap if so */
+#endif
+ /* Note: handling KERNEL_RING value by hand */
+ testb $2,R_CS(%rsp) /* user mode trap? */
+ jz trap_from_kernel /* kernel trap if not */
+trap_from_user:
+
+ CPU_NUMBER(%edx)
+ TIME_TRAP_UENTRY
+
+ movq CX(EXT(kernel_stack),%rdx),%rbx
+ xchgq %rbx,%rsp /* switch to kernel stack */
+ /* user regs pointer already set */
+_take_trap:
+ movq %rbx,%rdi /* pass register save area to trap */
+ call EXT(user_trap) /* call user trap routine */
+#ifdef USER32
+ orq %rax,%rax /* emulated syscall? */
+ jz 1f /* no, just return */
+ movq R_EAX(%rbx),%rax /* yes, get syscall number */
+ jmp syscall_entry_3 /* and emulate it */
+#endif
+1:
+ movq (%rsp),%rsp /* switch back to PCB stack */
+
+/*
+ * Return from trap or system call, checking for ASTs.
+ * On PCB stack.
+ */
+
+_return_from_trap:
+ CPU_NUMBER(%edx)
+ cmpl $0,CX(EXT(need_ast),%rdx)
+ jz _return_to_user /* if we need an AST: */
+
+ movq CX(EXT(kernel_stack),%rdx),%rsp
+ /* switch to kernel stack */
+ call EXT(i386_astintr) /* take the AST */
+ popq %rsp /* switch back to PCB stack */
+ jmp _return_from_trap /* and check again (rare) */
+ /* ASTs after this point will */
+ /* have to wait */
+
+_return_to_user:
+ TIME_TRAP_UEXIT
+
+/*
+ * Return from kernel mode to interrupted thread.
+ */
+
+_return_from_kernel:
+#ifdef USER32
+_kret_popl_gs:
+ popq %gs /* restore segment registers */
+_kret_popl_fs:
+ popq %fs
+_kret_popl_es:
+ popq %rax
+ movq %rax,%es
+_kret_popl_ds:
+ popq %rax
+ movq %rax,%ds
+#endif
+ popa /* restore general registers */
+ addq $16,%rsp /* discard trap number and error code */
+_kret_iret:
+ iretq /* return from interrupt */
+
+
+/*
+ * Trap from kernel mode. No need to switch stacks.
+ */
+trap_from_kernel:
+#if MACH_KDB || MACH_TTD
+ movq %rsp,%rbx /* save current stack */
+ movq %rsp,%rdx /* on an interrupt stack? */
+
+ CPU_NUMBER(%ecx)
+ and $(~(INTSTACK_SIZE-1)),%rdx
+ cmpq CX(EXT(int_stack_base),%rcx),%rdx
+ je 1f /* OK if so */
+
+ movl %ecx,%edx
+ cmpq CX(EXT(kernel_stack),%rdx),%rsp
+ /* already on kernel stack? */
+ ja 0f
+ cmpq MY(ACTIVE_STACK),%rsp
+ ja 1f /* switch if not */
+0:
+ movq CX(EXT(kernel_stack),%rdx),%rsp
+1:
+ pushq %rbx /* save old stack */
+ movq %rbx,%rdi /* pass as parameter */
+ call EXT(kernel_trap) /* to kernel trap routine */
+
+ popq %rsp /* return to old stack */
+#else /* MACH_KDB || MACH_TTD */
+
+ movq %rsp,%rdi /* pass parameter */
+ call EXT(kernel_trap) /* to kernel trap routine */
+
+#endif /* MACH_KDB || MACH_TTD */
+
+ jmp _return_from_kernel
+
+
+/*
+ * Called as a function, makes the current thread
+ * return from the kernel as if from an exception.
+ */
+
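+/*
+ * A sketch of the stack-pointer recovery used here and in
+ * thread_syscall_return and call_continuation (assuming kernel stacks
+ * are KERNEL_STACK_SIZE-aligned): or'ing RSP with KERNEL_STACK_SIZE-1
+ * yields the address of the last byte of the stack, and the PCB stack
+ * pointer saved on trap entry sits in the quadword just below the
+ * i386_kernel_state area at the stack top, i.e. at -7-IKS_SIZE from
+ * that byte.
+ */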
+ENTRY(thread_exception_return)
+ENTRY(thread_bootstrap_return)
+ movq %rsp,%rcx /* get kernel stack */
+ or $(KERNEL_STACK_SIZE-1),%rcx
+ movq -7-IKS_SIZE(%rcx),%rsp /* switch back to PCB stack */
+ jmp _return_from_trap
+
+/*
+ * Called as a function, makes the current thread
+ * return from the kernel as if from a syscall.
+ * Takes the syscall's return code as an argument.
+ */
+
+ENTRY(thread_syscall_return)
+ movq S_ARG0,%rax /* get return value */
+ movq %rsp,%rcx /* get kernel stack */
+ or $(KERNEL_STACK_SIZE-1),%rcx
+ movq -7-IKS_SIZE(%rcx),%rsp /* switch back to PCB stack */
+ movq %rax,R_EAX(%rsp) /* save return value */
+ jmp _return_from_trap
+
+ENTRY(call_continuation)
+ movq S_ARG0,%rax /* get continuation */
+ movq %rsp,%rcx /* get kernel stack */
+ or $(KERNEL_STACK_SIZE-1),%rcx
+ addq $(-7-IKS_SIZE),%rcx
+ movq %rcx,%rsp /* pop the stack */
+ xorq %rbp,%rbp /* zero frame pointer */
+ pushq $0 /* Dummy return address */
+ jmp *%rax /* goto continuation */
+
+/* IOAPIC has 24 interrupts, put spurious in the same array */
+
+#define INTERRUPT(n) \
+ .data 2 ;\
+ .quad 0f ;\
+ .text ;\
+ P2ALIGN(TEXT_ALIGN) ;\
+0: ;\
+ INT_FIX ;\
+ pushq %rax ;\
+ movq $(n),%rax ;\
+ jmp EXT(all_intrs)
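+/*
+ * Each INTERRUPT(n) expansion thus contributes two pieces: the
+ * address of its stub to the int_entry_table array built in the
+ * .data 2 subsection, and the stub itself in .text, which saves %rax,
+ * loads the interrupt number and joins the common path at all_intrs.
+ */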
+
+ .data 2
+DATA(int_entry_table)
+ .text
+/* Legacy APIC interrupts or PIC interrupts */
+INTERRUPT(0)
+INTERRUPT(1)
+INTERRUPT(2)
+INTERRUPT(3)
+INTERRUPT(4)
+INTERRUPT(5)
+INTERRUPT(6)
+INTERRUPT(7)
+INTERRUPT(8)
+INTERRUPT(9)
+INTERRUPT(10)
+INTERRUPT(11)
+INTERRUPT(12)
+INTERRUPT(13)
+INTERRUPT(14)
+INTERRUPT(15)
+#ifdef APIC
+/* APIC PCI interrupts PIRQ A-H */
+INTERRUPT(16)
+INTERRUPT(17)
+INTERRUPT(18)
+INTERRUPT(19)
+INTERRUPT(20)
+INTERRUPT(21)
+INTERRUPT(22)
+INTERRUPT(23)
+#endif
+#if NCPUS > 1
+INTERRUPT(CALL_AST_CHECK)
+INTERRUPT(CALL_PMAP_UPDATE)
+#endif
+#ifdef APIC
+/* Spurious interrupt, set irq number to vect number */
+INTERRUPT(255)
+#endif
+
+/* XXX handle NMI - at least print a warning like Linux does. */
+
+/*
+ * All interrupts enter here. The cpu might have loaded a new RSP,
+ * depending on the previous CPL, as in alltraps.
+ * Old %eax on stack, interrupt number in %eax; we need to fill the remaining
+ * fields of struct i386_interrupt_state, which might be in the pcb or in the
+ * interrupt stack.
+ */
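+/*
+ * Stack-detection sketch (assuming INTSTACK_SIZE-aligned interrupt
+ * stacks): masking RSP with ~(INTSTACK_SIZE-1) recovers the base of
+ * whatever stack we are running on; if it equals this CPU's
+ * int_stack_base, we were interrupted while already on the interrupt
+ * stack and must not switch again.
+ */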
+ENTRY(all_intrs)
+ PUSH_REGS_ISR /* save registers */
+ cld /* clear direction flag */
+
+ PUSH_SEGMENTS_ISR(%rdx) /* save segment registers */
+
+ CPU_NUMBER_NO_GS(%ecx)
+ movq %rsp,%rdx /* on an interrupt stack? */
+ and $(~(INTSTACK_SIZE-1)),%rdx
+ cmpq %ss:CX(EXT(int_stack_base),%rcx),%rdx
+ je int_from_intstack /* if not: */
+
+ SET_KERNEL_SEGMENTS(%rdx) /* switch to kernel segments */
+
+ CPU_NUMBER(%edx)
+
+ movq CX(EXT(int_stack_top),%rdx),%rcx
+
+ xchgq %rcx,%rsp /* switch to interrupt stack */
+
+#if STAT_TIME
+ pushq %rcx /* save pointer to old stack */
+#else
+ pushq %rbx /* save %ebx - out of the way */
+ /* so stack looks the same */
+ pushq %rcx /* save pointer to old stack */
+ TIME_INT_ENTRY /* do timing */
+#endif
+
+#ifdef MACH_LDEBUG
+ incl CX(EXT(in_interrupt),%rdx)
+#endif
+
+ call EXT(interrupt) /* call generic interrupt routine */
+ .globl EXT(return_to_iret) /* ( label for kdb_kintr and hardclock */
+LEXT(return_to_iret) /* to find the return from calling interrupt) */
+
+ CPU_NUMBER(%edx)
+#ifdef MACH_LDEBUG
+ decl CX(EXT(in_interrupt),%rdx)
+#endif
+
+#if STAT_TIME
+#else
+ TIME_INT_EXIT /* do timing */
+ movq 8(%rsp),%rbx /* restore the extra reg we saved */
+#endif
+
+ popq %rsp /* switch back to old stack */
+
+#ifdef USER32
+ testl $(EFL_VM),I_EFL(%rsp) /* if in V86 */
+ jnz 0f /* or */
+#endif
+ /* Note: handling KERNEL_RING value by hand */
+ testb $2,I_CS(%rsp) /* user mode, */
+ jz 1f /* check for ASTs */
+0:
+ cmpq $0,CX(EXT(need_ast),%rdx)
+ jnz ast_from_interrupt /* take it if so */
+1:
+ POP_SEGMENTS_ISR(%rdx) /* restore segment regs */
+ POP_AREGS_ISR /* restore registers */
+
+ iretq /* return to caller */
+
+int_from_intstack:
+ CPU_NUMBER_NO_GS(%edx)
+ cmpq CX(EXT(int_stack_base),%rdx),%rsp /* seemingly looping? */
+ jb stack_overflowed /* if not: */
+ call EXT(interrupt) /* call interrupt routine */
+_return_to_iret_i: /* ( label for kdb_kintr) */
+ POP_SEGMENTS_ISR(%rdx)
+ POP_AREGS_ISR /* restore registers */
+ /* no ASTs */
+
+ iretq
+
+stack_overflowed:
+ ud2
+
+/*
+ * Take an AST from an interrupt.
+ * On PCB stack.
+ * sp-> gs -> edx
+ * fs -> ecx
+ * es -> eax
+ * ds -> trapno
+ * edx -> code
+ * ecx
+ * eax
+ * eip
+ * cs
+ * efl
+ * rsp
+ * ss
+ */
+ast_from_interrupt:
+ POP_SEGMENTS_ISR(%rdx) /* restore all registers ... */
+ POP_AREGS_ISR
+ pushq $0 /* zero code */
+ pushq $0 /* zero trap number */
+ pusha /* save general registers */
+ PUSH_SEGMENTS_ISR(%rdx) /* save segment registers */
+ SET_KERNEL_SEGMENTS(%rdx) /* switch to kernel segments */
+ CPU_NUMBER(%edx)
+ TIME_TRAP_UENTRY
+
+ movq CX(EXT(kernel_stack),%rdx),%rsp
+ /* switch to kernel stack */
+ call EXT(i386_astintr) /* take the AST */
+ popq %rsp /* back to PCB stack */
+ jmp _return_from_trap /* return */
+
+#if MACH_KDB
+/*
+ * kdb_kintr: enter kdb from keyboard interrupt.
+ * Chase down the stack frames until we find one whose return
+ * address is the interrupt handler. At that point, we have:
+ *
+ * frame-> saved %rbp
+ * return address in interrupt handler
+ * saved SPL
+ * saved IRQ
+ * return address == return_to_iret_i
+ * saved %r11
+ * saved %r10
+ * saved %r9
+ * saved %r8
+ * saved %rdx
+ * saved %rcx
+ * saved %rax
+ * saved %rip
+ * saved %cs
+ * saved %rfl
+ *
+ * OR:
+ * frame-> saved %rbp
+ * return address in interrupt handler
+ * return address == return_to_iret
+ * pointer to save area on old stack
+ * [ saved %ebx, if accurate timing ]
+ *
+ * old stack: saved %gs
+ * saved %fs
+ * saved %es
+ * saved %ds
+ * saved %r11
+ * saved %r10
+ * saved %r9
+ * saved %r8
+ * saved %rdi
+ * saved %rsi
+ * saved %rdx
+ * saved %rcx
+ * saved %eax
+ * saved %rip
+ * saved %cs
+ * saved %rfl
+ *
+ * Call kdb, passing it that register save area.
+ */
+
+#define RET_OFFSET 32
+
+
+ENTRY(kdb_kintr)
+ movq %rbp,%rax /* save caller`s frame pointer */
+ movq $EXT(return_to_iret),%rcx /* interrupt return address 1 */
+ movq $_return_to_iret_i,%rdx /* interrupt return address 2 */
+
+0: cmpq RET_OFFSET(%rax),%rcx /* does this frame return to */
+ /* interrupt handler (1)? */
+ je 1f
+ cmpq RET_OFFSET(%rax),%rdx /* interrupt handler (2)? */
+ je 2f /* if not: */
+ movq (%rax),%rax /* try next frame */
+ testq %rax,%rax
+ jnz 0b
+ ud2 /* oops, didn't find frame, fix me :/ */
+
+1: movq $kdb_from_iret,RET_OFFSET(%rax)
+ ret /* returns to kernel/user stack */
+
+2: movq $kdb_from_iret_i,RET_OFFSET(%rax)
+ /* returns to interrupt stack */
+ ret
+
+/*
+ * On return from keyboard interrupt, we will execute
+ * kdb_from_iret_i
+ * if returning to an interrupt on the interrupt stack
+ * kdb_from_iret
+ * if returning to an interrupt on the user or kernel stack
+ */
+kdb_from_iret:
+ /* save regs in known locations */
+#if STAT_TIME
+ pushq %rbx /* caller`s %ebx is in reg */
+#else
+ movq 8(%rsp),%rax /* get caller`s %ebx */
+ pushq %rax /* push on stack */
+#endif
+ pushq %rbp
+ movq %rsp,%rdi /* pass regs */
+ call EXT(kdb_kentry) /* to kdb */
+ popq %rbp
+#if STAT_TIME
+ popq %rbx
+#else
+ popq %rax
+ movq %rax,8(%rsp)
+#endif
+ jmp EXT(return_to_iret) /* normal interrupt return */
+
+kdb_from_iret_i: /* on interrupt stack */
+ pop %rdx /* restore saved registers */
+ pop %rcx
+ pop %rax
+ pushq $0 /* zero error code */
+ pushq $0 /* zero trap number */
+ pusha /* save general registers */
+ PUSH_SEGMENTS(%rdx) /* save segment registers */
+ movq %rsp,%rdx /* pass regs, */
+ movq $0,%rsi /* code, */
+ movq $-1,%rdi /* type to kdb */
+ call EXT(kdb_trap)
+ POP_SEGMENTS(%rdx) /* restore segment registers */
+ popa /* restore general registers */
+ addq $16,%rsp
+
+// TODO: test it before dropping ud2
+	movq (%rsp),%rax
+	ud2
+ iretq
+
+#endif /* MACH_KDB */
+
+#if MACH_TTD
+/*
+ * Same code as that above for the keyboard entry into kdb.
+ */
+ENTRY(kttd_intr)
+// TODO: test it before dropping ud2
+	ud2
+ movq %rbp,%rax /* save caller`s frame pointer */
+ movq $EXT(return_to_iret),%rcx /* interrupt return address 1 */
+ movq $_return_to_iret_i,%rdx /* interrupt return address 2 */
+
+0: cmpq 32(%rax),%rcx /* does this frame return to */
+ /* interrupt handler (1)? */
+ je 1f
+ cmpq 32(%rax),%rdx /* interrupt handler (2)? */
+ je 2f /* if not: */
+ movq (%rax),%rax /* try next frame */
+ jmp 0b
+
+1: movq $ttd_from_iret,32(%rax) /* returns to kernel/user stack */
+ ret
+
+2: movq $ttd_from_iret_i,32(%rax)
+ /* returns to interrupt stack */
+ ret
+
+/*
+ * On return from keyboard interrupt, we will execute
+ * ttd_from_iret_i
+ * if returning to an interrupt on the interrupt stack
+ * ttd_from_iret
+ * if returning to an interrupt on the user or kernel stack
+ */
+ttd_from_iret:
+ /* save regs in known locations */
+#if STAT_TIME
+ pushq %rbx /* caller`s %ebx is in reg */
+#else
+ movq 8(%rsp),%rax /* get caller`s %ebx */
+ pushq %rax /* push on stack */
+#endif
+ pushq %rbp
+ pushq %rsi
+ pushq %rdi
+ movq %rsp,%rdi /* pass regs */
+ call _kttd_netentry /* to kdb */
+ popq %rdi /* restore registers */
+ popq %rsi
+ popq %rbp
+#if STAT_TIME
+ popq %rbx
+#else
+ popq %rax
+ movq %rax,8(%rsp)
+#endif
+ jmp EXT(return_to_iret) /* normal interrupt return */
+
+ttd_from_iret_i: /* on interrupt stack */
+ pop %rdx /* restore saved registers */
+ pop %rcx
+ pop %rax
+ pushq $0 /* zero error code */
+ pushq $0 /* zero trap number */
+ pusha /* save general registers */
+ PUSH_SEGMENTS_ISR(%rdx) /* save segment registers */
+ ud2 // TEST it
+ movq %rsp,%rdx /* pass regs, */
+ movq $0,%rsi /* code, */
+ movq $-1,%rdi /* type to kdb */
+ call _kttd_trap
+ POP_SEGMENTS_ISR(%rdx) /* restore segment registers */
+ popa /* restore general registers */
+ addq $16,%rsp
+
+// TODO: test it before dropping ud2
+	movq (%rsp),%rax
+	ud2
+ iretq
+
+#endif /* MACH_TTD */
+
+#ifdef USER32
+/*
+ * System call enters through a call gate. Flags are not saved -
+ * we must shuffle stack to look like trap save area.
+ *
+ * rsp-> old eip
+ * old cs
+ * old rsp
+ * old ss
+ *
+ * eax contains system call number.
+ */
+ENTRY(syscall)
+syscall_entry:
+ pushf /* save flags as soon as possible */
+syscall_entry_2:
+ cld /* clear direction flag */
+
+ pushq %rax /* save system call number */
+ pushq $0 /* clear trap number slot */
+
+ pusha /* save the general registers */
+ PUSH_SEGMENTS(%rdx) /* and the segment registers */
+ SET_KERNEL_SEGMENTS(%rdx) /* switch to kernel data segment */
+
+/*
+ * Shuffle eflags,eip,cs into proper places
+ */
+
+ movq R_EIP(%rsp),%rbx /* eflags are in EIP slot */
+ movq R_CS(%rsp),%rcx /* eip is in CS slot */
+ movq R_EFLAGS(%rsp),%rdx /* cs is in EFLAGS slot */
+ movq %rcx,R_EIP(%rsp) /* fix eip */
+ movq %rdx,R_CS(%rsp) /* fix cs */
+ movq %rbx,R_EFLAGS(%rsp) /* fix eflags */
+
+ CPU_NUMBER_NO_STACK(%edx)
+ TIME_TRAP_SENTRY
+
+ movq CX(EXT(kernel_stack),%rdx),%rbx
+ /* get current kernel stack */
+ xchgq %rbx,%rsp /* switch stacks - %ebx points to */
+ /* user registers. */
+ /* user regs pointer already set */
+
+/*
+ * Check for MACH or emulated system call
+ */
+syscall_entry_3:
+ movq MY(ACTIVE_THREAD),%rdx
+ /* point to current thread */
+ movq TH_TASK(%rdx),%rdx /* point to task */
+ movq TASK_EMUL(%rdx),%rdx /* get emulation vector */
+ orq %rdx,%rdx /* if none, */
+ je syscall_native /* do native system call */
+ movq %rax,%rcx /* copy system call number */
+ subq DISP_MIN(%rdx),%rcx /* get displacement into syscall */
+ /* vector table */
+ jl syscall_native /* too low - native system call */
+ cmpq DISP_COUNT(%rdx),%rcx /* check range */
+ jnl syscall_native /* too high - native system call */
+ movq DISP_VECTOR(%rdx,%rcx,4),%rdx
+ /* get the emulation vector */
+ orq %rdx,%rdx /* emulated system call if not zero */
+ jnz syscall_emul
+
+/*
+ * Native system call.
+ */
+syscall_native:
+ negl %eax /* get system call number */
+ jl mach_call_range /* out of range if it was positive */
+ cmpl EXT(mach_trap_count),%eax /* check system call table bounds */
+ jg mach_call_range /* error if out of range */
+#if 0 /* debug hack to show the syscall number on the screen */
+ movb %al,%dl
+ shrb $4,%dl
+ orb $0x30,%dl
+ movb $0x0f,%dh
+ movw %dx,0xb800a
+ movb %al,%dl
+ andb $0xf,%dl
+ orb $0x30,%dl
+ movb $0xf,%dh
+ movw %dx,0xb800c
+#endif
+ shll $5,%eax /* manual indexing of mach_trap_t */
+ xorq %r10,%r10
+ mov EXT(mach_trap_table)(%rax),%r10
+ /* get number of arguments */
+ andq %r10,%r10
+ jz mach_call_call /* skip argument copy if none */
+
+ movq $USER_DS,%rdx /* use user data segment for accesses */
+ mov %dx,%fs
+ movq %rsp,%r11 /* save kernel ESP for error recovery */
+
+ movq R_UESP(%rbx),%rbp /* get user stack pointer */
+ addq $4,%rbp /* Skip user return address */
+
+#define PARAM(reg,ereg) \
+ xorq %reg,%reg ;\
+ RECOVER(mach_call_addr_push) \
+	movl %fs:(%rbp),%ereg	/* fetch one parameter */ ;\
+ addq $4,%rbp ;\
+ dec %r10 ;\
+ jz mach_call_call
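+/*
+ * Each PARAM zero-extends one 32-bit argument from the user stack
+ * into the 64-bit register expected by the C calling convention,
+ * with RECOVER turning a faulting access into a page-fault trap,
+ * and branches to mach_call_call once all %r10 arguments are in.
+ */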
+
+ PARAM(rdi,edi) /* 1st parameter */
+ PARAM(rsi,esi) /* 2nd parameter */
+ PARAM(rdx,edx) /* 3rd parameter */
+ PARAM(rcx,ecx) /* 4th parameter */
+ PARAM(r8,r8d) /* 5th parameter */
+ PARAM(r9,r9d) /* 6th parameter */
+
+ lea (%rbp,%r10,4),%rbp /* point past last argument */
+ xorq %r12,%r12
+
+0: subq $4,%rbp
+ RECOVER(mach_call_addr_push)
+ movl %fs:(%rbp),%r12d
+ pushq %r12 /* push argument on stack */
+ dec %r10
+ jnz 0b /* loop for all arguments */
+
+mach_call_call:
+
+#ifdef DEBUG
+ testb $0xff,EXT(syscall_trace)
+ jz 0f
+ movq %rax,%rdi
+ call EXT(syscall_trace_print)
+ /* will return with syscallofs still (or again) in eax */
+0:
+#endif /* DEBUG */
+ call *EXT(mach_trap_table)+8(%rax) /* call procedure */
+ movq %rsp,%rcx /* get kernel stack */
+ or $(KERNEL_STACK_SIZE-1),%rcx
+ movq -7-IKS_SIZE(%rcx),%rsp /* switch back to PCB stack */
+ movq %rax,R_EAX(%rsp) /* save return value */
+ jmp _return_from_trap /* return to user */
+
+/*
+ * Address out of range. Change to page fault.
+ * %rbp holds failing address.
+ */
+mach_call_addr_push:
+ movq %r11,%rsp /* clean parameters from stack */
+mach_call_addr:
+ movq %rbp,R_CR2(%rbx) /* set fault address */
+ movq $(T_PAGE_FAULT),R_TRAPNO(%rbx)
+ /* set page-fault trap */
+ movq $(T_PF_USER),R_ERR(%rbx)
+ /* set error code - read user space */
+ jmp _take_trap /* treat as a trap */
+
+/*
+ * System call out of range. Treat as invalid-instruction trap.
+ * (? general protection?)
+ */
+mach_call_range:
+ movq $(T_INVALID_OPCODE),R_TRAPNO(%rbx)
+ /* set invalid-operation trap */
+ movq $0,R_ERR(%rbx) /* clear error code */
+ jmp _take_trap /* treat as a trap */
+
+/*
+ * User space emulation of system calls.
+ * edx - user address to handle syscall
+ *
+ * User stack will become:
+ * ursp-> eflags
+ * eip
+ * eax still contains syscall number.
+ */
+syscall_emul:
+ movq $USER_DS,%rdi /* use user data segment for accesses */
+ mov %di,%fs
+
+/* XXX what about write-protected pages? */
+ movq R_UESP(%rbx),%rdi /* get user stack pointer */
+ subq $16,%rdi /* push space for new arguments */
+ movq R_EFLAGS(%rbx),%rax /* move flags */
+ RECOVER(syscall_addr)
+ movl %eax,%fs:0(%rdi) /* to user stack */
+ movl R_EIP(%rbx),%eax /* move eip */
+ RECOVER(syscall_addr)
+ movl %eax,%fs:4(%rdi) /* to user stack */
+ movq %rdi,R_UESP(%rbx) /* set new user stack pointer */
+ movq %rdx,R_EIP(%rbx) /* change return address to trap */
+ movq %rbx,%rsp /* back to PCB stack */
+// TODO: test it before dropping ud2
+	ud2
+ jmp _return_from_trap /* return to user */
+
+/*
+ * Address error - address is in %edi.
+ */
+syscall_addr:
+ movq %rdi,R_CR2(%rbx) /* set fault address */
+ movq $(T_PAGE_FAULT),R_TRAPNO(%rbx)
+ /* set page-fault trap */
+ movq $(T_PF_USER),R_ERR(%rbx)
+ /* set error code - read user space */
+ jmp _take_trap /* treat as a trap */
+END(syscall)
+
+#else /* USER32 */
+
+/* Entry point for 64-bit syscalls.
+ * On entry we're still on the user stack, so better not use it. Instead we
+ * save the thread state immediately in thread->pcb->iss, then try to invoke
+ * the syscall.
+ * Note: emulated syscalls no longer seem to be used in GNU/Hurd, so they
+ * are not handled here.
+ * TODO:
+ *  - for now we assume the return address is canonical, but apparently there
+ *    can be cases where it's not (see how Linux handles this). Does it apply
+ *    here?
+ *  - check that the case where a task is suspended, and later returns via
+ *    iretq from return_from_trap, works fine in all combinations
+ */
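+/*
+ * Background for the entry sequence below: the syscall instruction
+ * itself leaves the user RIP in %rcx and the pre-syscall RFLAGS in
+ * %r11 (the live RFLAGS are then masked by IA32_FMASK), and performs
+ * no stack switch; that is why %r11 can be packed into %rax as a
+ * scratch register, and why the user stack must not be touched until
+ * we move to the kernel stack.
+ */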
+ENTRY(syscall64)
+ /* RFLAGS[32:63] are reserved, so combine syscall num (32 bit) and
+ * eflags in RAX to allow using r11 as temporary register
+ */
+ shlq $32,%r11
+ shlq $32,%rax /* make sure bits 32:63 of %rax are zero */
+ shrq $32,%rax
+ or %r11,%rax
+
+ /* Save thread state in pcb->iss, as on exception entry.
+ * Since this is triggered synchronously from userspace, we could
+ * save only the callee-preserved status according to the C ABI,
+ * plus RIP and EFLAGS for sysret
+ */
+ movq MY(ACTIVE_THREAD),%r11 /* point to current thread */
+ movq TH_PCB(%r11),%r11 /* point to pcb */
+ addq $ PCB_ISS,%r11 /* point to saved state */
+
+ mov %rsp,R_UESP(%r11) /* callee-preserved register */
+ mov %rcx,R_EIP(%r11) /* syscall places user RIP in RCX */
+ mov %rbx,R_EBX(%r11) /* callee-preserved register */
+ mov %rax,%rbx /* Now we can unpack eflags again */
+ shr $32,%rbx
+ mov %rbx,R_EFLAGS(%r11) /* ... and save them in pcb as well */
+ mov %rbp,R_EBP(%r11) /* callee-preserved register */
+ mov %r12,R_R12(%r11) /* callee-preserved register */
+ mov %r13,R_R13(%r11) /* callee-preserved register */
+ mov %r14,R_R14(%r11) /* callee-preserved register */
+ mov %r15,R_R15(%r11) /* callee-preserved register */
+
+ /* Save syscall number and args for SYSCALL_EXAMINE/MSG_EXAMINE in glibc.
+	 * Note: the syscall number is only 32 bit, in EAX, so we sign-extend it
+	 * into RAX, discarding the EFLAGS bits packed in the upper half.
+ */
+ cdqe /* sign-extend EAX in RAX */
+ mov %rax,R_EAX(%r11) /* syscall number */
+ mov %rdi,R_EDI(%r11) /* syscall arg0 */
+ mov %rsi,R_ESI(%r11) /* syscall arg1 */
+ mov %rdx,R_EDX(%r11) /* syscall arg2 */
+ mov %r10,R_R10(%r11) /* syscall arg3 */
+ mov %r8,R_R8(%r11) /* syscall arg4 */
+ mov %r9,R_R9(%r11) /* syscall arg5 */
+
+ mov %r11,%rbx /* prepare for error handling */
+ mov %r10,%rcx /* fix arg3 location according to C ABI */
+
+ /* switch to kernel stack, then we can enable interrupts */
+ CPU_NUMBER_NO_STACK(%r11d)
+ movq CX(EXT(kernel_stack),%r11),%rsp
+ sti
+
+ /* Now we have saved state and args 1-6 are in place.
+	 * Before invoking the syscall we do some bounds checking and,
+	 * if we have more than 6 arguments, we need to copy the
+ * remaining ones to the kernel stack, handling page faults when
+ * accessing the user stack.
+ */
+ negl %eax /* get system call number */
+ jl _syscall64_range /* out of range if it was positive */
+ cmpl EXT(mach_trap_count),%eax /* check system call table bounds */
+ jg _syscall64_range /* error if out of range */
+ shll $5,%eax /* manual indexing of mach_trap_t */
+
+ /* check if we need to place some arguments on the stack */
+_syscall64_args_stack:
+ mov EXT(mach_trap_table)(%rax),%r10 /* get number of arguments */
+ subq $6,%r10 /* the first 6 args are already in place */
+ jle _syscall64_call /* skip argument copy if num args <= 6 */
+
+ movq R_UESP(%rbx),%r11 /* get user stack pointer */
+ addq $8,%r11 /* Skip user return address */
+
+ lea (%r11,%r10,8),%r11 /* point past last argument */
+
+0: subq $8,%r11
+ RECOVER(_syscall64_addr_push)
+ mov (%r11),%r12
+ pushq %r12 /* push argument on stack */
+ dec %r10
+ jnz 0b /* loop for all remaining arguments */
+
+_syscall64_call:
+ call *EXT(mach_trap_table)+8(%rax) /* call procedure */
+
+_syscall64_check_for_ast:
+ /* Check for ast. */
+ CPU_NUMBER_NO_GS(%r11d)
+ cmpl $0,CX(EXT(need_ast),%r11)
+ jz _syscall64_restore_state
+
+ /* Save the syscall return value, both on our stack, for the case
+ * i386_astintr returns normally, and in the PCB stack, in case it
+ * instead calls thread_block(thread_exception_return).
+ */
+ pushq %rax /* save the return value on our stack */
+ pushq $0 /* dummy value to keep the stack aligned */
+
+ /* Find the PCB stack. */
+ movq %rsp,%rcx
+ or $(KERNEL_STACK_SIZE-1),%rcx
+ movq -7-IKS_SIZE(%rcx),%rcx
+
+ movq %rax,R_EAX(%rcx) /* save the return value in the PCB stack */
+ call EXT(i386_astintr)
+ popq %rax
+ popq %rax /* restore the return value */
+ jmp _syscall64_check_for_ast /* check again */
+
+_syscall64_restore_state:
+ /* Restore thread state and return to user using sysret. */
+ cli /* block interrupts when using the user stack in kernel space */
+ movq MY(ACTIVE_THREAD),%r11 /* point to current thread */
+ movq TH_PCB(%r11),%r11 /* point to pcb */
+ addq $ PCB_ISS,%r11 /* point to saved state */
+
+ /* Restore syscall args. Note: we can't restore the syscall number in
+ * RAX because it needs to hold the return value.*/
+ mov R_EDI(%r11),%rdi /* syscall arg0 */
+ mov R_ESI(%r11),%rsi /* syscall arg1 */
+ mov R_EDX(%r11),%rdx /* syscall arg2 */
+ mov R_R10(%r11),%r10 /* syscall arg3 */
+ mov R_R8(%r11),%r8 /* syscall arg4 */
+ mov R_R9(%r11),%r9 /* syscall arg5 */
+
+ mov R_UESP(%r11),%rsp /* callee-preserved register,
+ * also switch back to user stack */
+ mov R_EIP(%r11),%rcx /* sysret convention */
+ mov R_EBX(%r11),%rbx /* callee-preserved register */
+ mov R_EBP(%r11),%rbp /* callee-preserved register */
+ mov R_R12(%r11),%r12 /* callee-preserved register */
+ mov R_R13(%r11),%r13 /* callee-preserved register */
+ mov R_R14(%r11),%r14 /* callee-preserved register */
+ mov R_R15(%r11),%r15 /* callee-preserved register */
+ mov R_EFLAGS(%r11),%r11 /* sysret convention */
+
+ sysretq /* fast return to user-space, the thread didn't block */
+
+/* Error handling fragments, from here we jump directly to the trap handler */
+_syscall64_addr_push:
+ movq %r11,R_CR2(%rbx) /* set fault address */
+ movq $(T_PAGE_FAULT),R_TRAPNO(%rbx) /* set page-fault trap */
+ movq $(T_PF_USER),R_ERR(%rbx) /* set error code - read user space */
+ jmp _take_trap /* treat as a trap */
+
+_syscall64_range:
+ movq $(T_INVALID_OPCODE),R_TRAPNO(%rbx)
+ /* set invalid-operation trap */
+ movq $0,R_ERR(%rbx) /* clear error code */
+ jmp _take_trap /* treat as a trap */
+
+END(syscall64)
+#endif /* USER32 */
+
+ .data
+DATA(cpu_features)
+DATA(cpu_features_edx)
+ .long 0
+DATA(cpu_features_ecx)
+ .long 0
+ .text
+
+/* Discover what kind of cpu we have; return the family number
+ (3, 4, 5, 6, for 386, 486, 586, 686 respectively). */
+ENTRY(discover_x86_cpu_type)
+ /* We are a modern enough processor to have the CPUID instruction;
+ use it to find out what we are. */
+ movl $1,%eax /* Fetch CPU type info ... */
+ cpuid /* ... into eax */
+ movl %ecx,cpu_features_ecx /* Keep a copy */
+ movl %edx,cpu_features_edx /* Keep a copy */
+ shrl $8,%eax /* Slide family bits down */
+ andl $15,%eax /* And select them */
+ ret /* And return */
+
+
+/* */
+/*
+ * Utility routines.
+ */
+
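+/*
+ * Calling-convention note (sketch): copyin(user_src, kernel_dst, n)
+ * and copyout(kernel_src, user_dst, n) both return 0 on success and
+ * 1 on a faulting access.  The C ABI delivers the two pointers in
+ * %rdi/%rsi while movs copies from (%rsi) to (%rdi), hence the xchgq
+ * on entry.
+ */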
+ENTRY(copyin)
+ xchgq %rsi,%rdi /* Get user source and kernel destination */
+
+copyin_remainder:
+ /*cld*/ /* count up: default mode in all GCC code */
+ movq %rdx,%rcx /* move by longwords first */
+ shrq $3,%rcx
+ RECOVER(copyin_fail)
+ rep
+ movsq /* move longwords */
+ movq %rdx,%rcx /* now move remaining bytes */
+ andq $7,%rcx
+ RECOVER(copyin_fail)
+ rep
+ movsb
+ xorq %rax,%rax /* return 0 for success */
+
+copyin_ret:
+ ret /* and return */
+
+copyin_fail:
+ movq $1,%rax /* return 1 for failure */
+ jmp copyin_ret /* pop frame and return */
+
+bogus:
+ ud2
+
+ENTRY(copyout)
+ xchgq %rsi,%rdi /* Get user source and kernel destination */
+
+copyout_remainder:
+ movq %rdx,%rax /* use count */
+ /*cld*/ /* count up: always this way in GCC code */
+ movq %rax,%rcx /* move by longwords first */
+ shrq $3,%rcx
+ RECOVER(copyout_fail)
+ rep
+ movsq
+ movq %rax,%rcx /* now move remaining bytes */
+ andq $7,%rcx
+ RECOVER(copyout_fail)
+ rep
+ movsb /* move */
+ xorq %rax,%rax /* return 0 for success */
+
+copyout_ret:
+ ret /* and return */
+
+copyout_fail:
+ movq $1,%rax /* return 1 for failure */
+ jmp copyout_ret /* pop frame and return */
+
+/*
+ * int inst_fetch(int eip, int cs);
+ *
+ * Fetch instruction byte. Return -1 if invalid address.
+ */
+ENTRY(inst_fetch)
+ movq S_ARG1, %rax /* get segment */
+ movw %ax,%fs /* into FS */
+ movq S_ARG0, %rax /* get offset */
+ RETRY(EXT(inst_fetch)) /* re-load FS on retry */
+ RECOVER(_inst_fetch_fault)
+ movzbq %fs:(%rax),%rax /* load instruction byte */
+ ret
+
+_inst_fetch_fault:
+ movq $-1,%rax /* return -1 if error */
+ ret
+
+
+/*
+ * Done with recovery and retry tables.
+ */
+ RECOVER_TABLE_END
+ RETRY_TABLE_END
+
+
+
+/*
+ * cpu_shutdown()
+ * Force a reboot by triple fault: with an empty IDT loaded, the
+ * divide-by-zero below cannot be delivered, and the CPU resets.
+ */
+null_idt:
+ .space 8 * 32
+
+null_idtr:
+ .word 8 * 32 - 1
+ .quad null_idt
+
+Entry(cpu_shutdown)
+ lidt null_idtr /* disable the interrupt handler */
+ xor %rcx,%rcx /* generate a divide by zero */
+ div %rcx,%rax /* reboot now */
+ ret /* this will "never" be executed */