Diffstat (limited to 'x86_64/locore.S')
-rw-r--r--	x86_64/locore.S	1640
1 file changed, 1640 insertions(+), 0 deletions(-)
diff --git a/x86_64/locore.S b/x86_64/locore.S
new file mode 100644
index 0000000..25dc15d
--- /dev/null
+++ b/x86_64/locore.S
@@ -0,0 +1,1640 @@
+/*
+ * Mach Operating System
+ * Copyright (c) 1993,1992,1991,1990 Carnegie Mellon University
+ * Copyright (c) 1991 IBM Corporation
+ * All Rights Reserved.
+ *
+ * Permission to use, copy, modify and distribute this software and its
+ * documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation,
+ * and that the name IBM not be used in advertising or publicity
+ * pertaining to distribution of the software without specific, written
+ * prior permission.
+ *
+ * CARNEGIE MELLON AND IBM ALLOW FREE USE OF THIS SOFTWARE IN ITS "AS IS"
+ * CONDITION. CARNEGIE MELLON AND IBM DISCLAIM ANY LIABILITY OF ANY KIND FOR
+ * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ *
+ * Carnegie Mellon requests users of this software to return to
+ *
+ * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
+ * School of Computer Science
+ * Carnegie Mellon University
+ * Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie Mellon
+ * the rights to redistribute these changes.
+ */
+
+#include <mach/machine/asm.h>
+#include <mach/machine/eflags.h>
+#include <i386/i386/proc_reg.h>
+#include <i386/i386/trap.h>
+#include <i386/i386/seg.h>
+#include <i386/i386/gdt.h>
+#include <i386/i386/ldt.h>
+#include <i386/i386/msr.h>
+#include <i386/i386/i386asm.h>
+#include <i386/i386/cpu_number.h>
+#include <i386/i386/xen.h>
+
+
+/*
+ * Helpers for thread state as saved in the pcb area, during trap or irq handling
+ */
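+/*
+ * Layout note (a sketch of the intent): the push order below matches
+ * the field order of struct i386_saved_state; the 8-byte gap left by
+ * the subq/addq pair is the cr2 slot (where the 32-bit PUSHA used to
+ * store %esp), which t_page_fault and t_dbl_fault fill in by hand.
+ */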
+#define pusha \
+ pushq %rax ;\
+ pushq %rcx ;\
+ pushq %rdx ;\
+ pushq %rbx ;\
+ subq $8,%rsp ;\
+ pushq %rbp ;\
+ pushq %rsi ;\
+ pushq %rdi ;\
+ pushq %r8 ;\
+ pushq %r9 ;\
+ pushq %r10 ;\
+ pushq %r11 ;\
+ pushq %r12 ;\
+ pushq %r13 ;\
+ pushq %r14 ;\
+ pushq %r15
+
+#define popa \
+ popq %r15 ;\
+ popq %r14 ;\
+ popq %r13 ;\
+ popq %r12 ;\
+ popq %r11 ;\
+ popq %r10 ;\
+ popq %r9 ;\
+ popq %r8 ;\
+ popq %rdi ;\
+ popq %rsi ;\
+ popq %rbp ;\
+ addq $8,%rsp ;\
+ popq %rbx ;\
+ popq %rdx ;\
+ popq %rcx ;\
+ popq %rax
+
+#define PUSH_REGS_ISR \
+ pushq %rcx ;\
+ pushq %rdx ;\
+ pushq %rsi ;\
+ pushq %rdi ;\
+ pushq %r8 ;\
+ pushq %r9 ;\
+ pushq %r10 ;\
+ pushq %r11
+
+#define PUSH_AREGS_ISR \
+ pushq %rax ;\
+ PUSH_REGS_ISR
+
+
+#define POP_REGS_ISR \
+ popq %r11 ;\
+ popq %r10 ;\
+ popq %r9 ;\
+ popq %r8 ;\
+ popq %rdi ;\
+ popq %rsi ;\
+ popq %rdx ;\
+ popq %rcx
+
+#define POP_AREGS_ISR \
+ POP_REGS_ISR ;\
+ popq %rax
+
+/*
+ * Note that we have to load the kernel segment registers even if this
+ * is a trap from the kernel, because the kernel uses user segment
+ * registers for copyin/copyout.
+ * (XXX Would it be smarter just to use fs or gs for that?)
+ */
+#ifdef USER32
+#define PUSH_SEGMENTS(reg) \
+ movq %ds,reg ;\
+ pushq reg ;\
+ movq %es,reg ;\
+ pushq reg ;\
+ pushq %fs ;\
+ pushq %gs
+#else
+#define PUSH_SEGMENTS(reg)
+#endif
+
+#ifdef USER32
+#define POP_SEGMENTS(reg) \
+ popq %gs ;\
+ popq %fs ;\
+ popq reg ;\
+ movq reg,%es ;\
+ popq reg ;\
+ movq reg,%ds
+#else
+#define POP_SEGMENTS(reg)
+#endif
+
+#ifdef USER32
+#define PUSH_SEGMENTS_ISR(reg) \
+ movq %ds,reg ;\
+ pushq reg ;\
+ movq %es,reg ;\
+ pushq reg ;\
+ pushq %fs ;\
+ pushq %gs
+#else
+#define PUSH_SEGMENTS_ISR(reg)
+#endif
+
+#ifdef USER32
+#define POP_SEGMENTS_ISR(reg) \
+ popq %gs ;\
+ popq %fs ;\
+ popq reg ;\
+ movq reg,%es ;\
+ popq reg ;\
+ movq reg,%ds
+#else
+#define POP_SEGMENTS_ISR(reg)
+#endif
+
+#ifdef USER32
+#define SET_KERNEL_SEGMENTS(reg) \
+ mov %ss,reg /* switch to kernel segments */ ;\
+ mov reg,%ds /* (same as kernel stack segment) */ ;\
+ mov reg,%es ;\
+ mov reg,%fs ;\
+ mov $(PERCPU_DS),reg ;\
+ mov reg,%gs
+#else
+#define SET_KERNEL_SEGMENTS(reg)
+#endif
+
+/*
+ * Fault recovery.
+ */
+#define RECOVER_TABLE_START \
+ .text 2 ;\
+DATA(recover_table) ;\
+ .text
+
+#define RECOVER(addr) \
+ .text 2 ;\
+ .quad 9f ;\
+ .quad addr ;\
+ .text ;\
+9:
+
+#define RECOVER_TABLE_END \
+ .text 2 ;\
+ .globl EXT(recover_table_end) ;\
+LEXT(recover_table_end) ;\
+ .text
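+/*
+ * Usage sketch: an instruction that may fault on a user address is
+ * preceded by RECOVER(label), which records the pair (address of the
+ * following instruction, recovery label) in recover_table; the trap
+ * handler is expected to search this table for the faulting RIP and
+ * resume at the recovery label instead of panicking, e.g.:
+ *
+ *	RECOVER(copyin_fail)
+ *	rep
+ *	movsq
+ */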
+
+/*
+ * Retry table for certain successful faults.
+ */
+#define RETRY_TABLE_START \
+ .text 3 ;\
+DATA(retry_table) ;\
+ .text
+
+#define RETRY(addr) \
+ .text 3 ;\
+ .quad 9f ;\
+ .quad addr ;\
+ .text ;\
+9:
+
+#define RETRY_TABLE_END \
+ .text 3 ;\
+ .globl EXT(retry_table_end) ;\
+LEXT(retry_table_end) ;\
+ .text
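+/*
+ * RETRY works the same way, except that the recorded address is a
+ * point to re-execute once the handler has repaired the fault; see
+ * inst_fetch below, which retries from its entry so that %fs gets
+ * reloaded.
+ */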
+
+/*
+ * Allocate recovery and retry tables.
+ */
+ RECOVER_TABLE_START
+ RETRY_TABLE_START
+
+/*
+ * Timing routines.
+ */
+#if STAT_TIME
+
+#define TIME_TRAP_UENTRY
+#define TIME_TRAP_SENTRY
+#define TIME_TRAP_UEXIT
+#define TIME_INT_ENTRY
+#define TIME_INT_EXIT
+
+#else /* microsecond timing */
+
+/*
+ * Microsecond timing.
+ * Assumes a free-running microsecond counter.
+ * No TIMER_MAX check needed.
+ */
+
+/*
+ * There is only one current time-stamp per CPU, since only
+ * the time-stamp in the current timer is used.
+ * To save time, we allocate the current time-stamps here.
+ */
+ .comm EXT(current_tstamp), 4*NCPUS
+
+/*
+ * Update time on user trap entry.
+ * 11 instructions (including cli on entry)
+ * Assumes CPU number in %edx.
+ * Uses %eax, %ebx, %ecx.
+ */
+#define TIME_TRAP_UENTRY \
+ pushf /* Save flags */ ;\
+ cli /* block interrupts */ ;\
+ movl VA_ETC,%ebx /* get timer value */ ;\
+ movl CX(EXT(current_tstamp),%rdx),%ecx /* get old time stamp */;\
+ movl %ebx,CX(EXT(current_tstamp),%rdx) /* set new time stamp */;\
+ subl %ecx,%ebx /* elapsed = new-old */ ;\
+ movl CX(EXT(current_timer),%rdx),%ecx /* get current timer */ ;\
+ addl %ebx,LOW_BITS(%ecx) /* add to low bits */ ;\
+ jns 0f /* if overflow, */ ;\
+ call timer_normalize /* normalize timer */ ;\
+0: addl $(TH_SYSTEM_TIMER-TH_USER_TIMER),%ecx ;\
+ /* switch to sys timer */;\
+ movl %ecx,CX(EXT(current_timer),%rdx) /* make it current */ ;\
+ popf /* allow interrupts */
+
+/*
+ * Update time on system call entry.
+ * 11 instructions (including cli on entry)
+ * Assumes CPU number in %edx.
+ * Uses %ebx, %ecx.
+ * Same as TIME_TRAP_UENTRY, but preserves %eax.
+ */
+#define TIME_TRAP_SENTRY \
+ pushf /* Save flags */ ;\
+ cli /* block interrupts */ ;\
+ movl VA_ETC,%ebx /* get timer value */ ;\
+ movl CX(EXT(current_tstamp),%rdx),%ecx /* get old time stamp */;\
+ movl %ebx,CX(EXT(current_tstamp),%rdx) /* set new time stamp */;\
+ subl %ecx,%ebx /* elapsed = new-old */ ;\
+ movl CX(EXT(current_timer),%rdx),%ecx /* get current timer */ ;\
+ addl %ebx,LOW_BITS(%ecx) /* add to low bits */ ;\
+ jns 0f /* if overflow, */ ;\
+ pushq %rax /* save %rax */ ;\
+ call timer_normalize /* normalize timer */ ;\
+ popq %rax /* restore %rax */ ;\
+0: addl $(TH_SYSTEM_TIMER-TH_USER_TIMER),%ecx ;\
+ /* switch to sys timer */;\
+ movl %ecx,CX(EXT(current_timer),%rdx) /* make it current */ ;\
+ popf /* allow interrupts */
+
+/*
+ * update time on user trap exit.
+ * 10 instructions.
+ * Assumes CPU number in %edx.
+ * Uses %ebx, %ecx.
+ */
+#define TIME_TRAP_UEXIT \
+ cli /* block interrupts */ ;\
+ movl VA_ETC,%ebx /* get timer */ ;\
+ movl CX(EXT(current_tstamp),%rdx),%ecx /* get old time stamp */;\
+ movl %ebx,CX(EXT(current_tstamp),%rdx) /* set new time stamp */;\
+ subl %ecx,%ebx /* elapsed = new-old */ ;\
+ movl CX(EXT(current_timer),%rdx),%ecx /* get current timer */ ;\
+ addl %ebx,LOW_BITS(%ecx) /* add to low bits */ ;\
+ jns 0f /* if overflow, */ ;\
+ call timer_normalize /* normalize timer */ ;\
+0: addl $(TH_USER_TIMER-TH_SYSTEM_TIMER),%ecx ;\
+ /* switch to user timer */;\
+ movl %ecx,CX(EXT(current_timer),%rdx) /* make it current */
+
+/*
+ * update time on interrupt entry.
+ * 9 instructions.
+ * Assumes CPU number in %edx.
+ * Leaves old timer in %ebx.
+ * Uses %ecx.
+ */
+#define TIME_INT_ENTRY \
+ movl VA_ETC,%ecx /* get timer */ ;\
+ movl CX(EXT(current_tstamp),%rdx),%ebx /* get old time stamp */;\
+ movl %ecx,CX(EXT(current_tstamp),%rdx) /* set new time stamp */;\
+ subl %ebx,%ecx /* elapsed = new-old */ ;\
+ movl CX(EXT(current_timer),%rdx),%ebx /* get current timer */ ;\
+ addl %ecx,LOW_BITS(%ebx) /* add to low bits */ ;\
+ leal CX(0,%rdx),%ecx /* timer is 16 bytes */ ;\
+ lea CX(EXT(kernel_timer),%rdx),%ecx /* get interrupt timer*/;\
+ movl %ecx,CX(EXT(current_timer),%rdx) /* set timer */
+
+/*
+ * update time on interrupt exit.
+ * 11 instructions
+ * Assumes CPU number in %edx, old timer in %ebx.
+ * Uses %eax, %ecx.
+ */
+#define TIME_INT_EXIT \
+ movl VA_ETC,%eax /* get timer */ ;\
+ movl CX(EXT(current_tstamp),%rdx),%ecx /* get old time stamp */;\
+ movl %eax,CX(EXT(current_tstamp),%rdx) /* set new time stamp */;\
+ subl %ecx,%eax /* elapsed = new-old */ ;\
+ movl CX(EXT(current_timer),%rdx),%ecx /* get current timer */ ;\
+ addl %eax,LOW_BITS(%ecx) /* add to low bits */ ;\
+ jns 0f /* if overflow, */ ;\
+ call timer_normalize /* normalize timer */ ;\
+0: testb $0x80,LOW_BITS+3(%ebx) /* old timer overflow? */;\
+ jz 0f /* if overflow, */ ;\
+ movl %ebx,%ecx /* get old timer */ ;\
+ call timer_normalize /* normalize timer */ ;\
+0: movl %ebx,CX(EXT(current_timer),%rdx) /* set timer */
+
+
+/*
+ * Normalize timer in ecx.
+ * Preserves edx; clobbers eax.
+ */
+ .align 2
+timer_high_unit:
+ .long TIMER_HIGH_UNIT /* div has no immediate opnd */
+
+timer_normalize:
+ pushq %rdx /* save register */
+ xorl %edx,%edx /* clear divisor high */
+ movl LOW_BITS(%ecx),%eax /* get divisor low */
+ divl timer_high_unit,%eax /* quotient in eax */
+ /* remainder in edx */
+ addl %eax,HIGH_BITS_CHECK(%ecx) /* add high_inc to check */
+ movl %edx,LOW_BITS(%ecx) /* remainder to low_bits */
+ addl %eax,HIGH_BITS(%ecx) /* add high_inc to high bits */
+ popq %rdx /* restore register */
+ ret
+
+/*
+ * Switch to a new timer.
+ */
+ENTRY(timer_switch)
+ CPU_NUMBER(%edx) /* get this CPU */
+ movl VA_ETC,%ecx /* get timer */
+ movl CX(EXT(current_tstamp),%rdx),%eax /* get old time stamp */
+ movl %ecx,CX(EXT(current_tstamp),%rdx) /* set new time stamp */
+ subl %ecx,%eax /* elapsed = new - old */
+ movl CX(EXT(current_timer),%rdx),%ecx /* get current timer */
+ addl %eax,LOW_BITS(%ecx) /* add to low bits */
+ jns 0f /* if overflow, */
+ call timer_normalize /* normalize timer */
+0:
+ movl S_ARG0,%ecx /* get new timer */
+ movl %ecx,CX(EXT(current_timer),%rdx) /* set timer */
+ ret
+
+/*
+ * Initialize the first timer for a CPU.
+ */
+ENTRY(start_timer)
+ CPU_NUMBER(%edx) /* get this CPU */
+ movl VA_ETC,%ecx /* get timer */
+ movl %ecx,CX(EXT(current_tstamp),%rdx) /* set initial time stamp */
+ movl S_ARG0,%ecx /* get timer */
+ movl %ecx,CX(EXT(current_timer),%rdx) /* set initial timer */
+ ret
+
+#endif /* accurate timing */
+
+/* */
+
+/*
+ * Trap/interrupt entry points.
+ *
+ * All traps must create the i386_saved_state struct on the stack on
+ * entry. Note that:
+ * - CR2 is only used if the trap is a page fault
+ * - user_rsp/user_ss are only used if entering from user space
+ * - v86_regs are used only from V86 threads
+ * (TODO check if V86 is still used with USER32)
+ *
+ * Depending on the CPL before entry, the stack might be switched or not;
+ * if entering from user space the CPU loads TSS->RSP0 into RSP,
+ * otherwise RSP is unchanged. After this, the CPU pushes
+ * SS/RSP/RFLAGS/CS/RIP and optionally an error code, then executes the handler.
+ */
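+/*
+ * For reference, the frame the CPU itself builds on entry (64-bit
+ * mode, pushed in this order):
+ *
+ *	SS
+ *	RSP
+ *	RFLAGS
+ *	CS
+ *	RIP
+ *	[error code, only for some exceptions]
+ */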
+
+/* Try to save/show some information when a double fault happens.
+ * We can't recover to a working state, so if we have a debugger, wait for it;
+ * otherwise reset. */
+ENTRY(t_dbl_fault)
+ INT_FIX
+ cli /* disable interrupts that might corrupt the state*/
+ pusha
+ movq %cr2,%rax
+ movq %rax,R_CR2-R_R15(%rsp) /* CR2 might contain the faulting address */
+ subq $48,%rsp // FIXME remove when segments are cleaned up
+ movq %rsp,%rdi /* pass the saved state */
+ call handle_double_fault
+ jmp cpu_shutdown /* reset */
+END(t_dbl_fault)
+
+/*
+ * General protection or segment-not-present fault.
+ * Check for a GP/NP fault in the kernel_return
+ * sequence; if there, report it as a GP/NP fault on the user's instruction.
+ *
+ * rsp->	 0:	trap code (NP or GP)
+ *		 8:	segment number in error
+ *		16:	eip
+ *		24:	cs
+ *		32:	eflags
+ *		40:	old registers (trap is from kernel)
+ */
+ENTRY(t_gen_prot)
+ INT_FIX
+ pushq $(T_GENERAL_PROTECTION) /* indicate fault type */
+ jmp trap_check_kernel_exit /* check for kernel exit sequence */
+
+ENTRY(t_segnp)
+ INT_FIX
+ pushq $(T_SEGMENT_NOT_PRESENT)
+ /* indicate fault type */
+
+trap_check_kernel_exit:
+#ifdef USER32
+ testq $(EFL_VM),32(%rsp) /* is trap from V86 mode? */
+ jnz EXT(alltraps) /* isn`t kernel trap if so */
+#endif
+ /* Note: handling KERNEL_RING value by hand */
+ testq $2,24(%rsp) /* is trap from kernel mode? */
+ jnz EXT(alltraps) /* if so: */
+ /* check for the kernel exit sequence */
+ cmpq $_kret_iret,16(%rsp) /* on IRET? */
+ je fault_iret
+#ifdef USER32
+ cmpq $_kret_popl_ds,16(%rsp) /* popping DS? */
+ je fault_popl_ds
+ cmpq $_kret_popl_es,16(%rsp) /* popping ES? */
+ je fault_popl_es
+ cmpq $_kret_popl_fs,16(%rsp) /* popping FS? */
+ je fault_popl_fs
+ cmpq $_kret_popl_gs,16(%rsp) /* popping GS? */
+ je fault_popl_gs
+#endif
+take_fault: /* if none of the above: */
+ jmp EXT(alltraps) /* treat as normal trap. */
+
+/*
+ * GP/NP fault on IRET: CS or SS is in error.
+ * All registers contain the user's values.
+ *
+ * on SP is
+ * 0 trap number
+ * 8 errcode
+ * 16 eip
+ * 24 cs --> trapno
+ * 32 efl --> errcode
+ * 40 user eip
+ * 48 user cs
+ * 56 user eflags
+ * 64 user rsp
+ * 72 user ss
+ */
+fault_iret:
+ movq %rax,16(%rsp) /* save eax (we don`t need saved eip) */
+ popq %rax /* get trap number */
+ movq %rax,24-8(%rsp) /* put in user trap number */
+ popq %rax /* get error code */
+ movq %rax,32-16(%rsp) /* put in user errcode */
+ popq %rax /* restore eax */
+ jmp EXT(alltraps) /* take fault */
+
+#ifdef USER32
+/*
+ * Fault restoring a segment register. The user's registers are still
+ * saved on the stack. The offending segment register has not been
+ * popped.
+ */
+fault_popl_ds:
+ popq %rax /* get trap number */
+ popq %rdx /* get error code */
+ addq $24,%rsp /* pop stack to user regs */
+ jmp push_es /* (DS on top of stack) */
+fault_popl_es:
+ popq %rax /* get trap number */
+ popq %rdx /* get error code */
+ addq $24,%rsp /* pop stack to user regs */
+ jmp push_fs /* (ES on top of stack) */
+fault_popl_fs:
+ popq %rax /* get trap number */
+ popq %rdx /* get error code */
+ addq $24,%rsp /* pop stack to user regs */
+ jmp push_gs /* (FS on top of stack) */
+fault_popl_gs:
+ popq %rax /* get trap number */
+ popq %rdx /* get error code */
+ addq $24,%rsp /* pop stack to user regs */
+ jmp push_segregs /* (GS on top of stack) */
+
+push_es:
+ movq %es,%rcx
+ pushq %rcx /* restore es, */
+push_fs:
+ pushq %fs /* restore fs, */
+push_gs:
+ pushq %gs /* restore gs. */
+push_gsbase:
+ pushq $0
+ pushq $0
+#endif
+push_segregs:
+ movq %rax,R_TRAPNO(%rsp) /* set trap number */
+ movq %rdx,R_ERR(%rsp) /* set error code */
+ jmp trap_set_segs /* take trap */
+
+/*
+ * Debug trap. Check for single-stepping across system call into
+ * kernel. If this is the case, taking the debug trap has turned
+ * off single-stepping - save the flags register with the trace
+ * bit set.
+ */
+ENTRY(t_debug)
+ INT_FIX
+#ifdef USER32
+ testq $(EFL_VM),16(%rsp) /* is trap from V86 mode? */
+ jnz 0f /* isn`t kernel trap if so */
+#endif
+ /* Note: handling KERNEL_RING value by hand */
+ testq $2,8(%rsp) /* is trap from kernel mode? */
+ jnz 0f /* if so: */
+#ifdef USER32
+ cmpq $syscall_entry,(%rsp) /* system call entry? */
+ jne 0f /* if so: */
+ /* flags are sitting where syscall */
+ /* wants them */
+ addq $32,%rsp /* remove eip/cs */
+ jmp syscall_entry_2 /* continue system call entry */
+#else
+ // TODO: implement the 64-bit case
+ ud2
+#endif
+0: pushq $0 /* otherwise: */
+ pushq $(T_DEBUG) /* handle as normal */
+ jmp EXT(alltraps) /* debug fault */
+
+/*
+ * Page fault traps save cr2.
+ */
+ENTRY(t_page_fault)
+ INT_FIX
+ pushq $(T_PAGE_FAULT) /* mark a page fault trap */
+ pusha /* save the general registers */
+#ifdef MACH_XEN
+ movq %ss:hyp_shared_info+CR2,%rax
+#else /* MACH_XEN */
+ movq %cr2,%rax /* get the faulting address */
+#endif /* MACH_XEN */
+ movq %rax,R_CR2-R_R15(%rsp) /* save in rsp save slot */
+ jmp trap_push_segs /* continue fault */
+
+/*
+ * All 'exceptions' enter here with:
+ * rsp-> trap number
+ * error code
+ * old eip
+ * old cs
+ * old eflags
+ * old rsp if trapped from user
+ * old ss if trapped from user
+ */
+ENTRY(alltraps)
+ pusha /* save the general registers */
+trap_push_segs:
+ PUSH_SEGMENTS(%rax) /* and the segment registers */
+ SET_KERNEL_SEGMENTS(%rax) /* switch to kernel data segment */
+trap_set_segs:
+ cld /* clear direction flag */
+#ifdef USER32
+ testl $(EFL_VM),R_EFLAGS(%rsp) /* in V86 mode? */
+ jnz trap_from_user /* user mode trap if so */
+#endif
+ /* Note: handling KERNEL_RING value by hand */
+ testb $2,R_CS(%rsp) /* user mode trap? */
+ jz trap_from_kernel /* kernel trap if not */
+trap_from_user:
+
+ CPU_NUMBER(%edx)
+ TIME_TRAP_UENTRY
+
+ movq CX(EXT(kernel_stack),%rdx),%rbx
+ xchgq %rbx,%rsp /* switch to kernel stack */
+ /* user regs pointer already set */
+_take_trap:
+ movq %rbx,%rdi /* pass register save area to trap */
+ call EXT(user_trap) /* call user trap routine */
+#ifdef USER32
+ orq %rax,%rax /* emulated syscall? */
+ jz 1f /* no, just return */
+ movq R_EAX(%rbx),%rax /* yes, get syscall number */
+ jmp syscall_entry_3 /* and emulate it */
+#endif
+1:
+ movq (%rsp),%rsp /* switch back to PCB stack */
+
+/*
+ * Return from trap or system call, checking for ASTs.
+ * On PCB stack.
+ */
+
+_return_from_trap:
+ CPU_NUMBER(%edx)
+ cmpl $0,CX(EXT(need_ast),%rdx)
+ jz _return_to_user /* if we need an AST: */
+
+ movq CX(EXT(kernel_stack),%rdx),%rsp
+ /* switch to kernel stack */
+ call EXT(i386_astintr) /* take the AST */
+ popq %rsp /* switch back to PCB stack */
+ jmp _return_from_trap /* and check again (rare) */
+ /* ASTs after this point will */
+ /* have to wait */
+
+_return_to_user:
+ TIME_TRAP_UEXIT
+
+/*
+ * Return from kernel mode to interrupted thread.
+ */
+
+_return_from_kernel:
+#ifdef USER32
+_kret_popl_gs:
+ popq %gs /* restore segment registers */
+_kret_popl_fs:
+ popq %fs
+_kret_popl_es:
+ popq %rax
+ movq %rax,%es
+_kret_popl_ds:
+ popq %rax
+ movq %rax,%ds
+#endif
+ popa /* restore general registers */
+ addq $16,%rsp /* discard trap number and error code */
+_kret_iret:
+ iretq /* return from interrupt */
+
+
+/*
+ * Trap from kernel mode. No need to switch stacks.
+ */
+trap_from_kernel:
+#if MACH_KDB || MACH_TTD
+ movq %rsp,%rbx /* save current stack */
+ movq %rsp,%rdx /* on an interrupt stack? */
+
+ CPU_NUMBER(%ecx)
+ and $(~(INTSTACK_SIZE-1)),%rdx
+ cmpq CX(EXT(int_stack_base),%rcx),%rdx
+ je 1f /* OK if so */
+
+ movl %ecx,%edx
+ cmpq CX(EXT(kernel_stack),%rdx),%rsp
+ /* already on kernel stack? */
+ ja 0f
+ cmpq MY(ACTIVE_STACK),%rsp
+ ja 1f /* switch if not */
+0:
+ movq CX(EXT(kernel_stack),%rdx),%rsp
+1:
+ pushq %rbx /* save old stack */
+ movq %rbx,%rdi /* pass as parameter */
+ call EXT(kernel_trap) /* to kernel trap routine */
+
+ popq %rsp /* return to old stack */
+#else /* MACH_KDB || MACH_TTD */
+
+ movq %rsp,%rdi /* pass parameter */
+ call EXT(kernel_trap) /* to kernel trap routine */
+
+#endif /* MACH_KDB || MACH_TTD */
+
+ jmp _return_from_kernel
+
+
+/*
+ * Called as a function, makes the current thread
+ * return from the kernel as if from an exception.
+ */
+
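+/*
+ * A sketch of the stack-pointer recovery used here and in
+ * thread_syscall_return and call_continuation (assuming kernel stacks
+ * are KERNEL_STACK_SIZE-aligned): or'ing RSP with KERNEL_STACK_SIZE-1
+ * yields the address of the last byte of the stack, and the PCB stack
+ * pointer saved on trap entry sits in the quadword just below the
+ * i386_kernel_state area at the stack top, i.e. at -7-IKS_SIZE from
+ * that byte.
+ */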
+ENTRY(thread_exception_return)
+ENTRY(thread_bootstrap_return)
+ movq %rsp,%rcx /* get kernel stack */
+ or $(KERNEL_STACK_SIZE-1),%rcx
+ movq -7-IKS_SIZE(%rcx),%rsp /* switch back to PCB stack */
+ jmp _return_from_trap
+
+/*
+ * Called as a function, makes the current thread
+ * return from the kernel as if from a syscall.
+ * Takes the syscall's return code as an argument.
+ */
+
+ENTRY(thread_syscall_return)
+ movq S_ARG0,%rax /* get return value */
+ movq %rsp,%rcx /* get kernel stack */
+ or $(KERNEL_STACK_SIZE-1),%rcx
+ movq -7-IKS_SIZE(%rcx),%rsp /* switch back to PCB stack */
+ movq %rax,R_EAX(%rsp) /* save return value */
+ jmp _return_from_trap
+
+ENTRY(call_continuation)
+ movq S_ARG0,%rax /* get continuation */
+ movq %rsp,%rcx /* get kernel stack */
+ or $(KERNEL_STACK_SIZE-1),%rcx
+ addq $(-7-IKS_SIZE),%rcx
+ movq %rcx,%rsp /* pop the stack */
+ xorq %rbp,%rbp /* zero frame pointer */
+ pushq $0 /* Dummy return address */
+ jmp *%rax /* goto continuation */
+
+/* IOAPIC has 24 interrupts, put spurious in the same array */
+
+#define INTERRUPT(n) \
+ .data 2 ;\
+ .quad 0f ;\
+ .text ;\
+ P2ALIGN(TEXT_ALIGN) ;\
+0: ;\
+ INT_FIX ;\
+ pushq %rax ;\
+ movq $(n),%rax ;\
+ jmp EXT(all_intrs)
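+/*
+ * Each INTERRUPT(n) expansion thus contributes two pieces: the
+ * address of its stub to the int_entry_table array built in the
+ * .data 2 subsection, and the stub itself in .text, which saves %rax,
+ * loads the interrupt number and joins the common path at all_intrs.
+ */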
+
+ .data 2
+DATA(int_entry_table)
+ .text
+/* Legacy APIC interrupts or PIC interrupts */
+INTERRUPT(0)
+INTERRUPT(1)
+INTERRUPT(2)
+INTERRUPT(3)
+INTERRUPT(4)
+INTERRUPT(5)
+INTERRUPT(6)
+INTERRUPT(7)
+INTERRUPT(8)
+INTERRUPT(9)
+INTERRUPT(10)
+INTERRUPT(11)
+INTERRUPT(12)
+INTERRUPT(13)
+INTERRUPT(14)
+INTERRUPT(15)
+#ifdef APIC
+/* APIC PCI interrupts PIRQ A-H */
+INTERRUPT(16)
+INTERRUPT(17)
+INTERRUPT(18)
+INTERRUPT(19)
+INTERRUPT(20)
+INTERRUPT(21)
+INTERRUPT(22)
+INTERRUPT(23)
+#endif
+#if NCPUS > 1
+INTERRUPT(CALL_AST_CHECK)
+INTERRUPT(CALL_PMAP_UPDATE)
+#endif
+#ifdef APIC
+/* Spurious interrupt, set irq number to vect number */
+INTERRUPT(255)
+#endif
+
+/* XXX handle NMI - at least print a warning like Linux does. */
+
+/*
+ * All interrupts enter here. The cpu might have loaded a new RSP,
+ * depending on the previous CPL, as in alltraps.
+ * Old %eax on stack, interrupt number in %eax; we need to fill the remaining
+ * fields of struct i386_interrupt_state, which might be in the pcb or in the
+ * interrupt stack.
+ */
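+/*
+ * Stack-detection sketch (assuming INTSTACK_SIZE-aligned interrupt
+ * stacks): masking RSP with ~(INTSTACK_SIZE-1) recovers the base of
+ * whatever stack we are running on; if it equals this CPU's
+ * int_stack_base, we were interrupted while already on the interrupt
+ * stack and must not switch again.
+ */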
+ENTRY(all_intrs)
+ PUSH_REGS_ISR /* save registers */
+ cld /* clear direction flag */
+
+ PUSH_SEGMENTS_ISR(%rdx) /* save segment registers */
+
+ CPU_NUMBER_NO_GS(%ecx)
+ movq %rsp,%rdx /* on an interrupt stack? */
+ and $(~(INTSTACK_SIZE-1)),%rdx
+ cmpq %ss:CX(EXT(int_stack_base),%rcx),%rdx
+ je int_from_intstack /* if not: */
+
+ SET_KERNEL_SEGMENTS(%rdx) /* switch to kernel segments */
+
+ CPU_NUMBER(%edx)
+
+ movq CX(EXT(int_stack_top),%rdx),%rcx
+
+ xchgq %rcx,%rsp /* switch to interrupt stack */
+
+#if STAT_TIME
+ pushq %rcx /* save pointer to old stack */
+#else
+ pushq %rbx /* save %ebx - out of the way */
+ /* so stack looks the same */
+ pushq %rcx /* save pointer to old stack */
+ TIME_INT_ENTRY /* do timing */
+#endif
+
+#ifdef MACH_LDEBUG
+ incl CX(EXT(in_interrupt),%rdx)
+#endif
+
+ call EXT(interrupt) /* call generic interrupt routine */
+ .globl EXT(return_to_iret) /* ( label for kdb_kintr and hardclock */
+LEXT(return_to_iret) /* to find the return from calling interrupt) */
+
+ CPU_NUMBER(%edx)
+#ifdef MACH_LDEBUG
+ decl CX(EXT(in_interrupt),%rdx)
+#endif
+
+#if STAT_TIME
+#else
+ TIME_INT_EXIT /* do timing */
+ movq 8(%rsp),%rbx /* restore the extra reg we saved */
+#endif
+
+ popq %rsp /* switch back to old stack */
+
+#ifdef USER32
+ testl $(EFL_VM),I_EFL(%rsp) /* if in V86 */
+ jnz 0f /* or */
+#endif
+ /* Note: handling KERNEL_RING value by hand */
+ testb $2,I_CS(%rsp) /* user mode, */
+ jz 1f /* check for ASTs */
+0:
+ cmpq $0,CX(EXT(need_ast),%rdx)
+ jnz ast_from_interrupt /* take it if so */
+1:
+ POP_SEGMENTS_ISR(%rdx) /* restore segment regs */
+ POP_AREGS_ISR /* restore registers */
+
+ iretq /* return to caller */
+
+int_from_intstack:
+ CPU_NUMBER_NO_GS(%edx)
+ cmpq CX(EXT(int_stack_base),%rdx),%rsp /* seemingly looping? */
+ jb stack_overflowed /* if not: */
+ call EXT(interrupt) /* call interrupt routine */
+_return_to_iret_i: /* ( label for kdb_kintr) */
+ POP_SEGMENTS_ISR(%rdx)
+ POP_AREGS_ISR /* restore registers */
+ /* no ASTs */
+
+ iretq
+
+stack_overflowed:
+ ud2
+
+/*
+ * Take an AST from an interrupt.
+ * On PCB stack.
+ * sp-> gs -> edx
+ * fs -> ecx
+ * es -> eax
+ * ds -> trapno
+ * edx -> code
+ * ecx
+ * eax
+ * eip
+ * cs
+ * efl
+ * rsp
+ * ss
+ */
+ast_from_interrupt:
+ POP_SEGMENTS_ISR(%rdx) /* restore all registers ... */
+ POP_AREGS_ISR
+ pushq $0 /* zero code */
+ pushq $0 /* zero trap number */
+ pusha /* save general registers */
+ PUSH_SEGMENTS_ISR(%rdx) /* save segment registers */
+ SET_KERNEL_SEGMENTS(%rdx) /* switch to kernel segments */
+ CPU_NUMBER(%edx)
+ TIME_TRAP_UENTRY
+
+ movq CX(EXT(kernel_stack),%rdx),%rsp
+ /* switch to kernel stack */
+ call EXT(i386_astintr) /* take the AST */
+ popq %rsp /* back to PCB stack */
+ jmp _return_from_trap /* return */
+
+#if MACH_KDB
+/*
+ * kdb_kintr: enter kdb from keyboard interrupt.
+ * Chase down the stack frames until we find one whose return
+ * address is the interrupt handler. At that point, we have:
+ *
+ * frame-> saved %rbp
+ * return address in interrupt handler
+ * saved SPL
+ * saved IRQ
+ * return address == return_to_iret_i
+ * saved %r11
+ * saved %r10
+ * saved %r9
+ * saved %r8
+ * saved %rdx
+ * saved %rcx
+ * saved %rax
+ * saved %rip
+ * saved %cs
+ * saved %rfl
+ *
+ * OR:
+ * frame-> saved %rbp
+ * return address in interrupt handler
+ * return address == return_to_iret
+ * pointer to save area on old stack
+ * [ saved %ebx, if accurate timing ]
+ *
+ * old stack: saved %gs
+ * saved %fs
+ * saved %es
+ * saved %ds
+ * saved %r11
+ * saved %r10
+ * saved %r9
+ * saved %r8
+ * saved %rdi
+ * saved %rsi
+ * saved %rdx
+ * saved %rcx
+ * saved %eax
+ * saved %rip
+ * saved %cs
+ * saved %rfl
+ *
+ * Call kdb, passing it that register save area.
+ */
+
+#define RET_OFFSET 32
+
+
+ENTRY(kdb_kintr)
+ movq %rbp,%rax /* save caller`s frame pointer */
+ movq $EXT(return_to_iret),%rcx /* interrupt return address 1 */
+ movq $_return_to_iret_i,%rdx /* interrupt return address 2 */
+
+0: cmpq RET_OFFSET(%rax),%rcx /* does this frame return to */
+ /* interrupt handler (1)? */
+ je 1f
+ cmpq RET_OFFSET(%rax),%rdx /* interrupt handler (2)? */
+ je 2f /* if not: */
+ movq (%rax),%rax /* try next frame */
+ testq %rax,%rax
+ jnz 0b
+ ud2 /* oops, didn't find frame, fix me :/ */
+
+1: movq $kdb_from_iret,RET_OFFSET(%rax)
+ ret /* returns to kernel/user stack */
+
+2: movq $kdb_from_iret_i,RET_OFFSET(%rax)
+ /* returns to interrupt stack */
+ ret
+
+/*
+ * On return from keyboard interrupt, we will execute
+ * kdb_from_iret_i
+ * if returning to an interrupt on the interrupt stack
+ * kdb_from_iret
+ * if returning to an interrupt on the user or kernel stack
+ */
+kdb_from_iret:
+ /* save regs in known locations */
+#if STAT_TIME
+ pushq %rbx /* caller`s %ebx is in reg */
+#else
+ movq 8(%rsp),%rax /* get caller`s %ebx */
+ pushq %rax /* push on stack */
+#endif
+ pushq %rbp
+ movq %rsp,%rdi /* pass regs */
+ call EXT(kdb_kentry) /* to kdb */
+ popq %rbp
+#if STAT_TIME
+ popq %rbx
+#else
+ popq %rax
+ movq %rax,8(%rsp)
+#endif
+ jmp EXT(return_to_iret) /* normal interrupt return */
+
+kdb_from_iret_i: /* on interrupt stack */
+ pop %rdx /* restore saved registers */
+ pop %rcx
+ pop %rax
+ pushq $0 /* zero error code */
+ pushq $0 /* zero trap number */
+ pusha /* save general registers */
+ PUSH_SEGMENTS(%rdx) /* save segment registers */
+ movq %rsp,%rdx /* pass regs, */
+ movq $0,%rsi /* code, */
+ movq $-1,%rdi /* type to kdb */
+ call EXT(kdb_trap)
+ POP_SEGMENTS(%rdx) /* restore segment registers */
+ popa /* restore general registers */
+ addq $16,%rsp
+
+// TODO: test it before dropping ud2
+	movq (%rsp),%rax
+	ud2
+ iretq
+
+#endif /* MACH_KDB */
+
+#if MACH_TTD
+/*
+ * Same code as that above for the keyboard entry into kdb.
+ */
+ENTRY(kttd_intr)
+// TODO: test it before dropping ud2
+	ud2
+ movq %rbp,%rax /* save caller`s frame pointer */
+ movq $EXT(return_to_iret),%rcx /* interrupt return address 1 */
+ movq $_return_to_iret_i,%rdx /* interrupt return address 2 */
+
+0: cmpq 32(%rax),%rcx /* does this frame return to */
+ /* interrupt handler (1)? */
+ je 1f
+ cmpq 32(%rax),%rdx /* interrupt handler (2)? */
+ je 2f /* if not: */
+ movq (%rax),%rax /* try next frame */
+ jmp 0b
+
+1: movq $ttd_from_iret,32(%rax) /* returns to kernel/user stack */
+ ret
+
+2: movq $ttd_from_iret_i,32(%rax)
+ /* returns to interrupt stack */
+ ret
+
+/*
+ * On return from keyboard interrupt, we will execute
+ * ttd_from_iret_i
+ * if returning to an interrupt on the interrupt stack
+ * ttd_from_iret
+ * if returning to an interrupt on the user or kernel stack
+ */
+ttd_from_iret:
+ /* save regs in known locations */
+#if STAT_TIME
+ pushq %rbx /* caller`s %ebx is in reg */
+#else
+ movq 8(%rsp),%rax /* get caller`s %ebx */
+ pushq %rax /* push on stack */
+#endif
+ pushq %rbp
+ pushq %rsi
+ pushq %rdi
+ movq %rsp,%rdi /* pass regs */
+ call _kttd_netentry /* to kdb */
+ popq %rdi /* restore registers */
+ popq %rsi
+ popq %rbp
+#if STAT_TIME
+ popq %rbx
+#else
+ popq %rax
+ movq %rax,8(%rsp)
+#endif
+ jmp EXT(return_to_iret) /* normal interrupt return */
+
+ttd_from_iret_i: /* on interrupt stack */
+ pop %rdx /* restore saved registers */
+ pop %rcx
+ pop %rax
+ pushq $0 /* zero error code */
+ pushq $0 /* zero trap number */
+ pusha /* save general registers */
+ PUSH_SEGMENTS_ISR(%rdx) /* save segment registers */
+ ud2 // TEST it
+ movq %rsp,%rdx /* pass regs, */
+ movq $0,%rsi /* code, */
+ movq $-1,%rdi /* type to kdb */
+ call _kttd_trap
+ POP_SEGMENTS_ISR(%rdx) /* restore segment registers */
+ popa /* restore general registers */
+ addq $16,%rsp
+
+// TODO: test it before dropping ud2
+	movq (%rsp),%rax
+	ud2
+ iretq
+
+#endif /* MACH_TTD */
+
+#ifdef USER32
+/*
+ * System call enters through a call gate. Flags are not saved -
+ * we must shuffle stack to look like trap save area.
+ *
+ * rsp-> old eip
+ * old cs
+ * old rsp
+ * old ss
+ *
+ * eax contains system call number.
+ */
+ENTRY(syscall)
+syscall_entry:
+ pushf /* save flags as soon as possible */
+syscall_entry_2:
+ cld /* clear direction flag */
+
+ pushq %rax /* save system call number */
+ pushq $0 /* clear trap number slot */
+
+ pusha /* save the general registers */
+ PUSH_SEGMENTS(%rdx) /* and the segment registers */
+ SET_KERNEL_SEGMENTS(%rdx) /* switch to kernel data segment */
+
+/*
+ * Shuffle eflags,eip,cs into proper places
+ */
+
+ movq R_EIP(%rsp),%rbx /* eflags are in EIP slot */
+ movq R_CS(%rsp),%rcx /* eip is in CS slot */
+ movq R_EFLAGS(%rsp),%rdx /* cs is in EFLAGS slot */
+ movq %rcx,R_EIP(%rsp) /* fix eip */
+ movq %rdx,R_CS(%rsp) /* fix cs */
+ movq %rbx,R_EFLAGS(%rsp) /* fix eflags */
+
+ CPU_NUMBER_NO_STACK(%edx)
+ TIME_TRAP_SENTRY
+
+ movq CX(EXT(kernel_stack),%rdx),%rbx
+ /* get current kernel stack */
+ xchgq %rbx,%rsp /* switch stacks - %ebx points to */
+ /* user registers. */
+ /* user regs pointer already set */
+
+/*
+ * Check for MACH or emulated system call
+ */
+syscall_entry_3:
+ movq MY(ACTIVE_THREAD),%rdx
+ /* point to current thread */
+ movq TH_TASK(%rdx),%rdx /* point to task */
+ movq TASK_EMUL(%rdx),%rdx /* get emulation vector */
+ orq %rdx,%rdx /* if none, */
+ je syscall_native /* do native system call */
+ movq %rax,%rcx /* copy system call number */
+ subq DISP_MIN(%rdx),%rcx /* get displacement into syscall */
+ /* vector table */
+ jl syscall_native /* too low - native system call */
+ cmpq DISP_COUNT(%rdx),%rcx /* check range */
+ jnl syscall_native /* too high - native system call */
+ movq DISP_VECTOR(%rdx,%rcx,4),%rdx
+ /* get the emulation vector */
+ orq %rdx,%rdx /* emulated system call if not zero */
+ jnz syscall_emul
+
+/*
+ * Native system call.
+ */
+syscall_native:
+ negl %eax /* get system call number */
+ jl mach_call_range /* out of range if it was positive */
+ cmpl EXT(mach_trap_count),%eax /* check system call table bounds */
+ jg mach_call_range /* error if out of range */
+#if 0 /* debug hack to show the syscall number on the screen */
+ movb %al,%dl
+ shrb $4,%dl
+ orb $0x30,%dl
+ movb $0x0f,%dh
+ movw %dx,0xb800a
+ movb %al,%dl
+ andb $0xf,%dl
+ orb $0x30,%dl
+ movb $0xf,%dh
+ movw %dx,0xb800c
+#endif
+ shll $5,%eax /* manual indexing of mach_trap_t */
+ xorq %r10,%r10
+ mov EXT(mach_trap_table)(%rax),%r10
+ /* get number of arguments */
+ andq %r10,%r10
+ jz mach_call_call /* skip argument copy if none */
+
+ movq $USER_DS,%rdx /* use user data segment for accesses */
+ mov %dx,%fs
+ movq %rsp,%r11 /* save kernel ESP for error recovery */
+
+ movq R_UESP(%rbx),%rbp /* get user stack pointer */
+ addq $4,%rbp /* Skip user return address */
+
+#define PARAM(reg,ereg) \
+ xorq %reg,%reg ;\
+ RECOVER(mach_call_addr_push) \
+	movl %fs:(%rbp),%ereg	/* fetch one parameter */ ;\
+ addq $4,%rbp ;\
+ dec %r10 ;\
+ jz mach_call_call
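+/*
+ * Each PARAM zero-extends one 32-bit argument from the user stack
+ * into the 64-bit register expected by the C calling convention,
+ * with RECOVER turning a faulting access into a page-fault trap,
+ * and branches to mach_call_call once all %r10 arguments are in.
+ */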
+
+ PARAM(rdi,edi) /* 1st parameter */
+ PARAM(rsi,esi) /* 2nd parameter */
+ PARAM(rdx,edx) /* 3rd parameter */
+ PARAM(rcx,ecx) /* 4th parameter */
+ PARAM(r8,r8d) /* 5th parameter */
+ PARAM(r9,r9d) /* 6th parameter */
+
+ lea (%rbp,%r10,4),%rbp /* point past last argument */
+ xorq %r12,%r12
+
+0: subq $4,%rbp
+ RECOVER(mach_call_addr_push)
+ movl %fs:(%rbp),%r12d
+ pushq %r12 /* push argument on stack */
+ dec %r10
+ jnz 0b /* loop for all arguments */
+
+mach_call_call:
+
+#ifdef DEBUG
+ testb $0xff,EXT(syscall_trace)
+ jz 0f
+ movq %rax,%rdi
+ call EXT(syscall_trace_print)
+ /* will return with syscallofs still (or again) in eax */
+0:
+#endif /* DEBUG */
+ call *EXT(mach_trap_table)+8(%rax) /* call procedure */
+ movq %rsp,%rcx /* get kernel stack */
+ or $(KERNEL_STACK_SIZE-1),%rcx
+ movq -7-IKS_SIZE(%rcx),%rsp /* switch back to PCB stack */
+ movq %rax,R_EAX(%rsp) /* save return value */
+ jmp _return_from_trap /* return to user */
+
+/*
+ * Address out of range. Change to page fault.
+ * %rbp holds failing address.
+ */
+mach_call_addr_push:
+ movq %r11,%rsp /* clean parameters from stack */
+mach_call_addr:
+ movq %rbp,R_CR2(%rbx) /* set fault address */
+ movq $(T_PAGE_FAULT),R_TRAPNO(%rbx)
+ /* set page-fault trap */
+ movq $(T_PF_USER),R_ERR(%rbx)
+ /* set error code - read user space */
+ jmp _take_trap /* treat as a trap */
+
+/*
+ * System call out of range. Treat as invalid-instruction trap.
+ * (? general protection?)
+ */
+mach_call_range:
+ movq $(T_INVALID_OPCODE),R_TRAPNO(%rbx)
+ /* set invalid-operation trap */
+ movq $0,R_ERR(%rbx) /* clear error code */
+ jmp _take_trap /* treat as a trap */
+
+/*
+ * User space emulation of system calls.
+ * edx - user address to handle syscall
+ *
+ * User stack will become:
+ * ursp-> eflags
+ * eip
+ * eax still contains syscall number.
+ */
+syscall_emul:
+ movq $USER_DS,%rdi /* use user data segment for accesses */
+ mov %di,%fs
+
+/* XXX what about write-protected pages? */
+ movq R_UESP(%rbx),%rdi /* get user stack pointer */
+ subq $16,%rdi /* push space for new arguments */
+ movq R_EFLAGS(%rbx),%rax /* move flags */
+ RECOVER(syscall_addr)
+ movl %eax,%fs:0(%rdi) /* to user stack */
+ movl R_EIP(%rbx),%eax /* move eip */
+ RECOVER(syscall_addr)
+ movl %eax,%fs:4(%rdi) /* to user stack */
+ movq %rdi,R_UESP(%rbx) /* set new user stack pointer */
+ movq %rdx,R_EIP(%rbx) /* change return address to trap */
+ movq %rbx,%rsp /* back to PCB stack */
+// TODO: test it before dropping ud2
+	ud2
+ jmp _return_from_trap /* return to user */
+
+/*
+ * Address error - address is in %edi.
+ */
+syscall_addr:
+ movq %rdi,R_CR2(%rbx) /* set fault address */
+ movq $(T_PAGE_FAULT),R_TRAPNO(%rbx)
+ /* set page-fault trap */
+ movq $(T_PF_USER),R_ERR(%rbx)
+ /* set error code - read user space */
+ jmp _take_trap /* treat as a trap */
+END(syscall)
+
+#else /* USER32 */
+
+/* Entry point for 64-bit syscalls.
+ * On entry we're still on the user stack, so better not use it. Instead we
+ * save the thread state immediately in thread->pcb->iss, then try to invoke
+ * the syscall.
+ * Note: emulated syscalls no longer seem to be used in GNU/Hurd, so they
+ * are not handled here.
+ * TODO:
+ *  - for now we assume the return address is canonical, but apparently there
+ *    can be cases where it's not (see how Linux handles this). Does it apply
+ *    here?
+ *  - check that the case where a task is suspended, and later returns via
+ *    iretq from return_from_trap, works fine in all combinations
+ */
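+/*
+ * Background for the entry sequence below: the syscall instruction
+ * itself leaves the user RIP in %rcx and the pre-syscall RFLAGS in
+ * %r11 (the live RFLAGS are then masked by IA32_FMASK), and performs
+ * no stack switch; that is why %r11 can be packed into %rax as a
+ * scratch register, and why the user stack must not be touched until
+ * we move to the kernel stack.
+ */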
+ENTRY(syscall64)
+ /* RFLAGS[32:63] are reserved, so combine syscall num (32 bit) and
+ * eflags in RAX to allow using r11 as temporary register
+ */
+ shlq $32,%r11
+ shlq $32,%rax /* make sure bits 32:63 of %rax are zero */
+ shrq $32,%rax
+ or %r11,%rax
+
+ /* Save thread state in pcb->iss, as on exception entry.
+ * Since this is triggered synchronously from userspace, we could
+ * save only the callee-preserved status according to the C ABI,
+ * plus RIP and EFLAGS for sysret
+ */
+ movq MY(ACTIVE_THREAD),%r11 /* point to current thread */
+ movq TH_PCB(%r11),%r11 /* point to pcb */
+ addq $ PCB_ISS,%r11 /* point to saved state */
+
+ mov %rsp,R_UESP(%r11) /* callee-preserved register */
+ mov %rcx,R_EIP(%r11) /* syscall places user RIP in RCX */
+ mov %rbx,R_EBX(%r11) /* callee-preserved register */
+ mov %rax,%rbx /* Now we can unpack eflags again */
+ shr $32,%rbx
+ mov %rbx,R_EFLAGS(%r11) /* ... and save them in pcb as well */
+ mov %rbp,R_EBP(%r11) /* callee-preserved register */
+ mov %r12,R_R12(%r11) /* callee-preserved register */
+ mov %r13,R_R13(%r11) /* callee-preserved register */
+ mov %r14,R_R14(%r11) /* callee-preserved register */
+ mov %r15,R_R15(%r11) /* callee-preserved register */
+
+ /* Save syscall number and args for SYSCALL_EXAMINE/MSG_EXAMINE in glibc.
+	 * Note: the syscall number is only 32 bit, in EAX, so we sign-extend it
+	 * into RAX, discarding the EFLAGS bits packed in the upper half.
+ */
+ cdqe /* sign-extend EAX in RAX */
+ mov %rax,R_EAX(%r11) /* syscall number */
+ mov %rdi,R_EDI(%r11) /* syscall arg0 */
+ mov %rsi,R_ESI(%r11) /* syscall arg1 */
+ mov %rdx,R_EDX(%r11) /* syscall arg2 */
+ mov %r10,R_R10(%r11) /* syscall arg3 */
+ mov %r8,R_R8(%r11) /* syscall arg4 */
+ mov %r9,R_R9(%r11) /* syscall arg5 */
+
+ mov %r11,%rbx /* prepare for error handling */
+ mov %r10,%rcx /* fix arg3 location according to C ABI */
+
+ /* switch to kernel stack, then we can enable interrupts */
+ CPU_NUMBER_NO_STACK(%r11d)
+ movq CX(EXT(kernel_stack),%r11),%rsp
+ sti
+
+ /* Now we have saved state and args 1-6 are in place.
+	 * Before invoking the syscall we do some bounds checking and,
+	 * if we have more than 6 arguments, we need to copy the
+ * remaining ones to the kernel stack, handling page faults when
+ * accessing the user stack.
+ */
+ negl %eax /* get system call number */
+ jl _syscall64_range /* out of range if it was positive */
+ cmpl EXT(mach_trap_count),%eax /* check system call table bounds */
+ jg _syscall64_range /* error if out of range */
+ shll $5,%eax /* manual indexing of mach_trap_t */
+
+ /* check if we need to place some arguments on the stack */
+_syscall64_args_stack:
+ mov EXT(mach_trap_table)(%rax),%r10 /* get number of arguments */
+ subq $6,%r10 /* the first 6 args are already in place */
+ jle _syscall64_call /* skip argument copy if num args <= 6 */
+
+ movq R_UESP(%rbx),%r11 /* get user stack pointer */
+ addq $8,%r11 /* Skip user return address */
+
+ lea (%r11,%r10,8),%r11 /* point past last argument */
+
+0: subq $8,%r11
+ RECOVER(_syscall64_addr_push)
+ mov (%r11),%r12
+ pushq %r12 /* push argument on stack */
+ dec %r10
+ jnz 0b /* loop for all remaining arguments */
+
+_syscall64_call:
+ call *EXT(mach_trap_table)+8(%rax) /* call procedure */
+
+_syscall64_check_for_ast:
+ /* Check for ast. */
+ CPU_NUMBER_NO_GS(%r11d)
+ cmpl $0,CX(EXT(need_ast),%r11)
+ jz _syscall64_restore_state
+
+ /* Save the syscall return value, both on our stack, for the case
+ * i386_astintr returns normally, and in the PCB stack, in case it
+ * instead calls thread_block(thread_exception_return).
+ */
+ pushq %rax /* save the return value on our stack */
+ pushq $0 /* dummy value to keep the stack aligned */
+
+ /* Find the PCB stack. */
+ movq %rsp,%rcx
+ or $(KERNEL_STACK_SIZE-1),%rcx
+ movq -7-IKS_SIZE(%rcx),%rcx
+
+ movq %rax,R_EAX(%rcx) /* save the return value in the PCB stack */
+ call EXT(i386_astintr)
+ popq %rax
+ popq %rax /* restore the return value */
+ jmp _syscall64_check_for_ast /* check again */
+
+_syscall64_restore_state:
+ /* Restore thread state and return to user using sysret. */
+ cli /* block interrupts when using the user stack in kernel space */
+ movq MY(ACTIVE_THREAD),%r11 /* point to current thread */
+ movq TH_PCB(%r11),%r11 /* point to pcb */
+ addq $ PCB_ISS,%r11 /* point to saved state */
+
+ /* Restore syscall args. Note: we can't restore the syscall number in
+ * RAX because it needs to hold the return value.*/
+ mov R_EDI(%r11),%rdi /* syscall arg0 */
+ mov R_ESI(%r11),%rsi /* syscall arg1 */
+ mov R_EDX(%r11),%rdx /* syscall arg2 */
+ mov R_R10(%r11),%r10 /* syscall arg3 */
+ mov R_R8(%r11),%r8 /* syscall arg4 */
+ mov R_R9(%r11),%r9 /* syscall arg5 */
+
+ mov R_UESP(%r11),%rsp /* callee-preserved register,
+ * also switch back to user stack */
+ mov R_EIP(%r11),%rcx /* sysret convention */
+ mov R_EBX(%r11),%rbx /* callee-preserved register */
+ mov R_EBP(%r11),%rbp /* callee-preserved register */
+ mov R_R12(%r11),%r12 /* callee-preserved register */
+ mov R_R13(%r11),%r13 /* callee-preserved register */
+ mov R_R14(%r11),%r14 /* callee-preserved register */
+ mov R_R15(%r11),%r15 /* callee-preserved register */
+ mov R_EFLAGS(%r11),%r11 /* sysret convention */
+
+ sysretq /* fast return to user-space, the thread didn't block */
+
+/* Error handling fragments, from here we jump directly to the trap handler */
+_syscall64_addr_push:
+ movq %r11,R_CR2(%rbx) /* set fault address */
+ movq $(T_PAGE_FAULT),R_TRAPNO(%rbx) /* set page-fault trap */
+ movq $(T_PF_USER),R_ERR(%rbx) /* set error code - read user space */
+ jmp _take_trap /* treat as a trap */
+
+_syscall64_range:
+ movq $(T_INVALID_OPCODE),R_TRAPNO(%rbx)
+ /* set invalid-operation trap */
+ movq $0,R_ERR(%rbx) /* clear error code */
+ jmp _take_trap /* treat as a trap */
+
+END(syscall64)
+#endif /* USER32 */
+
+ .data
+DATA(cpu_features)
+DATA(cpu_features_edx)
+ .long 0
+DATA(cpu_features_ecx)
+ .long 0
+ .text
+
+/* Discover what kind of cpu we have; return the family number
+ (3, 4, 5, 6, for 386, 486, 586, 686 respectively). */
+ENTRY(discover_x86_cpu_type)
+ /* We are a modern enough processor to have the CPUID instruction;
+ use it to find out what we are. */
+ movl $1,%eax /* Fetch CPU type info ... */
+ cpuid /* ... into eax */
+ movl %ecx,cpu_features_ecx /* Keep a copy */
+ movl %edx,cpu_features_edx /* Keep a copy */
+ shrl $8,%eax /* Slide family bits down */
+ andl $15,%eax /* And select them */
+ ret /* And return */
+
+
+/* */
+/*
+ * Utility routines.
+ */
+
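+/*
+ * Calling-convention note (sketch): copyin(user_src, kernel_dst, n)
+ * and copyout(kernel_src, user_dst, n) both return 0 on success and
+ * 1 on a faulting access.  The C ABI delivers the two pointers in
+ * %rdi/%rsi while movs copies from (%rsi) to (%rdi), hence the xchgq
+ * on entry.
+ */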
+ENTRY(copyin)
+ xchgq %rsi,%rdi /* Get user source and kernel destination */
+
+copyin_remainder:
+ /*cld*/ /* count up: default mode in all GCC code */
+ movq %rdx,%rcx /* move by longwords first */
+ shrq $3,%rcx
+ RECOVER(copyin_fail)
+ rep
+ movsq /* move longwords */
+ movq %rdx,%rcx /* now move remaining bytes */
+ andq $7,%rcx
+ RECOVER(copyin_fail)
+ rep
+ movsb
+ xorq %rax,%rax /* return 0 for success */
+
+copyin_ret:
+ ret /* and return */
+
+copyin_fail:
+ movq $1,%rax /* return 1 for failure */
+ jmp copyin_ret /* pop frame and return */
+
+bogus:
+ ud2
+
+ENTRY(copyout)
+ xchgq %rsi,%rdi /* Get user source and kernel destination */
+
+copyout_remainder:
+ movq %rdx,%rax /* use count */
+ /*cld*/ /* count up: always this way in GCC code */
+ movq %rax,%rcx /* move by longwords first */
+ shrq $3,%rcx
+ RECOVER(copyout_fail)
+ rep
+ movsq
+ movq %rax,%rcx /* now move remaining bytes */
+ andq $7,%rcx
+ RECOVER(copyout_fail)
+ rep
+ movsb /* move */
+ xorq %rax,%rax /* return 0 for success */
+
+copyout_ret:
+ ret /* and return */
+
+copyout_fail:
+ movq $1,%rax /* return 1 for failure */
+ jmp copyout_ret /* pop frame and return */
+
+/*
+ * int inst_fetch(int eip, int cs);
+ *
+ * Fetch instruction byte. Return -1 if invalid address.
+ */
+ENTRY(inst_fetch)
+ movq S_ARG1, %rax /* get segment */
+ movw %ax,%fs /* into FS */
+ movq S_ARG0, %rax /* get offset */
+ RETRY(EXT(inst_fetch)) /* re-load FS on retry */
+ RECOVER(_inst_fetch_fault)
+ movzbq %fs:(%rax),%rax /* load instruction byte */
+ ret
+
+_inst_fetch_fault:
+ movq $-1,%rax /* return -1 if error */
+ ret
+
+
+/*
+ * Done with recovery and retry tables.
+ */
+ RECOVER_TABLE_END
+ RETRY_TABLE_END
+
+
+
+/*
+ * cpu_shutdown()
+ * Force a reboot by triple fault: with an empty IDT loaded, the
+ * divide-by-zero below cannot be delivered, and the CPU resets.
+ */
+null_idt:
+ .space 8 * 32
+
+null_idtr:
+ .word 8 * 32 - 1
+ .quad null_idt
+
+Entry(cpu_shutdown)
+ lidt null_idtr /* disable the interrupt handler */
+ xor %rcx,%rcx /* generate a divide by zero */
+ div %rcx,%rax /* reboot now */
+ ret /* this will "never" be executed */