Index: head/sys/amd64/amd64/bpf_jit_machdep.c
===================================================================
--- head/sys/amd64/amd64/bpf_jit_machdep.c	(revision 179966)
+++ head/sys/amd64/amd64/bpf_jit_machdep.c	(revision 179967)
@@ -1,494 +1,523 @@
 /*-
  * Copyright (c) 2002 - 2003 NetGroup, Politecnico di Torino (Italy)
  * Copyright (c) 2005 Jung-uk Kim <jkim@FreeBSD.org>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  *
  * 1. Redistributions of source code must retain the above copyright
  * notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  * notice, this list of conditions and the following disclaimer in the
  * documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the Politecnico di Torino nor the names of its
  * contributors may be used to endorse or promote products derived from
  * this software without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS intERRUPTION) HOWEVER CAUSED AND ON ANY
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_bpf.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/types.h>
 #include <sys/socket.h>
 #include <sys/malloc.h>
 
 #include <net/if.h>
 #include <net/bpf.h>
 #include <net/bpf_jitter.h>
 
 #include <amd64/amd64/bpf_jit_machdep.h>
 
 bpf_filter_func	bpf_jit_compile(struct bpf_insn *, u_int, int *);
 
 /*
- * emit routine to update the jump table
+ * First-pass emit routine: no code is written, only the size of the
+ * code generated for the current BPF instruction (refs[bpf_pc]) and the
+ * running total (cur_ip) are advanced by 'len'.  'value' is ignored.
  */
 static void
 emit_length(bpf_bin_stream *stream, u_int value, u_int len)
 {
 
 	(stream->refs)[stream->bpf_pc] += len;
 	stream->cur_ip += len;
 }
 
 /*
- * emit routine to output the actual binary code
+ * Second-pass emit routine: store the low 'len' bytes (1, 2 or 4) of
+ * 'value' at ibuf + cur_ip and advance cur_ip.  Any other 'len' is
+ * silently ignored.
  */
 static void
 emit_code(bpf_bin_stream *stream, u_int value, u_int len)
 {
 
 	switch (len) {
 	case 1:
 		stream->ibuf[stream->cur_ip] = (u_char)value;
 		stream->cur_ip++;
 		break;
 
 	case 2:
 		*((u_short *)(stream->ibuf + stream->cur_ip)) = (u_short)value;
 		stream->cur_ip += 2;
 		break;
 
 	case 4:
 		*((u_int *)(stream->ibuf + stream->cur_ip)) = value;
 		stream->cur_ip += 4;
 		break;
 	}
 
 	return;
 }
 
 /*
- * Function that does the real stuff
+ * Compile the BPF program 'prog' ('nins' instructions) to native amd64
+ * code.  Two passes are made over the program: the first uses
+ * emit_length() to size the code generated for each instruction, the
+ * second uses emit_code() to write it into a newly allocated buffer.
+ * 'mem' is the base address of the scratch memory accessed by
+ * BPF_LD|BPF_MEM, BPF_LDX|BPF_MEM, BPF_ST and BPF_STX.
+ *
+ * Returns the generated function, or NULL if the program is empty,
+ * contains an unsupported opcode, or memory cannot be allocated.
+ *
+ * NOTE(review): the packet bounds checks below use a signed JLE and do
+ * not guard against wrap-around of the computed offset; confirm that
+ * callers validate offsets before relying on them.
  */
 bpf_filter_func
 bpf_jit_compile(struct bpf_insn *prog, u_int nins, int *mem)
 {
 	struct bpf_insn *ins;
 	u_int i, pass;
 	bpf_bin_stream stream;
 
 	/*
 	 * NOTE: do not modify the name of this variable, as it's used by
 	 * the macros to emit code.
 	 */
 	emit_func emitm;
 
 	/* Do not compile an empty filter. */
 	if (nins == 0)
 		return NULL;
 
 	/* Allocate the reference table for the jumps */
 	stream.refs = (u_int *)malloc((nins + 1) * sizeof(u_int),
 	    M_BPFJIT, M_NOWAIT);
 	if (stream.refs == NULL)
 		return NULL;
 
 	/* Reset the reference table */
 	for (i = 0; i < nins + 1; i++)
 		stream.refs[i] = 0;
 
 	stream.cur_ip = 0;
 	stream.bpf_pc = 0;
 
 	/*
 	 * the first pass will emit the lengths of the instructions
 	 * to create the reference table
 	 */
 	emitm = emit_length;
 
 	pass = 0;
 	for (;;) {
 		ins = prog;
 
 		/* create the procedure header */
 		PUSH(RBP);
-		MOVrq(RBP, RSP);
-		MOVoqd(RBP, -8, ESI);
-		MOVoqd(RBP, -12, EDX);
+		MOVrq(RSP, RBP);
+		/*
+		 * Reserve 16 bytes below %rbp for the two spilled arguments
+		 * before %rbx is pushed; otherwise the push would overwrite
+		 * the slot at -8(%rbp).
+		 */
+		PUSH(RSI);
+		PUSH(RDX);
+		MOVdoq(ESI, -8, RBP);
+		MOVdoq(EDX, -12, RBP);
 		PUSH(RBX);
-		MOVrq(RBX, RDI);
+		MOVrq(RDI, RBX);
 
 		for (i = 0; i < nins; i++) {
 			stream.bpf_pc++;
 
 			switch (ins->code) {
 			default:
+				/* Unsupported opcode: do not leak our buffers. */
+				if (pass == 1)
+					free(stream.ibuf, M_BPFJIT);
+				free(stream.refs, M_BPFJIT);
 				return NULL;
 
 			case BPF_RET|BPF_K:
-				MOVid(EAX, ins->k);
+				MOVid(ins->k, EAX);
 				POP(RBX);
 				LEAVE_RET();
 				break;
 
 			case BPF_RET|BPF_A:
 				POP(RBX);
 				LEAVE_RET();
 				break;
 
 			case BPF_LD|BPF_W|BPF_ABS:
-				MOVid(ECX, ins->k);
-				MOVrd(ESI, ECX);
-				ADDib(ECX, sizeof(int));
-				CMPodd(ECX, RBP, -12);
+				MOVid(ins->k, ECX);
+				MOVrd(ECX, ESI);
+				ADDib(sizeof(int), ECX);
+				CMPoqd(-12, RBP, ECX);
 				JLEb(5);
 				ZERO_EAX();
 				POP(RBX);
 				LEAVE_RET();
-				MOVobd(EAX, RBX, RSI);
+				MOVobd(RBX, RSI, EAX);
 				BSWAP(EAX);
 				break;
 
 			case BPF_LD|BPF_H|BPF_ABS:
 				ZERO_EAX();
-				MOVid(ECX, ins->k);
-				MOVrd(ESI, ECX);
-				ADDib(ECX, sizeof(short));
-				CMPodd(ECX, RBP, -12);
+				MOVid(ins->k, ECX);
+				MOVrd(ECX, ESI);
+				ADDib(sizeof(short), ECX);
+				CMPoqd(-12, RBP, ECX);
 				JLEb(3);
 				POP(RBX);
 				LEAVE_RET();
-				MOVobw(AX, RBX, RSI);
+				MOVobw(RBX, RSI, AX);
 				SWAP_AX();
 				break;
 
 			case BPF_LD|BPF_B|BPF_ABS:
 				ZERO_EAX();
-				MOVid(ECX, ins->k);
-				CMPodd(ECX, RBP, -12);
+				MOVid(ins->k, ECX);
+				CMPoqd(-12, RBP, ECX);
 				JLEb(3);
 				POP(RBX);
 				LEAVE_RET();
-				MOVobb(AL, RBX, RCX);
+				MOVobb(RBX, RCX, AL);
 				break;
 
 			case BPF_LD|BPF_W|BPF_LEN:
-				MOVodd(EAX, RBP, -8);
+				MOVoqd(-8, RBP, EAX);
 				break;
 
 			case BPF_LDX|BPF_W|BPF_LEN:
-				MOVodd(EDX, RBP, -8);
+				MOVoqd(-8, RBP, EDX);
 				break;
 
 			case BPF_LD|BPF_W|BPF_IND:
-				MOVid(ECX, ins->k);
-				ADDrd(ECX, EDX);
-				MOVrd(ESI, ECX);
-				ADDib(ECX, sizeof(int));
-				CMPodd(ECX, RBP, -12);
+				MOVid(ins->k, ECX);
+				ADDrd(EDX, ECX);
+				MOVrd(ECX, ESI);
+				ADDib(sizeof(int), ECX);
+				CMPoqd(-12, RBP, ECX);
 				JLEb(5);
 				ZERO_EAX();
 				POP(RBX);
 				LEAVE_RET();
-				MOVobd(EAX, RBX, RSI);
+				MOVobd(RBX, RSI, EAX);
 				BSWAP(EAX);
 				break;
 
 			case BPF_LD|BPF_H|BPF_IND:
 				ZERO_EAX();
-				MOVid(ECX, ins->k);
-				ADDrd(ECX, EDX);
-				MOVrd(ESI, ECX);
-				ADDib(ECX, sizeof(short));
-				CMPodd(ECX, RBP, -12);
+				MOVid(ins->k, ECX);
+				ADDrd(EDX, ECX);
+				MOVrd(ECX, ESI);
+				ADDib(sizeof(short), ECX);
+				CMPoqd(-12, RBP, ECX);
 				JLEb(3);
 				POP(RBX);
 				LEAVE_RET();
-				MOVobw(AX, RBX, RSI);
+				MOVobw(RBX, RSI, AX);
 				SWAP_AX();
 				break;
 
 			case BPF_LD|BPF_B|BPF_IND:
 				ZERO_EAX();
-				MOVid(ECX, ins->k);
-				ADDrd(ECX, EDX);
-				CMPodd(ECX, RBP, -12);
+				MOVid(ins->k, ECX);
+				ADDrd(EDX, ECX);
+				CMPoqd(-12, RBP, ECX);
 				JLEb(3);
 				POP(RBX);
 				LEAVE_RET();
-				MOVobb(AL, RBX, RCX);
+				MOVobb(RBX, RCX, AL);
 				break;
 
 			case BPF_LDX|BPF_MSH|BPF_B:
-				MOVid(ECX, ins->k);
-				CMPodd(ECX, RBP, -12);
+				MOVid(ins->k, ECX);
+				CMPoqd(-12, RBP, ECX);
 				JLEb(5);
 				ZERO_EAX();
 				POP(RBX);
 				LEAVE_RET();
 				ZERO_EDX();
-				MOVobb(DL, RBX, RCX);
-				ANDib(DL, 0xf);
-				SHLib(EDX, 2);
+				MOVobb(RBX, RCX, DL);
+				ANDib(0xf, DL);
+				SHLib(2, EDX);
 				break;
 
 			case BPF_LD|BPF_IMM:
-				MOVid(EAX, ins->k);
+				MOVid(ins->k, EAX);
 				break;
 
 			case BPF_LDX|BPF_IMM:
-				MOVid(EDX, ins->k);
+				MOVid(ins->k, EDX);
 				break;
 
 			case BPF_LD|BPF_MEM:
-				MOViq(RCX, (uintptr_t)mem);
-				MOVid(ESI, ins->k * 4);
-				MOVobd(EAX, RCX, RSI);
+				MOViq((uintptr_t)mem, RCX);
+				MOVid(ins->k * 4, ESI);
+				MOVobd(RCX, RSI, EAX);
 				break;
 
 			case BPF_LDX|BPF_MEM:
-				MOViq(RCX, (uintptr_t)mem);
-				MOVid(ESI, ins->k * 4);
-				MOVobd(EDX, RCX, RSI);
+				MOViq((uintptr_t)mem, RCX);
+				MOVid(ins->k * 4, ESI);
+				MOVobd(RCX, RSI, EDX);
 				break;
 
 			case BPF_ST:
 				/*
 				 * XXX this command and the following could
 				 * be optimized if the previous instruction
 				 * was already of this type
 				 */
-				MOViq(RCX, (uintptr_t)mem);
-				MOVid(ESI, ins->k * 4);
-				MOVomd(RCX, RSI, EAX);
+				MOViq((uintptr_t)mem, RCX);
+				MOVid(ins->k * 4, ESI);
+				MOVomd(EAX, RCX, RSI);
 				break;
 
 			case BPF_STX:
-				MOViq(RCX, (uintptr_t)mem);
-				MOVid(ESI, ins->k * 4);
-				MOVomd(RCX, RSI, EDX);
+				MOViq((uintptr_t)mem, RCX);
+				MOVid(ins->k * 4, ESI);
+				MOVomd(EDX, RCX, RSI);
 				break;
 
 			case BPF_JMP|BPF_JA:
 				JMP(stream.refs[stream.bpf_pc + ins->k] -
 				    stream.refs[stream.bpf_pc]);
 				break;
 
 			case BPF_JMP|BPF_JGT|BPF_K:
-				CMPid(EAX, ins->k);
+				CMPid(ins->k, EAX);
 				/* 5 is the size of the following JMP */
-				JG(stream.refs[stream.bpf_pc + ins->jt] -
+				/* BPF compares are unsigned: JA, not JG */
+				JA(stream.refs[stream.bpf_pc + ins->jt] -
 				    stream.refs[stream.bpf_pc] + 5 );
 				JMP(stream.refs[stream.bpf_pc + ins->jf] -
 				    stream.refs[stream.bpf_pc]);
 				break;
 
 			case BPF_JMP|BPF_JGE|BPF_K:
-				CMPid(EAX, ins->k);
+				CMPid(ins->k, EAX);
-				JGE(stream.refs[stream.bpf_pc + ins->jt] -
+				/* BPF compares are unsigned: JAE, not JGE */
+				JAE(stream.refs[stream.bpf_pc + ins->jt] -
 				    stream.refs[stream.bpf_pc] + 5);
 				JMP(stream.refs[stream.bpf_pc + ins->jf] -
 				    stream.refs[stream.bpf_pc]);
 				break;
 
 			case BPF_JMP|BPF_JEQ|BPF_K:
-				CMPid(EAX, ins->k);
+				CMPid(ins->k, EAX);
 				JE(stream.refs[stream.bpf_pc + ins->jt] -
 				    stream.refs[stream.bpf_pc] + 5);
 				JMP(stream.refs[stream.bpf_pc + ins->jf] -
 				    stream.refs[stream.bpf_pc]);
 				break;
 
 			case BPF_JMP|BPF_JSET|BPF_K:
-				MOVrd(ECX, EAX);
-				ANDid(ECX, ins->k);
+				MOVrd(EAX, ECX);
+				ANDid(ins->k, ECX);
 				JE(stream.refs[stream.bpf_pc + ins->jf] -
 				    stream.refs[stream.bpf_pc] + 5);
 				JMP(stream.refs[stream.bpf_pc + ins->jt] -
 				    stream.refs[stream.bpf_pc]);
 				break;
 
 			case BPF_JMP|BPF_JGT|BPF_X:
-				CMPrd(EAX, EDX);
+				CMPrd(EDX, EAX);
 				JA(stream.refs[stream.bpf_pc + ins->jt] -
 				    stream.refs[stream.bpf_pc] + 5);
 				JMP(stream.refs[stream.bpf_pc + ins->jf] -
 				    stream.refs[stream.bpf_pc]);
 				break;
 
 			case BPF_JMP|BPF_JGE|BPF_X:
-				CMPrd(EAX, EDX);
+				CMPrd(EDX, EAX);
 				JAE(stream.refs[stream.bpf_pc + ins->jt] -
 				    stream.refs[stream.bpf_pc] + 5);
 				JMP(stream.refs[stream.bpf_pc + ins->jf] -
 				    stream.refs[stream.bpf_pc]);
 				break;
 
 			case BPF_JMP|BPF_JEQ|BPF_X:
-				CMPrd(EAX, EDX);
+				CMPrd(EDX, EAX);
 				JE(stream.refs[stream.bpf_pc + ins->jt] -
 				    stream.refs[stream.bpf_pc] + 5);
 				JMP(stream.refs[stream.bpf_pc + ins->jf] -
 				    stream.refs[stream.bpf_pc]);
 				break;
 
 			case BPF_JMP|BPF_JSET|BPF_X:
-				MOVrd(ECX, EAX);
-				ANDrd(ECX, EDX);
+				MOVrd(EAX, ECX);
+				ANDrd(EDX, ECX);
 				JE(stream.refs[stream.bpf_pc + ins->jf] -
 				    stream.refs[stream.bpf_pc] + 5);
 				JMP(stream.refs[stream.bpf_pc + ins->jt] -
 				    stream.refs[stream.bpf_pc]);
 				break;
 
 			case BPF_ALU|BPF_ADD|BPF_X:
-				ADDrd(EAX, EDX);
+				ADDrd(EDX, EAX);
 				break;
 
 			case BPF_ALU|BPF_SUB|BPF_X:
-				SUBrd(EAX, EDX);
+				SUBrd(EDX, EAX);
 				break;
 
 			case BPF_ALU|BPF_MUL|BPF_X:
-				MOVrd(ECX, EDX);
-				MULrd(EDX);
 				MOVrd(EDX, ECX);
+				MULrd(EDX);
+				MOVrd(ECX, EDX);
 				break;
 
 			case BPF_ALU|BPF_DIV|BPF_X:
-				CMPid(EDX, 0);
+				CMPid(0, EDX);
 				JNEb(5);
 				ZERO_EAX();
 				POP(RBX);
 				LEAVE_RET();
-				MOVrd(ECX, EDX);
+				MOVrd(EDX, ECX);
 				ZERO_EDX();
 				DIVrd(ECX);
-				MOVrd(EDX, ECX);
+				MOVrd(ECX, EDX);
 				break;
 
 			case BPF_ALU|BPF_AND|BPF_X:
-				ANDrd(EAX, EDX);
+				ANDrd(EDX, EAX);
 				break;
 
 			case BPF_ALU|BPF_OR|BPF_X:
-				ORrd(EAX, EDX);
+				ORrd(EDX, EAX);
 				break;
 
 			case BPF_ALU|BPF_LSH|BPF_X:
-				MOVrd(ECX, EDX);
+				MOVrd(EDX, ECX);
 				SHL_CLrb(EAX);
 				break;
 
 			case BPF_ALU|BPF_RSH|BPF_X:
-				MOVrd(ECX, EDX);
+				MOVrd(EDX, ECX);
 				SHR_CLrb(EAX);
 				break;
 
 			case BPF_ALU|BPF_ADD|BPF_K:
 				ADD_EAXi(ins->k);
 				break;
 
 			case BPF_ALU|BPF_SUB|BPF_K:
 				SUB_EAXi(ins->k);
 				break;
 
 			case BPF_ALU|BPF_MUL|BPF_K:
-				MOVrd(ECX, EDX);
-				MOVid(EDX, ins->k);
-				MULrd(EDX);
 				MOVrd(EDX, ECX);
+				MOVid(ins->k, EDX);
+				MULrd(EDX);
+				MOVrd(ECX, EDX);
 				break;
 
 			case BPF_ALU|BPF_DIV|BPF_K:
-				MOVrd(ECX, EDX);
+				MOVrd(EDX, ECX);
 				ZERO_EDX();
-				MOVid(ESI, ins->k);
+				MOVid(ins->k, ESI);
 				DIVrd(ESI);
-				MOVrd(EDX, ECX);
+				MOVrd(ECX, EDX);
 				break;
 
 			case BPF_ALU|BPF_AND|BPF_K:
-				ANDid(EAX, ins->k);
+				ANDid(ins->k, EAX);
 				break;
 
 			case BPF_ALU|BPF_OR|BPF_K:
-				ORid(EAX, ins->k);
+				ORid(ins->k, EAX);
 				break;
 
 			case BPF_ALU|BPF_LSH|BPF_K:
-				SHLib(EAX, (ins->k) & 255);
+				SHLib((ins->k) & 0xff, EAX);
 				break;
 
 			case BPF_ALU|BPF_RSH|BPF_K:
-				SHRib(EAX, (ins->k) & 255);
+				SHRib((ins->k) & 0xff, EAX);
 				break;
 
 			case BPF_ALU|BPF_NEG:
 				NEGd(EAX);
 				break;
 
 			case BPF_MISC|BPF_TAX:
-				MOVrd(EDX, EAX);
+				MOVrd(EAX, EDX);
 				break;
 
 			case BPF_MISC|BPF_TXA:
-				MOVrd(EAX, EDX);
+				MOVrd(EDX, EAX);
 				break;
 			}
 			ins++;
 		}
 
 		pass++;
 		if (pass == 2)
 			break;
 
 		stream.ibuf = (char *)malloc(stream.cur_ip, M_BPFJIT, M_NOWAIT);
 		if (stream.ibuf == NULL) {
 			free(stream.refs, M_BPFJIT);
 			return NULL;
 		}
 
 		/*
 		 * modify the reference table to contain the offsets and
 		 * not the lengths of the instructions
 		 */
 		for (i = 1; i < nins + 1; i++)
 			stream.refs[i] += stream.refs[i - 1];
 
 		/* Reset the counters */
 		stream.cur_ip = 0;
 		stream.bpf_pc = 0;
 
 		/* the second pass creates the actual code */
 		emitm = emit_code;
 	}
 
 	/*
 	 * the reference table is needed only during compilation,
 	 * now we can free it
 	 */
 	free(stream.refs, M_BPFJIT);
 
 	return (bpf_filter_func)stream.ibuf;
 }
Index: head/sys/amd64/amd64/bpf_jit_machdep.h
===================================================================
--- head/sys/amd64/amd64/bpf_jit_machdep.h	(revision 179966)
+++ head/sys/amd64/amd64/bpf_jit_machdep.h	(revision 179967)
@@ -1,437 +1,437 @@
 /*-
  * Copyright (c) 2002 - 2003 NetGroup, Politecnico di Torino (Italy)
  * Copyright (c) 2005 Jung-uk Kim <jkim@FreeBSD.org>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  *
  * 1. Redistributions of source code must retain the above copyright
  * notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  * notice, this list of conditions and the following disclaimer in the
  * documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the Politecnico di Torino nor the names of its
  * contributors may be used to endorse or promote products derived from
  * this software without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS intERRUPTION) HOWEVER CAUSED AND ON ANY
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 #ifndef _BPF_JIT_MACHDEP_H_
 #define _BPF_JIT_MACHDEP_H_
 
 /*
- * Registers
+ * Register numbers (3-bit encodings used by the instruction macros)
  */
 #define RAX	0
 #define RCX	1
 #define RDX	2
 #define RBX	3
 #define RSP	4
 #define RBP	5
 #define RSI	6
 #define RDI	7
 
 #define EAX	0
 #define ECX	1
 #define EDX	2
 #define EBX	3
 #define ESP	4
 #define EBP	5
 #define ESI	6
 #define EDI	7
 
 #define AX	0
 #define CX	1
 #define DX	2
 #define BX	3
 #define SP	4
 #define BP	5
 #define SI	6
 #define DI	7
 
 #define AL	0
 #define CL	1
 #define DL	2
 #define BL	3
 
 /* A stream of native binary code.*/
 typedef struct bpf_bin_stream {
 	/* Current native instruction pointer. */
 	int		cur_ip;
 
 	/*
-	 * Current BPF instruction pointer, i.e. position in
-	 * the BPF program reached by the jitter.
+	 * Current BPF instruction pointer, i.e. position in the BPF
+	 * program reached by the jitter; also the index into refs.
 	 */
 	int		bpf_pc;
 
 	/* Instruction buffer, contains the generated native code. */
 	char		*ibuf;
 
 	/* Jumps reference table. */
 	u_int		*refs;
 } bpf_bin_stream;
 
 /*
  * Prototype of the emit functions.
  *
  * Different emit functions are used to create the reference table and
- * to generate the actual filtering code. This allows to have simpler
- * instruction macros.
+ * to generate the actual filtering code, so the instruction macros stay
+ * the same for both passes.
  * The first parameter is the stream that will receive the data.
  * The second one is a variable containing the data.
- * The third one is the length, that can be 1, 2, or 4 since it is possible
- * to emit a byte, a short, or a word at a time.
+ * The third one is the length in bytes, which can be 1, 2, or 4 since it
+ * is possible to emit a byte, a short, or a 32-bit word at a time.
  */
 typedef void (*emit_func)(bpf_bin_stream *stream, u_int value, u_int n);
 
 /*
- * native Instruction Macros
+ * Native instruction macros; each one uses 'stream' and 'emitm' in scope.
  */
 
-/* mov r32,i32 */
-#define MOVid(r32, i32) do {						\
+/* movl i32,r32 */
+#define MOVid(i32, r32) do {						\
 	emitm(&stream, (11 << 4) | (1 << 3) | (r32 & 0x7), 1);		\
 	emitm(&stream, i32, 4);						\
 } while (0)
 
-/* mov r64,i64 */
-#define MOViq(r64, i64) do {						\
+/* movq i64,r64 */
+#define MOViq(i64, r64) do {						\
 	emitm(&stream, 0x48, 1);					\
 	emitm(&stream, (11 << 4) | (1 << 3) | (r64 & 0x7), 1);		\
 	emitm(&stream, i64, 4);						\
 	emitm(&stream, (i64 >> 32), 4);					\
 } while (0)
 
-/* mov dr32,sr32 */
-#define MOVrd(dr32, sr32) do {						\
+/* movl sr32,dr32 */
+#define MOVrd(sr32, dr32) do {						\
 	emitm(&stream, (8 << 4) | 3 | (1 << 3), 1);			\
 	emitm(&stream,							\
 	    (3 << 6) | ((dr32 & 0x7) << 3) | (sr32 & 0x7), 1);		\
 } while (0)
 
-/* mov dr64,sr64 */
-#define MOVrq(dr64, sr64) do {						\
+/* movq sr64,dr64 */
+#define MOVrq(sr64, dr64) do {						\
 	emitm(&stream, 0x48, 1);					\
 	emitm(&stream, (8 << 4) | 3 | (1 << 3), 1);			\
 	emitm(&stream,							\
 	    (3 << 6) | ((dr64 & 0x7) << 3) | (sr64 & 0x7), 1);		\
 } while (0)
 
-/* mov dr32,sr64[off] */
-#define MOVodd(dr32, sr64, off) do {					\
+/* movl off(sr64),dr32 */
+#define MOVoqd(off, sr64, dr32) do {					\
 	emitm(&stream, (8 << 4) | 3 | (1 << 3), 1);			\
 	emitm(&stream,							\
 	    (1 << 6) | ((dr32 & 0x7) << 3) | (sr64 & 0x7), 1);		\
 	emitm(&stream, off, 1);						\
 } while (0)
 
-/* mov dr64[off],sr32 */
-#define MOVoqd(dr64, off, sr32) do {					\
+/* movl sr32,off(dr64) */
+#define MOVdoq(sr32, off, dr64) do {					\
 	emitm(&stream, (8 << 4) | 1 | (1 << 3), 1);			\
 	emitm(&stream,							\
 	    (1 << 6) | ((sr32 & 0x7) << 3) | (dr64 & 0x7), 1);		\
 	emitm(&stream, off, 1);						\
 } while (0)
 
-/* mov dr32,sr64[or64] */
-#define MOVobd(dr32, sr64, or64) do {					\
+/* movl (sr64,or64,1),dr32 */
+#define MOVobd(sr64, or64, dr32) do {					\
 	emitm(&stream, (8 << 4) | 3 | (1 << 3), 1);			\
 	emitm(&stream, ((dr32 & 0x7) << 3) | 4, 1);			\
 	emitm(&stream, ((or64 & 0x7) << 3) | (sr64 & 0x7), 1);		\
 } while (0)
 
-/* mov dr16,sr64[or64] */
-#define MOVobw(dr32, sr64, or64) do {					\
+/* movw (sr64,or64,1),dr16 */
+#define MOVobw(sr64, or64, dr16) do {					\
 	emitm(&stream, 0x66, 1);					\
 	emitm(&stream, (8 << 4) | 3 | (1 << 3), 1);			\
-	emitm(&stream, ((dr32 & 0x7) << 3) | 4, 1);			\
+	emitm(&stream, ((dr16 & 0x7) << 3) | 4, 1);			\
 	emitm(&stream, ((or64 & 0x7) << 3) | (sr64 & 0x7), 1);		\
 } while (0)
 
-/* mov dr8,sr64[or64] */
-#define MOVobb(dr8, sr64, or64) do {					\
+/* movb (sr64,or64,1),dr8 */
+#define MOVobb(sr64, or64, dr8) do {					\
 	emitm(&stream, 0x8a, 1);					\
 	emitm(&stream, ((dr8 & 0x7) << 3) | 4, 1);			\
 	emitm(&stream, ((or64 & 0x7) << 3) | (sr64 & 0x7), 1);		\
 } while (0)
 
-/* mov [dr64][or64],sr32 */
-#define MOVomd(dr64, or64, sr32) do {					\
+/* movl sr32,(dr64,or64,1) */
+#define MOVomd(sr32, dr64, or64) do {					\
 	emitm(&stream, 0x89, 1);					\
 	emitm(&stream, ((sr32 & 0x7) << 3) | 4, 1);			\
 	emitm(&stream, ((or64 & 0x7) << 3) | (dr64 & 0x7), 1);		\
 } while (0)
 
-/* bswap dr32 */
+/* bswapl dr32 */
 #define BSWAP(dr32) do {						\
 	emitm(&stream, 0xf, 1);						\
 	emitm(&stream, (0x19 << 3) | dr32, 1);				\
 } while (0)
 
-/* xchg al,ah */
+/* xchgb %al,%ah */
 #define SWAP_AX() do {							\
 	emitm(&stream, 0x86, 1);					\
 	emitm(&stream, 0xc4, 1);					\
 } while (0)
 
-/* push r64 */
+/* pushq r64 */
 #define PUSH(r64) do {							\
 	emitm(&stream, (5 << 4) | (0 << 3) | (r64 & 0x7), 1);		\
 } while (0)
 
-/* pop r64 */
+/* popq r64 */
 #define POP(r64) do {							\
 	emitm(&stream, (5 << 4) | (1 << 3) | (r64 & 0x7), 1);		\
 } while (0)
 
-/* leave/ret */
+/* leaveq/retq */
 #define LEAVE_RET() do {						\
 	emitm(&stream, 0xc9, 1);					\
 	emitm(&stream, 0xc3, 1);					\
 } while (0)
 
-/* add dr32,sr32 */
-#define ADDrd(dr32, sr32) do {						\
+/* addl sr32,dr32 */
+#define ADDrd(sr32, dr32) do {						\
 	emitm(&stream, 0x03, 1);					\
 	emitm(&stream,							\
 	    (3 << 6) | ((dr32 & 0x7) << 3) | (sr32 & 0x7), 1);	\
 } while (0)
 
-/* add eax,i32 */
+/* addl i32,%eax */
 #define ADD_EAXi(i32) do {						\
 	emitm(&stream, 0x05, 1);					\
 	emitm(&stream, i32, 4);						\
 } while (0)
 
-/* add r32,i32 */
-#define ADDid(r32, i32) do {						\
+/* addl i32,r32 */
+#define ADDid(i32, r32) do {						\
 	emitm(&stream, 0x81, 1);					\
 	emitm(&stream, (24 << 3) | r32, 1);				\
 	emitm(&stream, i32, 4);						\
 } while (0)
 
-/* add r32,i8 */
-#define ADDib(r32, i8) do {						\
+/* addl i8,r32 */
+#define ADDib(i8, r32) do {						\
 	emitm(&stream, 0x83, 1);					\
 	emitm(&stream, (24 << 3) | r32, 1);				\
 	emitm(&stream, i8, 1);						\
 } while (0)
 
-/* sub dr32,sr32 */
-#define SUBrd(dr32, sr32) do {						\
+/* subl sr32,dr32 */
+#define SUBrd(sr32, dr32) do {						\
 	emitm(&stream, 0x2b, 1);					\
 	emitm(&stream,							\
 	    (3 << 6) | ((dr32 & 0x7) << 3) | (sr32 & 0x7), 1);		\
 } while (0)
 
-/* sub eax,i32 */
+/* subl i32,%eax */
 #define SUB_EAXi(i32) do {						\
 	emitm(&stream, 0x2d, 1);					\
 	emitm(&stream, i32, 4);						\
 } while (0)
 
-/* mul r32 */
+/* mull r32 */
 #define MULrd(r32) do {							\
 	emitm(&stream, 0xf7, 1);					\
 	emitm(&stream, (7 << 5) | (r32 & 0x7), 1);			\
 } while (0)
 
-/* div r32 */
+/* divl r32 */
 #define DIVrd(r32) do {							\
 	emitm(&stream, 0xf7, 1);					\
 	emitm(&stream, (15 << 4) | (r32 & 0x7), 1);			\
 } while (0)
 
-/* and r8,i8 */
-#define ANDib(r8, i8) do {						\
+/* andb i8,r8 */
+#define ANDib(i8, r8) do {						\
 	emitm(&stream, 0x80, 1);					\
 	emitm(&stream, (7 << 5) | r8, 1);				\
 	emitm(&stream, i8, 1);						\
 } while (0)
 
-/* and r32,i32 */
-#define ANDid(r32, i32) do {						\
+/* andl i32,r32 */
+#define ANDid(i32, r32) do {						\
 	if (r32 == EAX) {						\
 		emitm(&stream, 0x25, 1);				\
 		emitm(&stream, i32, 4);					\
 	} else {							\
 		emitm(&stream, 0x81, 1);				\
 		emitm(&stream, (7 << 5) | r32, 1);			\
 		emitm(&stream, i32, 4);					\
 	}								\
 } while (0)
 
-/* and dr32,sr32 */
-#define ANDrd(dr32, sr32) do {						\
+/* andl sr32,dr32 */
+#define ANDrd(sr32, dr32) do {						\
 	emitm(&stream, 0x23, 1);					\
 	emitm(&stream,							\
 	    (3 << 6) | ((dr32 & 0x7) << 3) | (sr32 & 0x7), 1);		\
 } while (0)
 
-/* or dr32,sr32 */
-#define ORrd(dr32, sr32) do {						\
+/* orl sr32,dr32 */
+#define ORrd(sr32, dr32) do {						\
 	emitm(&stream, 0x0b, 1);					\
 	emitm(&stream,							\
 	    (3 << 6) | ((dr32 & 0x7) << 3) | (sr32 & 0x7), 1);		\
 } while (0)
 
-/* or r32,i32 */
-#define ORid(r32, i32) do {						\
+/* orl i32,r32 */
+#define ORid(i32, r32) do {						\
 	if (r32 == EAX) {						\
 		emitm(&stream, 0x0d, 1);				\
 		emitm(&stream, i32, 4);					\
 	} else {							\
 		emitm(&stream, 0x81, 1);				\
 		emitm(&stream, (25 << 3) | r32, 1);			\
 		emitm(&stream, i32, 4);					\
 	}								\
 } while (0)
 
-/* shl r32,i8 */
-#define SHLib(r32, i8) do {						\
+/* shll i8,r32 */
+#define SHLib(i8, r32) do {						\
 	emitm(&stream, 0xc1, 1);					\
 	emitm(&stream, (7 << 5) | (r32 & 0x7), 1);			\
 	emitm(&stream, i8, 1);						\
 } while (0)
 
-/* shl dr32,cl */
+/* shll %cl,dr32 */
 #define SHL_CLrb(dr32) do {						\
 	emitm(&stream, 0xd3, 1);					\
 	emitm(&stream, (7 << 5) | (dr32 & 0x7), 1);			\
 } while (0)
 
-/* shr r32,i8 */
-#define SHRib(r32, i8) do {						\
+/* shrl i8,r32 */
+#define SHRib(i8, r32) do {						\
 	emitm(&stream, 0xc1, 1);					\
 	emitm(&stream, (29 << 3) | (r32 & 0x7), 1);			\
 	emitm(&stream, i8, 1);						\
 } while (0)
 
-/* shr dr32,cl */
+/* shrl %cl,dr32 */
 #define SHR_CLrb(dr32) do {						\
 	emitm(&stream, 0xd3, 1);					\
 	emitm(&stream, (29 << 3) | (dr32 & 0x7), 1);			\
 } while (0)
 
-/* neg r32 */
+/* negl r32 */
 #define NEGd(r32) do {							\
 	emitm(&stream, 0xf7, 1);					\
 	emitm(&stream, (27 << 3) | (r32 & 0x7), 1);			\
 } while (0)
 
-/* cmp dr32,sr64[off] */
-#define CMPodd(dr32, sr64, off) do {					\
+/* cmpl off(sr64),dr32 */
+#define CMPoqd(off, sr64, dr32) do {					\
 	emitm(&stream, (3 << 4) | 3 | (1 << 3), 1);			\
 	emitm(&stream,							\
 	    (1 << 6) | ((dr32 & 0x7) << 3) | (sr64 & 0x7), 1);		\
 	emitm(&stream, off, 1);						\
 } while (0)
 
-/* cmp dr32,sr32 */
-#define CMPrd(dr32, sr32) do {						\
+/* cmpl sr32,dr32 */
+#define CMPrd(sr32, dr32) do {						\
 	emitm(&stream, 0x3b, 1);					\
 	emitm(&stream,							\
 	    (3 << 6) | ((dr32 & 0x7) << 3) | (sr32 & 0x7), 1);		\
 } while (0)
 
-/* cmp dr32,i32 */
-#define CMPid(dr32, i32) do {						\
+/* cmpl i32,dr32 */
+#define CMPid(i32, dr32) do {						\
 	if (dr32 == EAX){						\
 		emitm(&stream, 0x3d, 1);				\
 		emitm(&stream, i32, 4);					\
 	} else {							\
 		emitm(&stream, 0x81, 1);				\
 		emitm(&stream, (0x1f << 3) | (dr32 & 0x7), 1);		\
 		emitm(&stream, i32, 4);					\
 	}								\
 } while (0)
 
-/* jne off32 */
+/* jne off8 */
 #define JNEb(off8) do {							\
 	emitm(&stream, 0x75, 1);					\
 	emitm(&stream, off8, 1);					\
 } while (0)
 
 /* je off32 */
 #define JE(off32) do {							\
 	emitm(&stream, 0x0f, 1);					\
 	emitm(&stream, 0x84, 1);					\
 	emitm(&stream, off32, 4);					\
 } while (0)
 
 /* jle off32 */
 #define JLE(off32) do {							\
 	emitm(&stream, 0x0f, 1);					\
 	emitm(&stream, 0x8e, 1);					\
 	emitm(&stream, off32, 4);					\
 } while (0)
 
 /* jle off8 */
 #define JLEb(off8) do {							\
 	emitm(&stream, 0x7e, 1);					\
 	emitm(&stream, off8, 1);					\
 } while (0)
 
 /* ja off32 */
 #define JA(off32) do {							\
 	emitm(&stream, 0x0f, 1);					\
 	emitm(&stream, 0x87, 1);					\
 	emitm(&stream, off32, 4);					\
 } while (0)
 
 /* jae off32 */
 #define JAE(off32) do {							\
 	emitm(&stream, 0x0f, 1);					\
 	emitm(&stream, 0x83, 1);					\
 	emitm(&stream, off32, 4);					\
 } while (0)
 
 /* jg off32 */
 #define JG(off32) do {							\
 	emitm(&stream, 0x0f, 1);					\
 	emitm(&stream, 0x8f, 1);					\
 	emitm(&stream, off32, 4);					\
 } while (0)
 
 /* jge off32 */
 #define JGE(off32) do {							\
 	emitm(&stream, 0x0f, 1);					\
 	emitm(&stream, 0x8d, 1);					\
 	emitm(&stream, off32, 4);					\
 } while (0)
 
 /* jmp off32 */
 #define JMP(off32) do {							\
 	emitm(&stream, 0xe9, 1);					\
 	emitm(&stream, off32, 4);					\
 } while (0)
 
-/* xor eax,eax */
+/* xorl %eax,%eax */
 #define ZERO_EAX() do {							\
 	emitm(&stream, 0x31, 1);					\
 	emitm(&stream, 0xc0, 1);					\
 } while (0)
 
-/* xor edx,edx */
+/* xorl %edx,%edx */
 #define ZERO_EDX() do {							\
 	emitm(&stream, 0x31, 1);					\
 	emitm(&stream, 0xd2, 1);					\
 } while (0)
 
 #endif	/* _BPF_JIT_MACHDEP_H_ */