Index: user/alc/PQ_LAUNDRY/sbin/pfctl/parse.y
===================================================================
--- user/alc/PQ_LAUNDRY/sbin/pfctl/parse.y	(revision 303666)
+++ user/alc/PQ_LAUNDRY/sbin/pfctl/parse.y	(revision 303667)
@@ -1,6259 +1,6259 @@
/*	$OpenBSD: parse.y,v 1.554 2008/10/17 12:59:53 henning Exp $	*/

/*
 * Copyright (c) 2001 Markus Friedl.  All rights reserved.
 * Copyright (c) 2001 Daniel Hartmeier.  All rights reserved.
 * Copyright (c) 2001 Theo de Raadt.  All rights reserved.
 * Copyright (c) 2002,2003 Henning Brauer.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

%{
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/types.h>
#include <sys/socket.h>
#include <sys/stat.h>
#ifdef __FreeBSD__
#include <sys/sysctl.h>
#endif
#include <net/if.h>
#include <netinet/in.h>
#include <netinet/in_systm.h>
#include <netinet/ip.h>
#include <netinet/ip_icmp.h>
#include <netinet/icmp6.h>
#include <net/pfvar.h>
#include <arpa/inet.h>
#include <altq/altq.h>
#include <altq/altq_cbq.h>
#include <altq/altq_codel.h>
#include <altq/altq_priq.h>
#include <altq/altq_hfsc.h>
#include <altq/altq_fairq.h>

#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <netdb.h>
#include <stdarg.h>
#include <errno.h>
#include <string.h>
#include <ctype.h>
#include <math.h>
#include <err.h>
#include <limits.h>
#include <pwd.h>
#include <grp.h>
#include <md5.h>

#include "pfctl_parser.h"
#include "pfctl.h"

static struct pfctl	*pf = NULL;
static int		 debug = 0;
static int		 rulestate = 0;
static u_int16_t	 returnicmpdefault =
			    (ICMP_UNREACH << 8) | ICMP_UNREACH_PORT;
static u_int16_t	 returnicmp6default =
			    (ICMP6_DST_UNREACH << 8) | ICMP6_DST_UNREACH_NOPORT;
static int		 blockpolicy = PFRULE_DROP;
static int		 require_order = 1;
static int		 default_statelock;

TAILQ_HEAD(files, file)	 files = TAILQ_HEAD_INITIALIZER(files);
static struct file {
	TAILQ_ENTRY(file)	 entry;
	FILE			*stream;
	char			*name;
	int			 lineno;
	int			 errors;
} *file;
struct file	*pushfile(const char *, int);
int		 popfile(void);
int		 check_file_secrecy(int, const char *);
int		 yyparse(void);
int		 yylex(void);
int		 yyerror(const char *, ...);
int		 kw_cmp(const void *, const void *);
int		 lookup(char *);
int		 lgetc(int);
int		 lungetc(int);
int		 findeol(void);

TAILQ_HEAD(symhead, sym)	 symhead = TAILQ_HEAD_INITIALIZER(symhead);
struct sym {
	TAILQ_ENTRY(sym)	 entry;
	int			 used;
	int			 persist;
	char			*nam;
	char			*val;
};
int		 symset(const char *, const char *, int);
char		*symget(const char *);

int		 atoul(char *, u_long *);

enum {
	PFCTL_STATE_NONE,
	PFCTL_STATE_OPTION,
	PFCTL_STATE_SCRUB,
	PFCTL_STATE_QUEUE,
	PFCTL_STATE_NAT,
	PFCTL_STATE_FILTER
};

struct node_proto {
	u_int8_t		 proto;
	struct node_proto	*next;
	struct node_proto	*tail;
};

struct node_port {
	u_int16_t		 port[2];
	u_int8_t		 op;
	struct node_port	*next;
	struct node_port	*tail;
};

struct node_uid {
	uid_t			 uid[2];
	u_int8_t		 op;
	struct node_uid		*next;
	struct node_uid		*tail;
};

struct node_gid {
	gid_t			 gid[2];
	u_int8_t		 op;
	struct node_gid		*next;
	struct node_gid		*tail;
};

struct node_icmp {
	u_int8_t		 code;
	u_int8_t		 type;
	u_int8_t		 proto;
	struct node_icmp	*next;
	struct node_icmp	*tail;
};

enum	{ PF_STATE_OPT_MAX, PF_STATE_OPT_NOSYNC, PF_STATE_OPT_SRCTRACK,
	    PF_STATE_OPT_MAX_SRC_STATES, PF_STATE_OPT_MAX_SRC_CONN,
	    PF_STATE_OPT_MAX_SRC_CONN_RATE, PF_STATE_OPT_MAX_SRC_NODES,
	    PF_STATE_OPT_OVERLOAD, PF_STATE_OPT_STATELOCK,
	    PF_STATE_OPT_TIMEOUT, PF_STATE_OPT_SLOPPY, };

enum	{ PF_SRCTRACK_NONE, PF_SRCTRACK, PF_SRCTRACK_GLOBAL, PF_SRCTRACK_RULE };

struct node_state_opt {
	int			 type;
	union {
		u_int32_t	 max_states;
		u_int32_t	 max_src_states;
		u_int32_t	 max_src_conn;
		struct {
			u_int32_t	limit;
			u_int32_t	seconds;
		}		 max_src_conn_rate;
		struct {
			u_int8_t	flush;
			char		tblname[PF_TABLE_NAME_SIZE];
		}		 overload;
		u_int32_t	 max_src_nodes;
		u_int8_t	 src_track;
		u_int32_t	 statelock;
		struct {
			int		number;
			u_int32_t	seconds;
		}		 timeout;
	}			 data;
	struct node_state_opt	*next;
	struct node_state_opt	*tail;
};

struct peer {
	struct node_host	*host;
	struct node_port	*port;
};

struct node_queue {
	char			 queue[PF_QNAME_SIZE];
	char			 parent[PF_QNAME_SIZE];
	char			 ifname[IFNAMSIZ];
	int			 scheduler;
	struct node_queue	*next;
	struct node_queue	*tail;
}	*queues = NULL;

struct node_qassign {
	char		*qname;
	char		*pqname;
};

struct filter_opts {
	int			 marker;
#define FOM_FLAGS	0x01
#define FOM_ICMP	0x02
#define FOM_TOS		0x04
#define FOM_KEEP	0x08
#define FOM_SRCTRACK	0x10
#define FOM_SETPRIO	0x0400
#define FOM_PRIO	0x2000
	struct node_uid		*uid;
	struct node_gid		*gid;
	struct {
		u_int8_t	 b1;
		u_int8_t	 b2;
		u_int16_t	 w;
		u_int16_t	 w2;
	} flags;
	struct node_icmp	*icmpspec;
	u_int32_t		 tos;
	u_int32_t		 prob;
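	/*
	 * The FOM_* bits in "marker" record which one-shot options have
	 * already been parsed for the rule being built, so duplicates can
	 * be rejected.  An illustrative pf.conf line (example only, not
	 * from this file) that trips such a check:
	 *
	 *	pass in proto tcp to port 22 prio 3 prio 5
	 *	-> "prio cannot be redefined"
	 */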
	struct {
		int			 action;
		struct node_state_opt	*options;
	} keep;
	int			 fragment;
	int			 allowopts;
	char			*label;
	struct node_qassign	 queues;
	char			*tag;
	char			*match_tag;
	u_int8_t		 match_tag_not;
	u_int			 rtableid;
	u_int8_t		 prio;
	u_int8_t		 set_prio[2];
	struct {
		struct node_host	*addr;
		u_int16_t		 port;
	}			 divert;
} filter_opts;

struct antispoof_opts {
	char			*label;
	u_int			 rtableid;
} antispoof_opts;

struct scrub_opts {
	int			 marker;
#define SOM_MINTTL	0x01
#define SOM_MAXMSS	0x02
#define SOM_FRAGCACHE	0x04
#define SOM_SETTOS	0x08
	int			 nodf;
	int			 minttl;
	int			 maxmss;
	int			 settos;
	int			 fragcache;
	int			 randomid;
	int			 reassemble_tcp;
	char			*match_tag;
	u_int8_t		 match_tag_not;
	u_int			 rtableid;
} scrub_opts;

struct queue_opts {
	int			 marker;
#define QOM_BWSPEC	0x01
#define QOM_SCHEDULER	0x02
#define QOM_PRIORITY	0x04
#define QOM_TBRSIZE	0x08
#define QOM_QLIMIT	0x10
	struct node_queue_bw	 queue_bwspec;
	struct node_queue_opt	 scheduler;
	int			 priority;
	int			 tbrsize;
	int			 qlimit;
} queue_opts;

struct table_opts {
	int			 flags;
	int			 init_addr;
	struct node_tinithead	 init_nodes;
} table_opts;

struct pool_opts {
	int			 marker;
#define POM_TYPE		0x01
#define POM_STICKYADDRESS	0x02
	u_int8_t		 opts;
	int			 type;
	int			 staticport;
	struct pf_poolhashkey	*key;
} pool_opts;

struct codel_opts	 codel_opts;
struct node_hfsc_opts	 hfsc_opts;
struct node_fairq_opts	 fairq_opts;
struct node_state_opt	*keep_state_defaults = NULL;

int	 disallow_table(struct node_host *, const char *);
int	 disallow_urpf_failed(struct node_host *, const char *);
int	 disallow_alias(struct node_host *, const char *);
int	 rule_consistent(struct pf_rule *, int);
int	 filter_consistent(struct pf_rule *, int);
int	 nat_consistent(struct pf_rule *);
int	 rdr_consistent(struct pf_rule *);
int	 process_tabledef(char *, struct table_opts *);
void	 expand_label_str(char *, size_t, const char *, const char *);
void	 expand_label_if(const char *, char *, size_t, const char *);
void	 expand_label_addr(const char *, char *, size_t, u_int8_t,
	    struct node_host *);
void	 expand_label_port(const char *, char *, size_t, struct node_port *);
void	 expand_label_proto(const char *, char *, size_t, u_int8_t);
void	 expand_label_nr(const char *, char *, size_t);
void	 expand_label(char *, size_t, const char *, u_int8_t,
	    struct node_host *, struct node_port *, struct node_host *,
	    struct node_port *, u_int8_t);
void	 expand_rule(struct pf_rule *, struct node_if *, struct node_host *,
	    struct node_proto *, struct node_os *, struct node_host *,
	    struct node_port *, struct node_host *, struct node_port *,
	    struct node_uid *, struct node_gid *, struct node_icmp *,
	    const char *);
int	 expand_altq(struct pf_altq *, struct node_if *, struct node_queue *,
	    struct node_queue_bw bwspec, struct node_queue_opt *);
int	 expand_queue(struct pf_altq *, struct node_if *, struct node_queue *,
	    struct node_queue_bw, struct node_queue_opt *);
int	 expand_skip_interface(struct node_if *);

int	 check_rulestate(int);
int	 getservice(char *);
int	 rule_label(struct pf_rule *, char *);
int	 rt_tableid_max(void);

void	 mv_rules(struct pf_ruleset *, struct pf_ruleset *);
void	 decide_address_family(struct node_host *, sa_family_t *);
void	 remove_invalid_hosts(struct node_host **, sa_family_t *);
int	 invalid_redirect(struct node_host *, sa_family_t);
u_int16_t parseicmpspec(char *, sa_family_t);

TAILQ_HEAD(loadanchorshead, loadanchors)
    loadanchorshead = TAILQ_HEAD_INITIALIZER(loadanchorshead);

struct loadanchors {
	TAILQ_ENTRY(loadanchors)	 entries;
	char				*anchorname;
	char				*filename;
};

typedef struct {
	union {
		int64_t			 number;
		double			 probability;
		int			 i;
		char			*string;
		u_int			 rtableid;
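		/*
		 * Each grammar symbol carries its semantic value in exactly
		 * one member of this union; the %type declarations that
		 * follow the %token list bind symbols to members, so e.g.
		 * the "interface" nonterminal yields a struct node_if list.
		 */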
		struct {
			u_int8_t		 b1;
			u_int8_t		 b2;
			u_int16_t		 w;
			u_int16_t		 w2;
		}			 b;
		struct range {
			int			 a;
			int			 b;
			int			 t;
		}			 range;
		struct node_if		*interface;
		struct node_proto	*proto;
		struct node_icmp	*icmp;
		struct node_host	*host;
		struct node_os		*os;
		struct node_port	*port;
		struct node_uid		*uid;
		struct node_gid		*gid;
		struct node_state_opt	*state_opt;
		struct peer		 peer;
		struct {
			struct peer	 src, dst;
			struct node_os	*src_os;
		}			 fromto;
		struct {
			struct node_host	*host;
			u_int8_t		 rt;
			u_int8_t		 pool_opts;
			sa_family_t		 af;
			struct pf_poolhashkey	*key;
		}			 route;
		struct redirection {
			struct node_host	*host;
			struct range		 rport;
		}			*redirection;
		struct {
			int			 action;
			struct node_state_opt	*options;
		}			 keep_state;
		struct {
			u_int8_t	 log;
			u_int8_t	 logif;
			u_int8_t	 quick;
		}			 logquick;
		struct {
			int		 neg;
			char		*name;
		}			 tagged;
		struct pf_poolhashkey	*hashkey;
		struct node_queue	*queue;
		struct node_queue_opt	 queue_options;
		struct node_queue_bw	 queue_bwspec;
		struct node_qassign	 qassign;
		struct filter_opts	 filter_opts;
		struct antispoof_opts	 antispoof_opts;
		struct queue_opts	 queue_opts;
		struct scrub_opts	 scrub_opts;
		struct table_opts	 table_opts;
		struct pool_opts	 pool_opts;
		struct node_hfsc_opts	 hfsc_opts;
		struct node_fairq_opts	 fairq_opts;
		struct codel_opts	 codel_opts;
	} v;
	int lineno;
} YYSTYPE;

#define PPORT_RANGE	1
#define PPORT_STAR	2
int	parseport(char *, struct range *r, int);

#define DYNIF_MULTIADDR(addr) ((addr).type == PF_ADDR_DYNIFTL && \
	(!((addr).iflags & PFI_AFLAG_NOALIAS) ||		 \
	!isdigit((addr).v.ifname[strlen((addr).v.ifname)-1])))

%}

%token	PASS BLOCK SCRUB RETURN IN OS OUT LOG QUICK ON FROM TO FLAGS
%token	RETURNRST RETURNICMP RETURNICMP6 PROTO INET INET6 ALL ANY ICMPTYPE
%token	ICMP6TYPE CODE KEEP MODULATE STATE PORT RDR NAT BINAT ARROW NODF
%token	MINTTL ERROR ALLOWOPTS FASTROUTE FILENAME ROUTETO DUPTO REPLYTO NO LABEL
%token	NOROUTE URPFFAILED FRAGMENT USER GROUP MAXMSS MAXIMUM TTL TOS DROP TABLE
%token	REASSEMBLE FRAGDROP FRAGCROP ANCHOR NATANCHOR RDRANCHOR BINATANCHOR
%token	SET OPTIMIZATION TIMEOUT LIMIT LOGINTERFACE BLOCKPOLICY RANDOMID
%token	REQUIREORDER SYNPROXY FINGERPRINTS NOSYNC DEBUG SKIP HOSTID
%token	ANTISPOOF FOR INCLUDE
%token	BITMASK RANDOM SOURCEHASH ROUNDROBIN STATICPORT PROBABILITY
%token	ALTQ CBQ CODEL PRIQ HFSC FAIRQ BANDWIDTH TBRSIZE LINKSHARE REALTIME
%token	UPPERLIMIT QUEUE PRIORITY QLIMIT HOGS BUCKETS RTABLE TARGET INTERVAL
%token	LOAD RULESET_OPTIMIZATION PRIO
%token	STICKYADDRESS MAXSRCSTATES MAXSRCNODES SOURCETRACK GLOBAL RULE
%token	MAXSRCCONN MAXSRCCONNRATE OVERLOAD FLUSH SLOPPY
%token	TAGGED TAG IFBOUND FLOATING STATEPOLICY STATEDEFAULTS ROUTE SETTOS
%token	DIVERTTO DIVERTREPLY
%token	<v.string>		STRING
%token	<v.number>		NUMBER
%token	<v.i>			PORTBINARY
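/*
 * Value-carrying tokens (STRING, NUMBER, PORTBINARY) and all typed
 * nonterminals are bound to YYSTYPE members here.  List-valued symbols
 * such as host_list keep a tail pointer so productions can append in
 * constant time ($1->tail->next = $3; $1->tail = $3).
 */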
%type	<v.interface>		interface if_list if_item_not if_item
%type	<v.number>		number icmptype icmp6type uid gid
%type	<v.number>		tos not yesno
%type	<v.probability>		probability
%type	<v.i>			no dir af fragcache optimizer
%type	<v.i>			sourcetrack flush unaryop statelock
%type	<v.b>			action nataction natpasslog scrubaction
%type	<v.b>			flags flag blockspec prio
%type	<v.range>		portplain portstar portrange
%type	<v.hashkey>		hashkey
%type	<v.proto>		proto proto_list proto_item
%type	<v.number>		protoval
%type	<v.icmp>		icmpspec
%type	<v.icmp>		icmp_list icmp_item
%type	<v.icmp>		icmp6_list icmp6_item
%type	<v.number>		reticmpspec reticmp6spec
%type	<v.fromto>		fromto
%type	<v.peer>		ipportspec from to
%type	<v.host>		ipspec toipspec xhost host dynaddr host_list
%type	<v.host>		redir_host_list redirspec
%type	<v.host>		route_host route_host_list routespec
%type	<v.os>			os xos os_list
%type	<v.port>		portspec port_list port_item
%type	<v.uid>			uids uid_list uid_item
%type	<v.gid>			gids gid_list gid_item
%type	<v.route>		route
%type	<v.redirection>		redirection redirpool
%type	<v.string>		label stringall tag anchorname
%type	<v.string>		string varstring numberstring
%type	<v.keep_state>		keep
%type	<v.state_opt>		state_opt_spec state_opt_list state_opt_item
%type	<v.logquick>		logquick quick log logopts logopt
%type	<v.interface>		antispoof_ifspc antispoof_iflst antispoof_if
%type	<v.qassign>		qname
%type	<v.queue>		qassign qassign_list qassign_item
%type	<v.queue_options>	scheduler
%type	<v.number>		cbqflags_list cbqflags_item
%type	<v.number>		priqflags_list priqflags_item
%type	<v.hfsc_opts>		hfscopts_list hfscopts_item hfsc_opts
%type	<v.fairq_opts>		fairqopts_list fairqopts_item fairq_opts
%type	<v.codel_opts>		codelopts_list codelopts_item codel_opts
%type	<v.queue_bwspec>	bandwidth
%type	<v.filter_opts>		filter_opts filter_opt filter_opts_l
%type	<v.filter_opts>		filter_sets filter_set filter_sets_l
%type	<v.antispoof_opts>	antispoof_opts antispoof_opt antispoof_opts_l
%type	<v.queue_opts>		queue_opts queue_opt queue_opts_l
%type	<v.scrub_opts>		scrub_opts scrub_opt scrub_opts_l
%type	<v.table_opts>		table_opts table_opt table_opts_l
%type	<v.pool_opts>		pool_opts pool_opt pool_opts_l
%type	<v.tagged>		tagged
%type	<v.rtableid>		rtable
%%

ruleset		: /* empty */
		| ruleset include '\n'
		| ruleset '\n'
		| ruleset option '\n'
		| ruleset scrubrule '\n'
		| ruleset natrule '\n'
		| ruleset binatrule '\n'
		| ruleset pfrule '\n'
		| ruleset anchorrule '\n'
		| ruleset loadrule '\n'
		| ruleset altqif '\n'
		| ruleset queuespec '\n'
		| ruleset varset '\n'
		| ruleset antispoof '\n'
		| ruleset tabledef '\n'
		| '{' fakeanchor '}' '\n';
		| ruleset error '\n'		{ file->errors++; }
		;

include		: INCLUDE STRING		{
			struct file	*nfile;

			if ((nfile = pushfile($2, 0)) == NULL) {
				yyerror("failed to include file %s", $2);
				free($2);
				YYERROR;
			}
			free($2);

			file = nfile;
			lungetc('\n');
		}
		;

/*
 * apply to previously specified rule: must be careful to note
 * what that is: pf or nat or binat or rdr
 */
fakeanchor	: fakeanchor '\n'
		| fakeanchor anchorrule '\n'
		| fakeanchor binatrule '\n'
		| fakeanchor natrule '\n'
		| fakeanchor pfrule '\n'
		| fakeanchor error '\n'
		;

optimizer	: string	{
			if (!strcmp($1, "none"))
				$$ = 0;
			else if (!strcmp($1, "basic"))
				$$ = PF_OPTIMIZE_BASIC;
			else if (!strcmp($1, "profile"))
				$$ = PF_OPTIMIZE_BASIC | PF_OPTIMIZE_PROFILE;
			else {
				yyerror("unknown ruleset-optimization %s", $1);
				YYERROR;
			}
		}
		;

option		: SET OPTIMIZATION STRING		{
			if (check_rulestate(PFCTL_STATE_OPTION)) {
				free($3);
				YYERROR;
			}
			if (pfctl_set_optimization(pf, $3) != 0) {
				yyerror("unknown optimization %s", $3);
				free($3);
				YYERROR;
			}
			free($3);
		}
		| SET RULESET_OPTIMIZATION optimizer {
			if (!(pf->opts & PF_OPT_OPTIMIZE)) {
				pf->opts |= PF_OPT_OPTIMIZE;
				pf->optimize = $3;
			}
		}
		| SET TIMEOUT timeout_spec
		| SET TIMEOUT '{' optnl timeout_list '}'
		| SET LIMIT limit_spec
		| SET LIMIT '{' optnl limit_list '}'
		| SET LOGINTERFACE stringall		{
			if (check_rulestate(PFCTL_STATE_OPTION)) {
				free($3);
				YYERROR;
			}
			if (pfctl_set_logif(pf, $3) != 0) {
				yyerror("error setting loginterface %s", $3);
				free($3);
				YYERROR;
			}
			free($3);
		}
		| SET HOSTID number {
			if ($3 == 0 || $3 > UINT_MAX) {
				yyerror("hostid must be non-zero");
				YYERROR;
			}
			if (pfctl_set_hostid(pf, $3) != 0) {
				yyerror("error setting hostid %08x", $3);
				YYERROR;
			}
		}
		| SET BLOCKPOLICY DROP	{
			if (pf->opts & PF_OPT_VERBOSE)
				printf("set block-policy drop\n");
			if (check_rulestate(PFCTL_STATE_OPTION))
				YYERROR;
			blockpolicy = PFRULE_DROP;
		}
		| SET BLOCKPOLICY RETURN {
			if (pf->opts & PF_OPT_VERBOSE)
				printf("set block-policy return\n");
			if (check_rulestate(PFCTL_STATE_OPTION))
				YYERROR;
			blockpolicy = PFRULE_RETURN;
		}
		| SET REQUIREORDER yesno {
			if (pf->opts & PF_OPT_VERBOSE)
				printf("set require-order %s\n", $3 == 1 ?
"yes" : "no"); require_order = $3; } | SET FINGERPRINTS STRING { if (pf->opts & PF_OPT_VERBOSE) printf("set fingerprints \"%s\"\n", $3); if (check_rulestate(PFCTL_STATE_OPTION)) { free($3); YYERROR; } if (!pf->anchor->name[0]) { if (pfctl_file_fingerprints(pf->dev, pf->opts, $3)) { yyerror("error loading " "fingerprints %s", $3); free($3); YYERROR; } } free($3); } | SET STATEPOLICY statelock { if (pf->opts & PF_OPT_VERBOSE) switch ($3) { case 0: printf("set state-policy floating\n"); break; case PFRULE_IFBOUND: printf("set state-policy if-bound\n"); break; } default_statelock = $3; } | SET DEBUG STRING { if (check_rulestate(PFCTL_STATE_OPTION)) { free($3); YYERROR; } if (pfctl_set_debug(pf, $3) != 0) { yyerror("error setting debuglevel %s", $3); free($3); YYERROR; } free($3); } | SET SKIP interface { if (expand_skip_interface($3) != 0) { yyerror("error setting skip interface(s)"); YYERROR; } } | SET STATEDEFAULTS state_opt_list { if (keep_state_defaults != NULL) { yyerror("cannot redefine state-defaults"); YYERROR; } keep_state_defaults = $3; } ; stringall : STRING { $$ = $1; } | ALL { if (($$ = strdup("all")) == NULL) { err(1, "stringall: strdup"); } } ; string : STRING string { if (asprintf(&$$, "%s %s", $1, $2) == -1) err(1, "string: asprintf"); free($1); free($2); } | STRING ; varstring : numberstring varstring { if (asprintf(&$$, "%s %s", $1, $2) == -1) err(1, "string: asprintf"); free($1); free($2); } | numberstring ; numberstring : NUMBER { char *s; if (asprintf(&s, "%lld", (long long)$1) == -1) { yyerror("string: asprintf"); YYERROR; } $$ = s; } | STRING ; varset : STRING '=' varstring { if (pf->opts & PF_OPT_VERBOSE) printf("%s = \"%s\"\n", $1, $3); if (symset($1, $3, 0) == -1) err(1, "cannot store variable %s", $1); free($1); free($3); } ; anchorname : STRING { $$ = $1; } | /* empty */ { $$ = NULL; } ; pfa_anchorlist : /* empty */ | pfa_anchorlist '\n' | pfa_anchorlist pfrule '\n' | pfa_anchorlist anchorrule '\n' ; pfa_anchor : '{' { char ta[PF_ANCHOR_NAME_SIZE]; struct pf_ruleset *rs; /* steping into a brace anchor */ pf->asd++; pf->bn++; pf->brace = 1; /* create a holding ruleset in the root */ snprintf(ta, PF_ANCHOR_NAME_SIZE, "_%d", pf->bn); rs = pf_find_or_create_ruleset(ta); if (rs == NULL) err(1, "pfa_anchor: pf_find_or_create_ruleset"); pf->astack[pf->asd] = rs->anchor; pf->anchor = rs->anchor; } '\n' pfa_anchorlist '}' { pf->alast = pf->anchor; pf->asd--; pf->anchor = pf->astack[pf->asd]; } | /* empty */ ; anchorrule : ANCHOR anchorname dir quick interface af proto fromto filter_opts pfa_anchor { struct pf_rule r; struct node_proto *proto; if (check_rulestate(PFCTL_STATE_FILTER)) { if ($2) free($2); YYERROR; } if ($2 && ($2[0] == '_' || strstr($2, "/_") != NULL)) { free($2); yyerror("anchor names beginning with '_' " "are reserved for internal use"); YYERROR; } memset(&r, 0, sizeof(r)); if (pf->astack[pf->asd + 1]) { /* move inline rules into relative location */ pf_anchor_setup(&r, &pf->astack[pf->asd]->ruleset, $2 ? 
$2 : pf->alast->name); if (r.anchor == NULL) err(1, "anchorrule: unable to " "create ruleset"); if (pf->alast != r.anchor) { if (r.anchor->match) { yyerror("inline anchor '%s' " "already exists", r.anchor->name); YYERROR; } mv_rules(&pf->alast->ruleset, &r.anchor->ruleset); } pf_remove_if_empty_ruleset(&pf->alast->ruleset); pf->alast = r.anchor; } else { if (!$2) { yyerror("anchors without explicit " "rules must specify a name"); YYERROR; } } r.direction = $3; r.quick = $4.quick; r.af = $6; r.prob = $9.prob; r.rtableid = $9.rtableid; if ($9.tag) if (strlcpy(r.tagname, $9.tag, PF_TAG_NAME_SIZE) >= PF_TAG_NAME_SIZE) { yyerror("tag too long, max %u chars", PF_TAG_NAME_SIZE - 1); YYERROR; } if ($9.match_tag) if (strlcpy(r.match_tagname, $9.match_tag, PF_TAG_NAME_SIZE) >= PF_TAG_NAME_SIZE) { yyerror("tag too long, max %u chars", PF_TAG_NAME_SIZE - 1); YYERROR; } r.match_tag_not = $9.match_tag_not; if (rule_label(&r, $9.label)) YYERROR; free($9.label); r.flags = $9.flags.b1; r.flagset = $9.flags.b2; if (($9.flags.b1 & $9.flags.b2) != $9.flags.b1) { yyerror("flags always false"); YYERROR; } if ($9.flags.b1 || $9.flags.b2 || $8.src_os) { for (proto = $7; proto != NULL && proto->proto != IPPROTO_TCP; proto = proto->next) ; /* nothing */ if (proto == NULL && $7 != NULL) { if ($9.flags.b1 || $9.flags.b2) yyerror( "flags only apply to tcp"); if ($8.src_os) yyerror( "OS fingerprinting only " "applies to tcp"); YYERROR; } } r.tos = $9.tos; if ($9.keep.action) { yyerror("cannot specify state handling " "on anchors"); YYERROR; } if ($9.match_tag) if (strlcpy(r.match_tagname, $9.match_tag, PF_TAG_NAME_SIZE) >= PF_TAG_NAME_SIZE) { yyerror("tag too long, max %u chars", PF_TAG_NAME_SIZE - 1); YYERROR; } r.match_tag_not = $9.match_tag_not; if ($9.marker & FOM_PRIO) { if ($9.prio == 0) r.prio = PF_PRIO_ZERO; else r.prio = $9.prio; } if ($9.marker & FOM_SETPRIO) { r.set_prio[0] = $9.set_prio[0]; r.set_prio[1] = $9.set_prio[1]; r.scrub_flags |= PFSTATE_SETPRIO; } decide_address_family($8.src.host, &r.af); decide_address_family($8.dst.host, &r.af); expand_rule(&r, $5, NULL, $7, $8.src_os, $8.src.host, $8.src.port, $8.dst.host, $8.dst.port, $9.uid, $9.gid, $9.icmpspec, pf->astack[pf->asd + 1] ? 
pf->alast->name : $2); free($2); pf->astack[pf->asd + 1] = NULL; } | NATANCHOR string interface af proto fromto rtable { struct pf_rule r; if (check_rulestate(PFCTL_STATE_NAT)) { free($2); YYERROR; } memset(&r, 0, sizeof(r)); r.action = PF_NAT; r.af = $4; r.rtableid = $7; decide_address_family($6.src.host, &r.af); decide_address_family($6.dst.host, &r.af); expand_rule(&r, $3, NULL, $5, $6.src_os, $6.src.host, $6.src.port, $6.dst.host, $6.dst.port, 0, 0, 0, $2); free($2); } | RDRANCHOR string interface af proto fromto rtable { struct pf_rule r; if (check_rulestate(PFCTL_STATE_NAT)) { free($2); YYERROR; } memset(&r, 0, sizeof(r)); r.action = PF_RDR; r.af = $4; r.rtableid = $7; decide_address_family($6.src.host, &r.af); decide_address_family($6.dst.host, &r.af); if ($6.src.port != NULL) { yyerror("source port parameter not supported" " in rdr-anchor"); YYERROR; } if ($6.dst.port != NULL) { if ($6.dst.port->next != NULL) { yyerror("destination port list " "expansion not supported in " "rdr-anchor"); YYERROR; } else if ($6.dst.port->op != PF_OP_EQ) { yyerror("destination port operators" " not supported in rdr-anchor"); YYERROR; } r.dst.port[0] = $6.dst.port->port[0]; r.dst.port[1] = $6.dst.port->port[1]; r.dst.port_op = $6.dst.port->op; } expand_rule(&r, $3, NULL, $5, $6.src_os, $6.src.host, $6.src.port, $6.dst.host, $6.dst.port, 0, 0, 0, $2); free($2); } | BINATANCHOR string interface af proto fromto rtable { struct pf_rule r; if (check_rulestate(PFCTL_STATE_NAT)) { free($2); YYERROR; } memset(&r, 0, sizeof(r)); r.action = PF_BINAT; r.af = $4; r.rtableid = $7; if ($5 != NULL) { if ($5->next != NULL) { yyerror("proto list expansion" " not supported in binat-anchor"); YYERROR; } r.proto = $5->proto; free($5); } if ($6.src.host != NULL || $6.src.port != NULL || $6.dst.host != NULL || $6.dst.port != NULL) { yyerror("fromto parameter not supported" " in binat-anchor"); YYERROR; } decide_address_family($6.src.host, &r.af); decide_address_family($6.dst.host, &r.af); pfctl_add_rule(pf, &r, $2); free($2); } ; loadrule : LOAD ANCHOR string FROM string { struct loadanchors *loadanchor; if (strlen(pf->anchor->name) + 1 + strlen($3) >= MAXPATHLEN) { yyerror("anchorname %s too long, max %u\n", $3, MAXPATHLEN - 1); free($3); YYERROR; } loadanchor = calloc(1, sizeof(struct loadanchors)); if (loadanchor == NULL) err(1, "loadrule: calloc"); if ((loadanchor->anchorname = malloc(MAXPATHLEN)) == NULL) err(1, "loadrule: malloc"); if (pf->anchor->name[0]) snprintf(loadanchor->anchorname, MAXPATHLEN, "%s/%s", pf->anchor->name, $3); else strlcpy(loadanchor->anchorname, $3, MAXPATHLEN); if ((loadanchor->filename = strdup($5)) == NULL) err(1, "loadrule: strdup"); TAILQ_INSERT_TAIL(&loadanchorshead, loadanchor, entries); free($3); free($5); }; scrubaction : no SCRUB { $$.b2 = $$.w = 0; if ($1) $$.b1 = PF_NOSCRUB; else $$.b1 = PF_SCRUB; } ; scrubrule : scrubaction dir logquick interface af proto fromto scrub_opts { struct pf_rule r; if (check_rulestate(PFCTL_STATE_SCRUB)) YYERROR; memset(&r, 0, sizeof(r)); r.action = $1.b1; r.direction = $2; r.log = $3.log; r.logif = $3.logif; if ($3.quick) { yyerror("scrub rules do not support 'quick'"); YYERROR; } r.af = $5; if ($8.nodf) r.rule_flag |= PFRULE_NODF; if ($8.randomid) r.rule_flag |= PFRULE_RANDOMID; if ($8.reassemble_tcp) { if (r.direction != PF_INOUT) { yyerror("reassemble tcp rules can not " "specify direction"); YYERROR; } r.rule_flag |= PFRULE_REASSEMBLE_TCP; } if ($8.minttl) r.min_ttl = $8.minttl; if ($8.maxmss) r.max_mss = $8.maxmss; if ($8.marker & SOM_SETTOS) { 
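			/*
			 * Illustrative pf.conf scrub rule exercising the
			 * options handled here (example only, not from
			 * this file):
			 *
			 *	scrub in on em0 all fragment reassemble
			 *	    min-ttl 15 max-mss 1440 random-id
			 */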
r.rule_flag |= PFRULE_SET_TOS; r.set_tos = $8.settos; } if ($8.fragcache) r.rule_flag |= $8.fragcache; if ($8.match_tag) if (strlcpy(r.match_tagname, $8.match_tag, PF_TAG_NAME_SIZE) >= PF_TAG_NAME_SIZE) { yyerror("tag too long, max %u chars", PF_TAG_NAME_SIZE - 1); YYERROR; } r.match_tag_not = $8.match_tag_not; r.rtableid = $8.rtableid; expand_rule(&r, $4, NULL, $6, $7.src_os, $7.src.host, $7.src.port, $7.dst.host, $7.dst.port, NULL, NULL, NULL, ""); } ; scrub_opts : { bzero(&scrub_opts, sizeof scrub_opts); scrub_opts.rtableid = -1; } scrub_opts_l { $$ = scrub_opts; } | /* empty */ { bzero(&scrub_opts, sizeof scrub_opts); scrub_opts.rtableid = -1; $$ = scrub_opts; } ; scrub_opts_l : scrub_opts_l scrub_opt | scrub_opt ; scrub_opt : NODF { if (scrub_opts.nodf) { yyerror("no-df cannot be respecified"); YYERROR; } scrub_opts.nodf = 1; } | MINTTL NUMBER { if (scrub_opts.marker & SOM_MINTTL) { yyerror("min-ttl cannot be respecified"); YYERROR; } if ($2 < 0 || $2 > 255) { yyerror("illegal min-ttl value %d", $2); YYERROR; } scrub_opts.marker |= SOM_MINTTL; scrub_opts.minttl = $2; } | MAXMSS NUMBER { if (scrub_opts.marker & SOM_MAXMSS) { yyerror("max-mss cannot be respecified"); YYERROR; } if ($2 < 0 || $2 > 65535) { yyerror("illegal max-mss value %d", $2); YYERROR; } scrub_opts.marker |= SOM_MAXMSS; scrub_opts.maxmss = $2; } | SETTOS tos { if (scrub_opts.marker & SOM_SETTOS) { yyerror("set-tos cannot be respecified"); YYERROR; } scrub_opts.marker |= SOM_SETTOS; scrub_opts.settos = $2; } | fragcache { if (scrub_opts.marker & SOM_FRAGCACHE) { yyerror("fragcache cannot be respecified"); YYERROR; } scrub_opts.marker |= SOM_FRAGCACHE; scrub_opts.fragcache = $1; } | REASSEMBLE STRING { if (strcasecmp($2, "tcp") != 0) { yyerror("scrub reassemble supports only tcp, " "not '%s'", $2); free($2); YYERROR; } free($2); if (scrub_opts.reassemble_tcp) { yyerror("reassemble tcp cannot be respecified"); YYERROR; } scrub_opts.reassemble_tcp = 1; } | RANDOMID { if (scrub_opts.randomid) { yyerror("random-id cannot be respecified"); YYERROR; } scrub_opts.randomid = 1; } | RTABLE NUMBER { if ($2 < 0 || $2 > rt_tableid_max()) { yyerror("invalid rtable id"); YYERROR; } scrub_opts.rtableid = $2; } | not TAGGED string { scrub_opts.match_tag = $3; scrub_opts.match_tag_not = $1; } ; fragcache : FRAGMENT REASSEMBLE { $$ = 0; /* default */ } | FRAGMENT FRAGCROP { $$ = 0; } | FRAGMENT FRAGDROP { $$ = 0; } ; antispoof : ANTISPOOF logquick antispoof_ifspc af antispoof_opts { struct pf_rule r; struct node_host *h = NULL, *hh; struct node_if *i, *j; if (check_rulestate(PFCTL_STATE_FILTER)) YYERROR; for (i = $3; i; i = i->next) { bzero(&r, sizeof(r)); r.action = PF_DROP; r.direction = PF_IN; r.log = $2.log; r.logif = $2.logif; r.quick = $2.quick; r.af = $4; if (rule_label(&r, $5.label)) YYERROR; r.rtableid = $5.rtableid; j = calloc(1, sizeof(struct node_if)); if (j == NULL) err(1, "antispoof: calloc"); if (strlcpy(j->ifname, i->ifname, sizeof(j->ifname)) >= sizeof(j->ifname)) { free(j); yyerror("interface name too long"); YYERROR; } j->not = 1; if (i->dynamic) { h = calloc(1, sizeof(*h)); if (h == NULL) err(1, "address: calloc"); h->addr.type = PF_ADDR_DYNIFTL; set_ipmask(h, 128); if (strlcpy(h->addr.v.ifname, i->ifname, sizeof(h->addr.v.ifname)) >= sizeof(h->addr.v.ifname)) { free(h); yyerror( "interface name too long"); YYERROR; } hh = malloc(sizeof(*hh)); if (hh == NULL) err(1, "address: malloc"); bcopy(h, hh, sizeof(*hh)); h->addr.iflags = PFI_AFLAG_NETWORK; } else { h = ifa_lookup(j->ifname, PFI_AFLAG_NETWORK); hh = NULL; } if 
(h != NULL) expand_rule(&r, j, NULL, NULL, NULL, h, NULL, NULL, NULL, NULL, NULL, NULL, ""); if ((i->ifa_flags & IFF_LOOPBACK) == 0) { bzero(&r, sizeof(r)); r.action = PF_DROP; r.direction = PF_IN; r.log = $2.log; r.logif = $2.logif; r.quick = $2.quick; r.af = $4; if (rule_label(&r, $5.label)) YYERROR; r.rtableid = $5.rtableid; if (hh != NULL) h = hh; else h = ifa_lookup(i->ifname, 0); if (h != NULL) expand_rule(&r, NULL, NULL, NULL, NULL, h, NULL, NULL, NULL, NULL, NULL, NULL, ""); } else free(hh); } free($5.label); } ; antispoof_ifspc : FOR antispoof_if { $$ = $2; } | FOR '{' optnl antispoof_iflst '}' { $$ = $4; } ; antispoof_iflst : antispoof_if optnl { $$ = $1; } | antispoof_iflst comma antispoof_if optnl { $1->tail->next = $3; $1->tail = $3; $$ = $1; } ; antispoof_if : if_item { $$ = $1; } | '(' if_item ')' { $2->dynamic = 1; $$ = $2; } ; antispoof_opts : { bzero(&antispoof_opts, sizeof antispoof_opts); antispoof_opts.rtableid = -1; } antispoof_opts_l { $$ = antispoof_opts; } | /* empty */ { bzero(&antispoof_opts, sizeof antispoof_opts); antispoof_opts.rtableid = -1; $$ = antispoof_opts; } ; antispoof_opts_l : antispoof_opts_l antispoof_opt | antispoof_opt ; antispoof_opt : label { if (antispoof_opts.label) { yyerror("label cannot be redefined"); YYERROR; } antispoof_opts.label = $1; } | RTABLE NUMBER { if ($2 < 0 || $2 > rt_tableid_max()) { yyerror("invalid rtable id"); YYERROR; } antispoof_opts.rtableid = $2; } ; not : '!' { $$ = 1; } | /* empty */ { $$ = 0; } ; tabledef : TABLE '<' STRING '>' table_opts { struct node_host *h, *nh; struct node_tinit *ti, *nti; if (strlen($3) >= PF_TABLE_NAME_SIZE) { yyerror("table name too long, max %d chars", PF_TABLE_NAME_SIZE - 1); free($3); YYERROR; } if (pf->loadopt & PFCTL_FLAG_TABLE) if (process_tabledef($3, &$5)) { free($3); YYERROR; } free($3); for (ti = SIMPLEQ_FIRST(&$5.init_nodes); ti != SIMPLEQ_END(&$5.init_nodes); ti = nti) { if (ti->file) free(ti->file); for (h = ti->host; h != NULL; h = nh) { nh = h->next; free(h); } nti = SIMPLEQ_NEXT(ti, entries); free(ti); } } ; table_opts : { bzero(&table_opts, sizeof table_opts); SIMPLEQ_INIT(&table_opts.init_nodes); } table_opts_l { $$ = table_opts; } | /* empty */ { bzero(&table_opts, sizeof table_opts); SIMPLEQ_INIT(&table_opts.init_nodes); $$ = table_opts; } ; table_opts_l : table_opts_l table_opt | table_opt ; table_opt : STRING { if (!strcmp($1, "const")) table_opts.flags |= PFR_TFLAG_CONST; else if (!strcmp($1, "persist")) table_opts.flags |= PFR_TFLAG_PERSIST; else if (!strcmp($1, "counters")) table_opts.flags |= PFR_TFLAG_COUNTERS; else { yyerror("invalid table option '%s'", $1); free($1); YYERROR; } free($1); } | '{' optnl '}' { table_opts.init_addr = 1; } | '{' optnl host_list '}' { struct node_host *n; struct node_tinit *ti; for (n = $3; n != NULL; n = n->next) { switch (n->addr.type) { case PF_ADDR_ADDRMASK: continue; /* ok */ case PF_ADDR_RANGE: yyerror("address ranges are not " "permitted inside tables"); break; case PF_ADDR_DYNIFTL: yyerror("dynamic addresses are not " "permitted inside tables"); break; case PF_ADDR_TABLE: yyerror("tables cannot contain tables"); break; case PF_ADDR_NOROUTE: yyerror("\"no-route\" is not permitted " "inside tables"); break; case PF_ADDR_URPFFAILED: yyerror("\"urpf-failed\" is not " "permitted inside tables"); break; default: yyerror("unknown address type %d", n->addr.type); } YYERROR; } if (!(ti = calloc(1, sizeof(*ti)))) err(1, "table_opt: calloc"); ti->host = $3; SIMPLEQ_INSERT_TAIL(&table_opts.init_nodes, ti, entries); table_opts.init_addr = 1; 
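			/*
			 * Only plain address/mask entries reach this point;
			 * ranges, dynamic (ifname) addresses and nested
			 * tables were rejected above.  Illustrative pf.conf
			 * (example only, table name made up):
			 *
			 *	table <goodguys> persist counters \
			 *	    { 192.0.2.0/24, !192.0.2.13 }
			 */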
} | FILENAME STRING { struct node_tinit *ti; if (!(ti = calloc(1, sizeof(*ti)))) err(1, "table_opt: calloc"); ti->file = $2; SIMPLEQ_INSERT_TAIL(&table_opts.init_nodes, ti, entries); table_opts.init_addr = 1; } ; altqif : ALTQ interface queue_opts QUEUE qassign { struct pf_altq a; if (check_rulestate(PFCTL_STATE_QUEUE)) YYERROR; memset(&a, 0, sizeof(a)); if ($3.scheduler.qtype == ALTQT_NONE) { yyerror("no scheduler specified!"); YYERROR; } a.scheduler = $3.scheduler.qtype; a.qlimit = $3.qlimit; a.tbrsize = $3.tbrsize; if ($5 == NULL && $3.scheduler.qtype != ALTQT_CODEL) { yyerror("no child queues specified"); YYERROR; } if (expand_altq(&a, $2, $5, $3.queue_bwspec, &$3.scheduler)) YYERROR; } ; queuespec : QUEUE STRING interface queue_opts qassign { struct pf_altq a; if (check_rulestate(PFCTL_STATE_QUEUE)) { free($2); YYERROR; } memset(&a, 0, sizeof(a)); if (strlcpy(a.qname, $2, sizeof(a.qname)) >= sizeof(a.qname)) { yyerror("queue name too long (max " "%d chars)", PF_QNAME_SIZE-1); free($2); YYERROR; } free($2); if ($4.tbrsize) { yyerror("cannot specify tbrsize for queue"); YYERROR; } if ($4.priority > 255) { yyerror("priority out of range: max 255"); YYERROR; } a.priority = $4.priority; a.qlimit = $4.qlimit; a.scheduler = $4.scheduler.qtype; if (expand_queue(&a, $3, $5, $4.queue_bwspec, &$4.scheduler)) { yyerror("errors in queue definition"); YYERROR; } } ; queue_opts : { bzero(&queue_opts, sizeof queue_opts); queue_opts.priority = DEFAULT_PRIORITY; queue_opts.qlimit = DEFAULT_QLIMIT; queue_opts.scheduler.qtype = ALTQT_NONE; queue_opts.queue_bwspec.bw_percent = 100; } queue_opts_l { $$ = queue_opts; } | /* empty */ { bzero(&queue_opts, sizeof queue_opts); queue_opts.priority = DEFAULT_PRIORITY; queue_opts.qlimit = DEFAULT_QLIMIT; queue_opts.scheduler.qtype = ALTQT_NONE; queue_opts.queue_bwspec.bw_percent = 100; $$ = queue_opts; } ; queue_opts_l : queue_opts_l queue_opt | queue_opt ; queue_opt : BANDWIDTH bandwidth { if (queue_opts.marker & QOM_BWSPEC) { yyerror("bandwidth cannot be respecified"); YYERROR; } queue_opts.marker |= QOM_BWSPEC; queue_opts.queue_bwspec = $2; } | PRIORITY NUMBER { if (queue_opts.marker & QOM_PRIORITY) { yyerror("priority cannot be respecified"); YYERROR; } if ($2 < 0 || $2 > 255) { yyerror("priority out of range: max 255"); YYERROR; } queue_opts.marker |= QOM_PRIORITY; queue_opts.priority = $2; } | QLIMIT NUMBER { if (queue_opts.marker & QOM_QLIMIT) { yyerror("qlimit cannot be respecified"); YYERROR; } if ($2 < 0 || $2 > 65535) { yyerror("qlimit out of range: max 65535"); YYERROR; } queue_opts.marker |= QOM_QLIMIT; queue_opts.qlimit = $2; } | scheduler { if (queue_opts.marker & QOM_SCHEDULER) { yyerror("scheduler cannot be respecified"); YYERROR; } queue_opts.marker |= QOM_SCHEDULER; queue_opts.scheduler = $1; } | TBRSIZE NUMBER { if (queue_opts.marker & QOM_TBRSIZE) { yyerror("tbrsize cannot be respecified"); YYERROR; } if ($2 < 0 || $2 > 65535) { yyerror("tbrsize too big: max 65535"); YYERROR; } queue_opts.marker |= QOM_TBRSIZE; queue_opts.tbrsize = $2; } ; bandwidth : STRING { double bps; char *cp; $$.bw_percent = 0; bps = strtod($1, &cp); if (cp != NULL) { if (strlen(cp) > 1) { char *cu = cp + 1; if (!strcmp(cu, "Bit") || !strcmp(cu, "B") || !strcmp(cu, "bit") || !strcmp(cu, "b")) { *cu = 0; } } if (!strcmp(cp, "b")) ; /* nothing */ else if (!strcmp(cp, "K")) bps *= 1000; else if (!strcmp(cp, "M")) bps *= 1000 * 1000; else if (!strcmp(cp, "G")) bps *= 1000 * 1000 * 1000; else if (!strcmp(cp, "%")) { if (bps < 0 || bps > 100) { yyerror("bandwidth spec " "out 
of range"); free($1); YYERROR; } $$.bw_percent = bps; bps = 0; } else { yyerror("unknown unit %s", cp); free($1); YYERROR; } } free($1); $$.bw_absolute = (u_int32_t)bps; } | NUMBER { if ($1 < 0 || $1 > UINT_MAX) { yyerror("bandwidth number too big"); YYERROR; } $$.bw_percent = 0; $$.bw_absolute = $1; } ; scheduler : CBQ { $$.qtype = ALTQT_CBQ; $$.data.cbq_opts.flags = 0; } | CBQ '(' cbqflags_list ')' { $$.qtype = ALTQT_CBQ; $$.data.cbq_opts.flags = $3; } | PRIQ { $$.qtype = ALTQT_PRIQ; $$.data.priq_opts.flags = 0; } | PRIQ '(' priqflags_list ')' { $$.qtype = ALTQT_PRIQ; $$.data.priq_opts.flags = $3; } | HFSC { $$.qtype = ALTQT_HFSC; bzero(&$$.data.hfsc_opts, sizeof(struct node_hfsc_opts)); } | HFSC '(' hfsc_opts ')' { $$.qtype = ALTQT_HFSC; $$.data.hfsc_opts = $3; } | FAIRQ { $$.qtype = ALTQT_FAIRQ; bzero(&$$.data.fairq_opts, sizeof(struct node_fairq_opts)); } | FAIRQ '(' fairq_opts ')' { $$.qtype = ALTQT_FAIRQ; $$.data.fairq_opts = $3; } | CODEL { $$.qtype = ALTQT_CODEL; bzero(&$$.data.codel_opts, sizeof(struct codel_opts)); } | CODEL '(' codel_opts ')' { $$.qtype = ALTQT_CODEL; $$.data.codel_opts = $3; } ; cbqflags_list : cbqflags_item { $$ |= $1; } | cbqflags_list comma cbqflags_item { $$ |= $3; } ; cbqflags_item : STRING { if (!strcmp($1, "default")) $$ = CBQCLF_DEFCLASS; else if (!strcmp($1, "borrow")) $$ = CBQCLF_BORROW; else if (!strcmp($1, "red")) $$ = CBQCLF_RED; else if (!strcmp($1, "ecn")) $$ = CBQCLF_RED|CBQCLF_ECN; else if (!strcmp($1, "rio")) $$ = CBQCLF_RIO; else if (!strcmp($1, "codel")) $$ = CBQCLF_CODEL; else { yyerror("unknown cbq flag \"%s\"", $1); free($1); YYERROR; } free($1); } ; priqflags_list : priqflags_item { $$ |= $1; } | priqflags_list comma priqflags_item { $$ |= $3; } ; priqflags_item : STRING { if (!strcmp($1, "default")) $$ = PRCF_DEFAULTCLASS; else if (!strcmp($1, "red")) $$ = PRCF_RED; else if (!strcmp($1, "ecn")) $$ = PRCF_RED|PRCF_ECN; else if (!strcmp($1, "rio")) $$ = PRCF_RIO; else if (!strcmp($1, "codel")) $$ = PRCF_CODEL; else { yyerror("unknown priq flag \"%s\"", $1); free($1); YYERROR; } free($1); } ; hfsc_opts : { bzero(&hfsc_opts, sizeof(struct node_hfsc_opts)); } hfscopts_list { $$ = hfsc_opts; } ; hfscopts_list : hfscopts_item | hfscopts_list comma hfscopts_item ; hfscopts_item : LINKSHARE bandwidth { if (hfsc_opts.linkshare.used) { yyerror("linkshare already specified"); YYERROR; } hfsc_opts.linkshare.m2 = $2; hfsc_opts.linkshare.used = 1; } | LINKSHARE '(' bandwidth comma NUMBER comma bandwidth ')' { if ($5 < 0 || $5 > INT_MAX) { yyerror("timing in curve out of range"); YYERROR; } if (hfsc_opts.linkshare.used) { yyerror("linkshare already specified"); YYERROR; } hfsc_opts.linkshare.m1 = $3; hfsc_opts.linkshare.d = $5; hfsc_opts.linkshare.m2 = $7; hfsc_opts.linkshare.used = 1; } | REALTIME bandwidth { if (hfsc_opts.realtime.used) { yyerror("realtime already specified"); YYERROR; } hfsc_opts.realtime.m2 = $2; hfsc_opts.realtime.used = 1; } | REALTIME '(' bandwidth comma NUMBER comma bandwidth ')' { if ($5 < 0 || $5 > INT_MAX) { yyerror("timing in curve out of range"); YYERROR; } if (hfsc_opts.realtime.used) { yyerror("realtime already specified"); YYERROR; } hfsc_opts.realtime.m1 = $3; hfsc_opts.realtime.d = $5; hfsc_opts.realtime.m2 = $7; hfsc_opts.realtime.used = 1; } | UPPERLIMIT bandwidth { if (hfsc_opts.upperlimit.used) { yyerror("upperlimit already specified"); YYERROR; } hfsc_opts.upperlimit.m2 = $2; hfsc_opts.upperlimit.used = 1; } | UPPERLIMIT '(' bandwidth comma NUMBER comma bandwidth ')' { if ($5 < 0 || $5 > INT_MAX) { 
yyerror("timing in curve out of range"); YYERROR; } if (hfsc_opts.upperlimit.used) { yyerror("upperlimit already specified"); YYERROR; } hfsc_opts.upperlimit.m1 = $3; hfsc_opts.upperlimit.d = $5; hfsc_opts.upperlimit.m2 = $7; hfsc_opts.upperlimit.used = 1; } | STRING { if (!strcmp($1, "default")) hfsc_opts.flags |= HFCF_DEFAULTCLASS; else if (!strcmp($1, "red")) hfsc_opts.flags |= HFCF_RED; else if (!strcmp($1, "ecn")) hfsc_opts.flags |= HFCF_RED|HFCF_ECN; else if (!strcmp($1, "rio")) hfsc_opts.flags |= HFCF_RIO; else if (!strcmp($1, "codel")) hfsc_opts.flags |= HFCF_CODEL; else { yyerror("unknown hfsc flag \"%s\"", $1); free($1); YYERROR; } free($1); } ; fairq_opts : { bzero(&fairq_opts, sizeof(struct node_fairq_opts)); } fairqopts_list { $$ = fairq_opts; } ; fairqopts_list : fairqopts_item | fairqopts_list comma fairqopts_item ; fairqopts_item : LINKSHARE bandwidth { if (fairq_opts.linkshare.used) { yyerror("linkshare already specified"); YYERROR; } fairq_opts.linkshare.m2 = $2; fairq_opts.linkshare.used = 1; } | LINKSHARE '(' bandwidth number bandwidth ')' { if (fairq_opts.linkshare.used) { yyerror("linkshare already specified"); YYERROR; } fairq_opts.linkshare.m1 = $3; fairq_opts.linkshare.d = $4; fairq_opts.linkshare.m2 = $5; fairq_opts.linkshare.used = 1; } | HOGS bandwidth { fairq_opts.hogs_bw = $2; } | BUCKETS number { fairq_opts.nbuckets = $2; } | STRING { if (!strcmp($1, "default")) fairq_opts.flags |= FARF_DEFAULTCLASS; else if (!strcmp($1, "red")) fairq_opts.flags |= FARF_RED; else if (!strcmp($1, "ecn")) fairq_opts.flags |= FARF_RED|FARF_ECN; else if (!strcmp($1, "rio")) fairq_opts.flags |= FARF_RIO; else if (!strcmp($1, "codel")) fairq_opts.flags |= FARF_CODEL; else { yyerror("unknown fairq flag \"%s\"", $1); free($1); YYERROR; } free($1); } ; codel_opts : { bzero(&codel_opts, sizeof(struct codel_opts)); } codelopts_list { $$ = codel_opts; } ; codelopts_list : codelopts_item | codelopts_list comma codelopts_item ; codelopts_item : INTERVAL number { if (codel_opts.interval) { yyerror("interval already specified"); YYERROR; } codel_opts.interval = $2; } | TARGET number { if (codel_opts.target) { yyerror("target already specified"); YYERROR; } codel_opts.target = $2; } | STRING { if (!strcmp($1, "ecn")) codel_opts.ecn = 1; else { yyerror("unknown codel option \"%s\"", $1); free($1); YYERROR; } free($1); } ; qassign : /* empty */ { $$ = NULL; } | qassign_item { $$ = $1; } | '{' optnl qassign_list '}' { $$ = $3; } ; qassign_list : qassign_item optnl { $$ = $1; } | qassign_list comma qassign_item optnl { $1->tail->next = $3; $1->tail = $3; $$ = $1; } ; qassign_item : STRING { $$ = calloc(1, sizeof(struct node_queue)); if ($$ == NULL) err(1, "qassign_item: calloc"); if (strlcpy($$->queue, $1, sizeof($$->queue)) >= sizeof($$->queue)) { yyerror("queue name '%s' too long (max " "%d chars)", $1, sizeof($$->queue)-1); free($1); free($$); YYERROR; } free($1); $$->next = NULL; $$->tail = $$; } ; pfrule : action dir logquick interface route af proto fromto filter_opts { struct pf_rule r; struct node_state_opt *o; struct node_proto *proto; int srctrack = 0; int statelock = 0; int adaptive = 0; int defaults = 0; if (check_rulestate(PFCTL_STATE_FILTER)) YYERROR; memset(&r, 0, sizeof(r)); r.action = $1.b1; switch ($1.b2) { case PFRULE_RETURNRST: r.rule_flag |= PFRULE_RETURNRST; r.return_ttl = $1.w; break; case PFRULE_RETURNICMP: r.rule_flag |= PFRULE_RETURNICMP; r.return_icmp = $1.w; r.return_icmp6 = $1.w2; break; case PFRULE_RETURN: r.rule_flag |= PFRULE_RETURN; r.return_icmp = $1.w; 
r.return_icmp6 = $1.w2; break; } r.direction = $2; r.log = $3.log; r.logif = $3.logif; r.quick = $3.quick; r.prob = $9.prob; r.rtableid = $9.rtableid; if ($9.marker & FOM_PRIO) { if ($9.prio == 0) r.prio = PF_PRIO_ZERO; else r.prio = $9.prio; } if ($9.marker & FOM_SETPRIO) { r.set_prio[0] = $9.set_prio[0]; r.set_prio[1] = $9.set_prio[1]; r.scrub_flags |= PFSTATE_SETPRIO; } r.af = $6; if ($9.tag) if (strlcpy(r.tagname, $9.tag, PF_TAG_NAME_SIZE) >= PF_TAG_NAME_SIZE) { yyerror("tag too long, max %u chars", PF_TAG_NAME_SIZE - 1); YYERROR; } if ($9.match_tag) if (strlcpy(r.match_tagname, $9.match_tag, PF_TAG_NAME_SIZE) >= PF_TAG_NAME_SIZE) { yyerror("tag too long, max %u chars", PF_TAG_NAME_SIZE - 1); YYERROR; } r.match_tag_not = $9.match_tag_not; if (rule_label(&r, $9.label)) YYERROR; free($9.label); r.flags = $9.flags.b1; r.flagset = $9.flags.b2; if (($9.flags.b1 & $9.flags.b2) != $9.flags.b1) { yyerror("flags always false"); YYERROR; } if ($9.flags.b1 || $9.flags.b2 || $8.src_os) { for (proto = $7; proto != NULL && proto->proto != IPPROTO_TCP; proto = proto->next) ; /* nothing */ if (proto == NULL && $7 != NULL) { if ($9.flags.b1 || $9.flags.b2) yyerror( "flags only apply to tcp"); if ($8.src_os) yyerror( "OS fingerprinting only " "apply to tcp"); YYERROR; } #if 0 if (($9.flags.b1 & parse_flags("S")) == 0 && $8.src_os) { yyerror("OS fingerprinting requires " "the SYN TCP flag (flags S/SA)"); YYERROR; } #endif } r.tos = $9.tos; r.keep_state = $9.keep.action; o = $9.keep.options; /* 'keep state' by default on pass rules. */ if (!r.keep_state && !r.action && !($9.marker & FOM_KEEP)) { r.keep_state = PF_STATE_NORMAL; o = keep_state_defaults; defaults = 1; } while (o) { struct node_state_opt *p = o; switch (o->type) { case PF_STATE_OPT_MAX: if (r.max_states) { yyerror("state option 'max' " "multiple definitions"); YYERROR; } r.max_states = o->data.max_states; break; case PF_STATE_OPT_NOSYNC: if (r.rule_flag & PFRULE_NOSYNC) { yyerror("state option 'sync' " "multiple definitions"); YYERROR; } r.rule_flag |= PFRULE_NOSYNC; break; case PF_STATE_OPT_SRCTRACK: if (srctrack) { yyerror("state option " "'source-track' " "multiple definitions"); YYERROR; } srctrack = o->data.src_track; r.rule_flag |= PFRULE_SRCTRACK; break; case PF_STATE_OPT_MAX_SRC_STATES: if (r.max_src_states) { yyerror("state option " "'max-src-states' " "multiple definitions"); YYERROR; } if (o->data.max_src_states == 0) { yyerror("'max-src-states' must " "be > 0"); YYERROR; } r.max_src_states = o->data.max_src_states; r.rule_flag |= PFRULE_SRCTRACK; break; case PF_STATE_OPT_OVERLOAD: if (r.overload_tblname[0]) { yyerror("multiple 'overload' " "table definitions"); YYERROR; } if (strlcpy(r.overload_tblname, o->data.overload.tblname, PF_TABLE_NAME_SIZE) >= PF_TABLE_NAME_SIZE) { yyerror("state option: " "strlcpy"); YYERROR; } r.flush = o->data.overload.flush; break; case PF_STATE_OPT_MAX_SRC_CONN: if (r.max_src_conn) { yyerror("state option " "'max-src-conn' " "multiple definitions"); YYERROR; } if (o->data.max_src_conn == 0) { yyerror("'max-src-conn' " "must be > 0"); YYERROR; } r.max_src_conn = o->data.max_src_conn; r.rule_flag |= PFRULE_SRCTRACK | PFRULE_RULESRCTRACK; break; case PF_STATE_OPT_MAX_SRC_CONN_RATE: if (r.max_src_conn_rate.limit) { yyerror("state option " "'max-src-conn-rate' " "multiple definitions"); YYERROR; } if (!o->data.max_src_conn_rate.limit || !o->data.max_src_conn_rate.seconds) { yyerror("'max-src-conn-rate' " "values must be > 0"); YYERROR; } if (o->data.max_src_conn_rate.limit > PF_THRESHOLD_MAX) { 
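				/*
				 * Illustrative pf.conf use of these state
				 * options (example only, table name made up):
				 *
				 *	pass in proto tcp to port ssh \
				 *	    keep state (max-src-conn 10, \
				 *	    max-src-conn-rate 3/60, \
				 *	    overload <bruteforce> flush global)
				 */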
yyerror("'max-src-conn-rate' " "maximum rate must be < %u", PF_THRESHOLD_MAX); YYERROR; } r.max_src_conn_rate.limit = o->data.max_src_conn_rate.limit; r.max_src_conn_rate.seconds = o->data.max_src_conn_rate.seconds; r.rule_flag |= PFRULE_SRCTRACK | PFRULE_RULESRCTRACK; break; case PF_STATE_OPT_MAX_SRC_NODES: if (r.max_src_nodes) { yyerror("state option " "'max-src-nodes' " "multiple definitions"); YYERROR; } if (o->data.max_src_nodes == 0) { yyerror("'max-src-nodes' must " "be > 0"); YYERROR; } r.max_src_nodes = o->data.max_src_nodes; r.rule_flag |= PFRULE_SRCTRACK | PFRULE_RULESRCTRACK; break; case PF_STATE_OPT_STATELOCK: if (statelock) { yyerror("state locking option: " "multiple definitions"); YYERROR; } statelock = 1; r.rule_flag |= o->data.statelock; break; case PF_STATE_OPT_SLOPPY: if (r.rule_flag & PFRULE_STATESLOPPY) { yyerror("state sloppy option: " "multiple definitions"); YYERROR; } r.rule_flag |= PFRULE_STATESLOPPY; break; case PF_STATE_OPT_TIMEOUT: if (o->data.timeout.number == PFTM_ADAPTIVE_START || o->data.timeout.number == PFTM_ADAPTIVE_END) adaptive = 1; if (r.timeout[o->data.timeout.number]) { yyerror("state timeout %s " "multiple definitions", pf_timeouts[o->data. timeout.number].name); YYERROR; } r.timeout[o->data.timeout.number] = o->data.timeout.seconds; } o = o->next; if (!defaults) free(p); } /* 'flags S/SA' by default on stateful rules */ if (!r.action && !r.flags && !r.flagset && !$9.fragment && !($9.marker & FOM_FLAGS) && r.keep_state) { r.flags = parse_flags("S"); r.flagset = parse_flags("SA"); } if (!adaptive && r.max_states) { r.timeout[PFTM_ADAPTIVE_START] = (r.max_states / 10) * 6; r.timeout[PFTM_ADAPTIVE_END] = (r.max_states / 10) * 12; } if (r.rule_flag & PFRULE_SRCTRACK) { if (srctrack == PF_SRCTRACK_GLOBAL && r.max_src_nodes) { yyerror("'max-src-nodes' is " "incompatible with " "'source-track global'"); YYERROR; } if (srctrack == PF_SRCTRACK_GLOBAL && r.max_src_conn) { yyerror("'max-src-conn' is " "incompatible with " "'source-track global'"); YYERROR; } if (srctrack == PF_SRCTRACK_GLOBAL && r.max_src_conn_rate.seconds) { yyerror("'max-src-conn-rate' is " "incompatible with " "'source-track global'"); YYERROR; } if (r.timeout[PFTM_SRC_NODE] < r.max_src_conn_rate.seconds) r.timeout[PFTM_SRC_NODE] = r.max_src_conn_rate.seconds; r.rule_flag |= PFRULE_SRCTRACK; if (srctrack == PF_SRCTRACK_RULE) r.rule_flag |= PFRULE_RULESRCTRACK; } if (r.keep_state && !statelock) r.rule_flag |= default_statelock; if ($9.fragment) r.rule_flag |= PFRULE_FRAGMENT; r.allow_opts = $9.allowopts; decide_address_family($8.src.host, &r.af); decide_address_family($8.dst.host, &r.af); if ($5.rt) { if (!r.direction) { yyerror("direction must be explicit " "with rules that specify routing"); YYERROR; } r.rt = $5.rt; r.rpool.opts = $5.pool_opts; if ($5.key != NULL) memcpy(&r.rpool.key, $5.key, sizeof(struct pf_poolhashkey)); } if (r.rt && r.rt != PF_FASTROUTE) { decide_address_family($5.host, &r.af); remove_invalid_hosts(&$5.host, &r.af); if ($5.host == NULL) { yyerror("no routing address with " "matching address family found."); YYERROR; } if ((r.rpool.opts & PF_POOL_TYPEMASK) == PF_POOL_NONE && ($5.host->next != NULL || $5.host->addr.type == PF_ADDR_TABLE || DYNIF_MULTIADDR($5.host->addr))) r.rpool.opts |= PF_POOL_ROUNDROBIN; if ((r.rpool.opts & PF_POOL_TYPEMASK) != PF_POOL_ROUNDROBIN && disallow_table($5.host, "tables are only " "supported in round-robin routing pools")) YYERROR; if ((r.rpool.opts & PF_POOL_TYPEMASK) != PF_POOL_ROUNDROBIN && disallow_alias($5.host, "interface (%s) " 
"is only supported in round-robin " "routing pools")) YYERROR; if ($5.host->next != NULL) { if ((r.rpool.opts & PF_POOL_TYPEMASK) != PF_POOL_ROUNDROBIN) { yyerror("r.rpool.opts must " "be PF_POOL_ROUNDROBIN"); YYERROR; } } } if ($9.queues.qname != NULL) { if (strlcpy(r.qname, $9.queues.qname, sizeof(r.qname)) >= sizeof(r.qname)) { yyerror("rule qname too long (max " "%d chars)", sizeof(r.qname)-1); YYERROR; } free($9.queues.qname); } if ($9.queues.pqname != NULL) { if (strlcpy(r.pqname, $9.queues.pqname, sizeof(r.pqname)) >= sizeof(r.pqname)) { yyerror("rule pqname too long (max " "%d chars)", sizeof(r.pqname)-1); YYERROR; } free($9.queues.pqname); } #ifdef __FreeBSD__ r.divert.port = $9.divert.port; #else if ((r.divert.port = $9.divert.port)) { if (r.direction == PF_OUT) { if ($9.divert.addr) { yyerror("address specified " "for outgoing divert"); YYERROR; } bzero(&r.divert.addr, sizeof(r.divert.addr)); } else { if (!$9.divert.addr) { yyerror("no address specified " "for incoming divert"); YYERROR; } if ($9.divert.addr->af != r.af) { yyerror("address family " "mismatch for divert"); YYERROR; } r.divert.addr = $9.divert.addr->addr.v.a.addr; } } #endif expand_rule(&r, $4, $5.host, $7, $8.src_os, $8.src.host, $8.src.port, $8.dst.host, $8.dst.port, $9.uid, $9.gid, $9.icmpspec, ""); } ; filter_opts : { bzero(&filter_opts, sizeof filter_opts); filter_opts.rtableid = -1; } filter_opts_l { $$ = filter_opts; } | /* empty */ { bzero(&filter_opts, sizeof filter_opts); filter_opts.rtableid = -1; $$ = filter_opts; } ; filter_opts_l : filter_opts_l filter_opt | filter_opt ; filter_opt : USER uids { if (filter_opts.uid) $2->tail->next = filter_opts.uid; filter_opts.uid = $2; } | GROUP gids { if (filter_opts.gid) $2->tail->next = filter_opts.gid; filter_opts.gid = $2; } | flags { if (filter_opts.marker & FOM_FLAGS) { yyerror("flags cannot be redefined"); YYERROR; } filter_opts.marker |= FOM_FLAGS; filter_opts.flags.b1 |= $1.b1; filter_opts.flags.b2 |= $1.b2; filter_opts.flags.w |= $1.w; filter_opts.flags.w2 |= $1.w2; } | icmpspec { if (filter_opts.marker & FOM_ICMP) { yyerror("icmp-type cannot be redefined"); YYERROR; } filter_opts.marker |= FOM_ICMP; filter_opts.icmpspec = $1; } | PRIO NUMBER { if (filter_opts.marker & FOM_PRIO) { yyerror("prio cannot be redefined"); YYERROR; } if ($2 < 0 || $2 > PF_PRIO_MAX) { yyerror("prio must be 0 - %u", PF_PRIO_MAX); YYERROR; } filter_opts.marker |= FOM_PRIO; filter_opts.prio = $2; } | TOS tos { if (filter_opts.marker & FOM_TOS) { yyerror("tos cannot be redefined"); YYERROR; } filter_opts.marker |= FOM_TOS; filter_opts.tos = $2; } | keep { if (filter_opts.marker & FOM_KEEP) { yyerror("modulate or keep cannot be redefined"); YYERROR; } filter_opts.marker |= FOM_KEEP; filter_opts.keep.action = $1.action; filter_opts.keep.options = $1.options; } | FRAGMENT { filter_opts.fragment = 1; } | ALLOWOPTS { filter_opts.allowopts = 1; } | label { if (filter_opts.label) { yyerror("label cannot be redefined"); YYERROR; } filter_opts.label = $1; } | qname { if (filter_opts.queues.qname) { yyerror("queue cannot be redefined"); YYERROR; } filter_opts.queues = $1; } | TAG string { filter_opts.tag = $2; } | not TAGGED string { filter_opts.match_tag = $3; filter_opts.match_tag_not = $1; } | PROBABILITY probability { double p; p = floor($2 * UINT_MAX + 0.5); if (p < 0.0 || p > UINT_MAX) { yyerror("invalid probability: %lf", p); YYERROR; } filter_opts.prob = (u_int32_t)p; if (filter_opts.prob == 0) filter_opts.prob = 1; } | RTABLE NUMBER { if ($2 < 0 || $2 > rt_tableid_max()) { 
yyerror("invalid rtable id"); YYERROR; } filter_opts.rtableid = $2; } | DIVERTTO portplain { #ifdef __FreeBSD__ filter_opts.divert.port = $2.a; if (!filter_opts.divert.port) { yyerror("invalid divert port: %u", ntohs($2.a)); YYERROR; } #endif } | DIVERTTO STRING PORT portplain { #ifndef __FreeBSD__ if ((filter_opts.divert.addr = host($2)) == NULL) { yyerror("could not parse divert address: %s", $2); free($2); YYERROR; } #else if ($2) #endif free($2); filter_opts.divert.port = $4.a; if (!filter_opts.divert.port) { yyerror("invalid divert port: %u", ntohs($4.a)); YYERROR; } } | DIVERTREPLY { #ifdef __FreeBSD__ yyerror("divert-reply has no meaning in FreeBSD pf(4)"); YYERROR; #else filter_opts.divert.port = 1; /* some random value */ #endif } | filter_sets ; filter_sets : SET '(' filter_sets_l ')' { $$ = filter_opts; } | SET filter_set { $$ = filter_opts; } ; filter_sets_l : filter_sets_l comma filter_set | filter_set ; filter_set : prio { if (filter_opts.marker & FOM_SETPRIO) { yyerror("prio cannot be redefined"); YYERROR; } filter_opts.marker |= FOM_SETPRIO; filter_opts.set_prio[0] = $1.b1; filter_opts.set_prio[1] = $1.b2; } prio : PRIO NUMBER { if ($2 < 0 || $2 > PF_PRIO_MAX) { yyerror("prio must be 0 - %u", PF_PRIO_MAX); YYERROR; } $$.b1 = $$.b2 = $2; } | PRIO '(' NUMBER comma NUMBER ')' { if ($3 < 0 || $3 > PF_PRIO_MAX || $5 < 0 || $5 > PF_PRIO_MAX) { yyerror("prio must be 0 - %u", PF_PRIO_MAX); YYERROR; } $$.b1 = $3; $$.b2 = $5; } ; probability : STRING { char *e; double p = strtod($1, &e); if (*e == '%') { p *= 0.01; e++; } if (*e) { yyerror("invalid probability: %s", $1); free($1); YYERROR; } free($1); $$ = p; } | NUMBER { $$ = (double)$1; } ; action : PASS { $$.b1 = PF_PASS; $$.b2 = $$.w = 0; } | BLOCK blockspec { $$ = $2; $$.b1 = PF_DROP; } ; blockspec : /* empty */ { $$.b2 = blockpolicy; $$.w = returnicmpdefault; $$.w2 = returnicmp6default; } | DROP { $$.b2 = PFRULE_DROP; $$.w = 0; $$.w2 = 0; } | RETURNRST { $$.b2 = PFRULE_RETURNRST; $$.w = 0; $$.w2 = 0; } | RETURNRST '(' TTL NUMBER ')' { if ($4 < 0 || $4 > 255) { yyerror("illegal ttl value %d", $4); YYERROR; } $$.b2 = PFRULE_RETURNRST; $$.w = $4; $$.w2 = 0; } | RETURNICMP { $$.b2 = PFRULE_RETURNICMP; $$.w = returnicmpdefault; $$.w2 = returnicmp6default; } | RETURNICMP6 { $$.b2 = PFRULE_RETURNICMP; $$.w = returnicmpdefault; $$.w2 = returnicmp6default; } | RETURNICMP '(' reticmpspec ')' { $$.b2 = PFRULE_RETURNICMP; $$.w = $3; $$.w2 = returnicmpdefault; } | RETURNICMP6 '(' reticmp6spec ')' { $$.b2 = PFRULE_RETURNICMP; $$.w = returnicmpdefault; $$.w2 = $3; } | RETURNICMP '(' reticmpspec comma reticmp6spec ')' { $$.b2 = PFRULE_RETURNICMP; $$.w = $3; $$.w2 = $5; } | RETURN { $$.b2 = PFRULE_RETURN; $$.w = returnicmpdefault; $$.w2 = returnicmp6default; } ; reticmpspec : STRING { if (!($$ = parseicmpspec($1, AF_INET))) { free($1); YYERROR; } free($1); } | NUMBER { u_int8_t icmptype; if ($1 < 0 || $1 > 255) { yyerror("invalid icmp code %lu", $1); YYERROR; } icmptype = returnicmpdefault >> 8; $$ = (icmptype << 8 | $1); } ; reticmp6spec : STRING { if (!($$ = parseicmpspec($1, AF_INET6))) { free($1); YYERROR; } free($1); } | NUMBER { u_int8_t icmptype; if ($1 < 0 || $1 > 255) { yyerror("invalid icmp code %lu", $1); YYERROR; } icmptype = returnicmp6default >> 8; $$ = (icmptype << 8 | $1); } ; dir : /* empty */ { $$ = PF_INOUT; } | IN { $$ = PF_IN; } | OUT { $$ = PF_OUT; } ; quick : /* empty */ { $$.quick = 0; } | QUICK { $$.quick = 1; } ; logquick : /* empty */ { $$.log = 0; $$.quick = 0; $$.logif = 0; } | log { $$ = $1; $$.quick = 0; } | QUICK 
{ $$.quick = 1; $$.log = 0; $$.logif = 0; } | log QUICK { $$ = $1; $$.quick = 1; } | QUICK log { $$ = $2; $$.quick = 1; } ; log : LOG { $$.log = PF_LOG; $$.logif = 0; } | LOG '(' logopts ')' { $$.log = PF_LOG | $3.log; $$.logif = $3.logif; } ; logopts : logopt { $$ = $1; } | logopts comma logopt { $$.log = $1.log | $3.log; $$.logif = $3.logif; if ($$.logif == 0) $$.logif = $1.logif; } ; logopt : ALL { $$.log = PF_LOG_ALL; $$.logif = 0; } | USER { $$.log = PF_LOG_SOCKET_LOOKUP; $$.logif = 0; } | GROUP { $$.log = PF_LOG_SOCKET_LOOKUP; $$.logif = 0; } | TO string { const char *errstr; u_int i; $$.log = 0; if (strncmp($2, "pflog", 5)) { yyerror("%s: should be a pflog interface", $2); free($2); YYERROR; } i = strtonum($2 + 5, 0, 255, &errstr); if (errstr) { yyerror("%s: %s", $2, errstr); free($2); YYERROR; } free($2); $$.logif = i; } ; interface : /* empty */ { $$ = NULL; } | ON if_item_not { $$ = $2; } | ON '{' optnl if_list '}' { $$ = $4; } ; if_list : if_item_not optnl { $$ = $1; } | if_list comma if_item_not optnl { $1->tail->next = $3; $1->tail = $3; $$ = $1; } ; if_item_not : not if_item { $$ = $2; $$->not = $1; } ; if_item : STRING { struct node_host *n; $$ = calloc(1, sizeof(struct node_if)); if ($$ == NULL) err(1, "if_item: calloc"); if (strlcpy($$->ifname, $1, sizeof($$->ifname)) >= sizeof($$->ifname)) { free($1); free($$); yyerror("interface name too long"); YYERROR; } if ((n = ifa_exists($1)) != NULL) $$->ifa_flags = n->ifa_flags; free($1); $$->not = 0; $$->next = NULL; $$->tail = $$; } ; af : /* empty */ { $$ = 0; } | INET { $$ = AF_INET; } | INET6 { $$ = AF_INET6; } ; proto : /* empty */ { $$ = NULL; } | PROTO proto_item { $$ = $2; } | PROTO '{' optnl proto_list '}' { $$ = $4; } ; proto_list : proto_item optnl { $$ = $1; } | proto_list comma proto_item optnl { $1->tail->next = $3; $1->tail = $3; $$ = $1; } ; proto_item : protoval { u_int8_t pr; pr = (u_int8_t)$1; if (pr == 0) { yyerror("proto 0 cannot be used"); YYERROR; } $$ = calloc(1, sizeof(struct node_proto)); if ($$ == NULL) err(1, "proto_item: calloc"); $$->proto = pr; $$->next = NULL; $$->tail = $$; } ; protoval : STRING { struct protoent *p; p = getprotobyname($1); if (p == NULL) { yyerror("unknown protocol %s", $1); free($1); YYERROR; } $$ = p->p_proto; free($1); } | NUMBER { if ($1 < 0 || $1 > 255) { yyerror("protocol outside range"); YYERROR; } } ; fromto : ALL { $$.src.host = NULL; $$.src.port = NULL; $$.dst.host = NULL; $$.dst.port = NULL; $$.src_os = NULL; } | from os to { $$.src = $1; $$.src_os = $2; $$.dst = $3; } ; os : /* empty */ { $$ = NULL; } | OS xos { $$ = $2; } | OS '{' optnl os_list '}' { $$ = $4; } ; xos : STRING { $$ = calloc(1, sizeof(struct node_os)); if ($$ == NULL) err(1, "os: calloc"); $$->os = $1; $$->tail = $$; } ; os_list : xos optnl { $$ = $1; } | os_list comma xos optnl { $1->tail->next = $3; $1->tail = $3; $$ = $1; } ; from : /* empty */ { $$.host = NULL; $$.port = NULL; } | FROM ipportspec { $$ = $2; } ; to : /* empty */ { $$.host = NULL; $$.port = NULL; } | TO ipportspec { if (disallow_urpf_failed($2.host, "\"urpf-failed\" is " "not permitted in a destination address")) YYERROR; $$ = $2; } ; ipportspec : ipspec { $$.host = $1; $$.port = NULL; } | ipspec PORT portspec { $$.host = $1; $$.port = $3; } | PORT portspec { $$.host = NULL; $$.port = $2; } ; optnl : '\n' optnl | ; ipspec : ANY { $$ = NULL; } | xhost { $$ = $1; } | '{' optnl host_list '}' { $$ = $3; } ; toipspec : TO ipspec { $$ = $2; } | /* empty */ { $$ = NULL; } ; host_list : ipspec optnl { $$ = $1; } | host_list comma ipspec 
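/*
 * Braced lists accept embedded newlines via optnl, so a rule can spread
 * its lists over several lines.  Illustrative pf.conf (example only):
 *
 *	pass in on { em0, em1 } proto { tcp, udp } \
 *	    from { 10.0.0.0/8, !10.1.2.3 } to port { 22, 80 }
 */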
optnl { if ($3 == NULL) $$ = $1; else if ($1 == NULL) $$ = $3; else { $1->tail->next = $3; $1->tail = $3->tail; $$ = $1; } } ; xhost : not host { struct node_host *n; for (n = $2; n != NULL; n = n->next) n->not = $1; $$ = $2; } | not NOROUTE { $$ = calloc(1, sizeof(struct node_host)); if ($$ == NULL) err(1, "xhost: calloc"); $$->addr.type = PF_ADDR_NOROUTE; $$->next = NULL; $$->not = $1; $$->tail = $$; } | not URPFFAILED { $$ = calloc(1, sizeof(struct node_host)); if ($$ == NULL) err(1, "xhost: calloc"); $$->addr.type = PF_ADDR_URPFFAILED; $$->next = NULL; $$->not = $1; $$->tail = $$; } ; host : STRING { if (($$ = host($1)) == NULL) { /* error. "any" is handled elsewhere */ free($1); yyerror("could not parse host specification"); YYERROR; } free($1); } | STRING '-' STRING { struct node_host *b, *e; if ((b = host($1)) == NULL || (e = host($3)) == NULL) { free($1); free($3); yyerror("could not parse host specification"); YYERROR; } if (b->af != e->af || b->addr.type != PF_ADDR_ADDRMASK || e->addr.type != PF_ADDR_ADDRMASK || unmask(&b->addr.v.a.mask, b->af) != (b->af == AF_INET ? 32 : 128) || unmask(&e->addr.v.a.mask, e->af) != (e->af == AF_INET ? 32 : 128) || b->next != NULL || b->not || e->next != NULL || e->not) { free(b); free(e); free($1); free($3); yyerror("invalid address range"); YYERROR; } memcpy(&b->addr.v.a.mask, &e->addr.v.a.addr, sizeof(b->addr.v.a.mask)); b->addr.type = PF_ADDR_RANGE; $$ = b; free(e); free($1); free($3); } | STRING '/' NUMBER { char *buf; if (asprintf(&buf, "%s/%lld", $1, (long long)$3) == -1) err(1, "host: asprintf"); free($1); if (($$ = host(buf)) == NULL) { /* error. "any" is handled elsewhere */ free(buf); yyerror("could not parse host specification"); YYERROR; } free(buf); } | NUMBER '/' NUMBER { char *buf; /* ie. for 10/8 parsing */ #ifdef __FreeBSD__ if (asprintf(&buf, "%lld/%lld", (long long)$1, (long long)$3) == -1) #else if (asprintf(&buf, "%lld/%lld", $1, $3) == -1) #endif err(1, "host: asprintf"); if (($$ = host(buf)) == NULL) { /* error. 
"any" is handled elsewhere */ free(buf); yyerror("could not parse host specification"); YYERROR; } free(buf); } | dynaddr | dynaddr '/' NUMBER { struct node_host *n; if ($3 < 0 || $3 > 128) { yyerror("bit number too big"); YYERROR; } $$ = $1; for (n = $1; n != NULL; n = n->next) set_ipmask(n, $3); } | '<' STRING '>' { if (strlen($2) >= PF_TABLE_NAME_SIZE) { yyerror("table name '%s' too long", $2); free($2); YYERROR; } $$ = calloc(1, sizeof(struct node_host)); if ($$ == NULL) err(1, "host: calloc"); $$->addr.type = PF_ADDR_TABLE; if (strlcpy($$->addr.v.tblname, $2, sizeof($$->addr.v.tblname)) >= sizeof($$->addr.v.tblname)) errx(1, "host: strlcpy"); free($2); $$->next = NULL; $$->tail = $$; } ; number : NUMBER | STRING { u_long ulval; if (atoul($1, &ulval) == -1) { yyerror("%s is not a number", $1); free($1); YYERROR; } else $$ = ulval; free($1); } ; dynaddr : '(' STRING ')' { int flags = 0; char *p, *op; op = $2; if (!isalpha(op[0])) { yyerror("invalid interface name '%s'", op); free(op); YYERROR; } while ((p = strrchr($2, ':')) != NULL) { if (!strcmp(p+1, "network")) flags |= PFI_AFLAG_NETWORK; else if (!strcmp(p+1, "broadcast")) flags |= PFI_AFLAG_BROADCAST; else if (!strcmp(p+1, "peer")) flags |= PFI_AFLAG_PEER; else if (!strcmp(p+1, "0")) flags |= PFI_AFLAG_NOALIAS; else { yyerror("interface %s has bad modifier", $2); free(op); YYERROR; } *p = '\0'; } if (flags & (flags - 1) & PFI_AFLAG_MODEMASK) { free(op); yyerror("illegal combination of " "interface modifiers"); YYERROR; } $$ = calloc(1, sizeof(struct node_host)); if ($$ == NULL) err(1, "address: calloc"); $$->af = 0; set_ipmask($$, 128); $$->addr.type = PF_ADDR_DYNIFTL; $$->addr.iflags = flags; if (strlcpy($$->addr.v.ifname, $2, sizeof($$->addr.v.ifname)) >= sizeof($$->addr.v.ifname)) { free(op); free($$); yyerror("interface name too long"); YYERROR; } free(op); $$->next = NULL; $$->tail = $$; } ; portspec : port_item { $$ = $1; } | '{' optnl port_list '}' { $$ = $3; } ; port_list : port_item optnl { $$ = $1; } | port_list comma port_item optnl { $1->tail->next = $3; $1->tail = $3; $$ = $1; } ; port_item : portrange { $$ = calloc(1, sizeof(struct node_port)); if ($$ == NULL) err(1, "port_item: calloc"); $$->port[0] = $1.a; $$->port[1] = $1.b; if ($1.t) $$->op = PF_OP_RRG; else $$->op = PF_OP_EQ; $$->next = NULL; $$->tail = $$; } | unaryop portrange { if ($2.t) { yyerror("':' cannot be used with an other " "port operator"); YYERROR; } $$ = calloc(1, sizeof(struct node_port)); if ($$ == NULL) err(1, "port_item: calloc"); $$->port[0] = $2.a; $$->port[1] = $2.b; $$->op = $1; $$->next = NULL; $$->tail = $$; } | portrange PORTBINARY portrange { if ($1.t || $3.t) { yyerror("':' cannot be used with an other " "port operator"); YYERROR; } $$ = calloc(1, sizeof(struct node_port)); if ($$ == NULL) err(1, "port_item: calloc"); $$->port[0] = $1.a; $$->port[1] = $3.a; $$->op = $2; $$->next = NULL; $$->tail = $$; } ; portplain : numberstring { if (parseport($1, &$$, 0) == -1) { free($1); YYERROR; } free($1); } ; portrange : numberstring { if (parseport($1, &$$, PPORT_RANGE) == -1) { free($1); YYERROR; } free($1); } ; uids : uid_item { $$ = $1; } | '{' optnl uid_list '}' { $$ = $3; } ; uid_list : uid_item optnl { $$ = $1; } | uid_list comma uid_item optnl { $1->tail->next = $3; $1->tail = $3; $$ = $1; } ; uid_item : uid { $$ = calloc(1, sizeof(struct node_uid)); if ($$ == NULL) err(1, "uid_item: calloc"); $$->uid[0] = $1; $$->uid[1] = $1; $$->op = PF_OP_EQ; $$->next = NULL; $$->tail = $$; } | unaryop uid { if ($2 == UID_MAX && $1 != PF_OP_EQ && $1 
!= PF_OP_NE) { yyerror("user unknown requires operator = or " "!="); YYERROR; } $$ = calloc(1, sizeof(struct node_uid)); if ($$ == NULL) err(1, "uid_item: calloc"); $$->uid[0] = $2; $$->uid[1] = $2; $$->op = $1; $$->next = NULL; $$->tail = $$; } | uid PORTBINARY uid { if ($1 == UID_MAX || $3 == UID_MAX) { yyerror("user unknown requires operator = or " "!="); YYERROR; } $$ = calloc(1, sizeof(struct node_uid)); if ($$ == NULL) err(1, "uid_item: calloc"); $$->uid[0] = $1; $$->uid[1] = $3; $$->op = $2; $$->next = NULL; $$->tail = $$; } ; uid : STRING { if (!strcmp($1, "unknown")) $$ = UID_MAX; else { struct passwd *pw; if ((pw = getpwnam($1)) == NULL) { yyerror("unknown user %s", $1); free($1); YYERROR; } $$ = pw->pw_uid; } free($1); } | NUMBER { if ($1 < 0 || $1 >= UID_MAX) { yyerror("illegal uid value %lu", $1); YYERROR; } $$ = $1; } ; gids : gid_item { $$ = $1; } | '{' optnl gid_list '}' { $$ = $3; } ; gid_list : gid_item optnl { $$ = $1; } | gid_list comma gid_item optnl { $1->tail->next = $3; $1->tail = $3; $$ = $1; } ; gid_item : gid { $$ = calloc(1, sizeof(struct node_gid)); if ($$ == NULL) err(1, "gid_item: calloc"); $$->gid[0] = $1; $$->gid[1] = $1; $$->op = PF_OP_EQ; $$->next = NULL; $$->tail = $$; } | unaryop gid { if ($2 == GID_MAX && $1 != PF_OP_EQ && $1 != PF_OP_NE) { yyerror("group unknown requires operator = or " "!="); YYERROR; } $$ = calloc(1, sizeof(struct node_gid)); if ($$ == NULL) err(1, "gid_item: calloc"); $$->gid[0] = $2; $$->gid[1] = $2; $$->op = $1; $$->next = NULL; $$->tail = $$; } | gid PORTBINARY gid { if ($1 == GID_MAX || $3 == GID_MAX) { yyerror("group unknown requires operator = or " "!="); YYERROR; } $$ = calloc(1, sizeof(struct node_gid)); if ($$ == NULL) err(1, "gid_item: calloc"); $$->gid[0] = $1; $$->gid[1] = $3; $$->op = $2; $$->next = NULL; $$->tail = $$; } ; gid : STRING { if (!strcmp($1, "unknown")) $$ = GID_MAX; else { struct group *grp; if ((grp = getgrnam($1)) == NULL) { yyerror("unknown group %s", $1); free($1); YYERROR; } $$ = grp->gr_gid; } free($1); } | NUMBER { if ($1 < 0 || $1 >= GID_MAX) { yyerror("illegal gid value %lu", $1); YYERROR; } $$ = $1; } ; flag : STRING { int f; if ((f = parse_flags($1)) < 0) { yyerror("bad flags %s", $1); free($1); YYERROR; } free($1); $$.b1 = f; } ; flags : FLAGS flag '/' flag { $$.b1 = $2.b1; $$.b2 = $4.b1; } | FLAGS '/' flag { $$.b1 = 0; $$.b2 = $3.b1; } | FLAGS ANY { $$.b1 = 0; $$.b2 = 0; } ; icmpspec : ICMPTYPE icmp_item { $$ = $2; } | ICMPTYPE '{' optnl icmp_list '}' { $$ = $4; } | ICMP6TYPE icmp6_item { $$ = $2; } | ICMP6TYPE '{' optnl icmp6_list '}' { $$ = $4; } ; icmp_list : icmp_item optnl { $$ = $1; } | icmp_list comma icmp_item optnl { $1->tail->next = $3; $1->tail = $3; $$ = $1; } ; icmp6_list : icmp6_item optnl { $$ = $1; } | icmp6_list comma icmp6_item optnl { $1->tail->next = $3; $1->tail = $3; $$ = $1; } ; icmp_item : icmptype { $$ = calloc(1, sizeof(struct node_icmp)); if ($$ == NULL) err(1, "icmp_item: calloc"); $$->type = $1; $$->code = 0; $$->proto = IPPROTO_ICMP; $$->next = NULL; $$->tail = $$; } | icmptype CODE STRING { const struct icmpcodeent *p; if ((p = geticmpcodebyname($1-1, $3, AF_INET)) == NULL) { yyerror("unknown icmp-code %s", $3); free($3); YYERROR; } free($3); $$ = calloc(1, sizeof(struct node_icmp)); if ($$ == NULL) err(1, "icmp_item: calloc"); $$->type = $1; $$->code = p->code + 1; $$->proto = IPPROTO_ICMP; $$->next = NULL; $$->tail = $$; } | icmptype CODE NUMBER { if ($3 < 0 || $3 > 255) { yyerror("illegal icmp-code %lu", $3); YYERROR; } $$ = calloc(1, sizeof(struct 
node_icmp)); if ($$ == NULL) err(1, "icmp_item: calloc"); $$->type = $1; $$->code = $3 + 1; $$->proto = IPPROTO_ICMP; $$->next = NULL; $$->tail = $$; } ; icmp6_item : icmp6type { $$ = calloc(1, sizeof(struct node_icmp)); if ($$ == NULL) err(1, "icmp_item: calloc"); $$->type = $1; $$->code = 0; $$->proto = IPPROTO_ICMPV6; $$->next = NULL; $$->tail = $$; } | icmp6type CODE STRING { const struct icmpcodeent *p; if ((p = geticmpcodebyname($1-1, $3, AF_INET6)) == NULL) { yyerror("unknown icmp6-code %s", $3); free($3); YYERROR; } free($3); $$ = calloc(1, sizeof(struct node_icmp)); if ($$ == NULL) err(1, "icmp_item: calloc"); $$->type = $1; $$->code = p->code + 1; $$->proto = IPPROTO_ICMPV6; $$->next = NULL; $$->tail = $$; } | icmp6type CODE NUMBER { if ($3 < 0 || $3 > 255) { yyerror("illegal icmp-code %lu", $3); YYERROR; } $$ = calloc(1, sizeof(struct node_icmp)); if ($$ == NULL) err(1, "icmp_item: calloc"); $$->type = $1; $$->code = $3 + 1; $$->proto = IPPROTO_ICMPV6; $$->next = NULL; $$->tail = $$; } ; icmptype : STRING { const struct icmptypeent *p; if ((p = geticmptypebyname($1, AF_INET)) == NULL) { yyerror("unknown icmp-type %s", $1); free($1); YYERROR; } $$ = p->type + 1; free($1); } | NUMBER { if ($1 < 0 || $1 > 255) { yyerror("illegal icmp-type %lu", $1); YYERROR; } $$ = $1 + 1; } ; icmp6type : STRING { const struct icmptypeent *p; if ((p = geticmptypebyname($1, AF_INET6)) == NULL) { yyerror("unknown icmp6-type %s", $1); free($1); YYERROR; } $$ = p->type + 1; free($1); } | NUMBER { if ($1 < 0 || $1 > 255) { yyerror("illegal icmp6-type %lu", $1); YYERROR; } $$ = $1 + 1; } ; tos : STRING { if (!strcmp($1, "lowdelay")) $$ = IPTOS_LOWDELAY; else if (!strcmp($1, "throughput")) $$ = IPTOS_THROUGHPUT; else if (!strcmp($1, "reliability")) $$ = IPTOS_RELIABILITY; else if ($1[0] == '0' && $1[1] == 'x') $$ = strtoul($1, NULL, 16); else - $$ = 0; /* flag bad argument */ - if (!$$ || $$ > 255) { + $$ = 256; /* flag bad argument */ + if ($$ < 0 || $$ > 255) { yyerror("illegal tos value %s", $1); free($1); YYERROR; } free($1); } | NUMBER { $$ = $1; - if (!$$ || $$ > 255) { + if ($$ < 0 || $$ > 255) { yyerror("illegal tos value %s", $1); YYERROR; } } ; sourcetrack : SOURCETRACK { $$ = PF_SRCTRACK; } | SOURCETRACK GLOBAL { $$ = PF_SRCTRACK_GLOBAL; } | SOURCETRACK RULE { $$ = PF_SRCTRACK_RULE; } ; statelock : IFBOUND { $$ = PFRULE_IFBOUND; } | FLOATING { $$ = 0; } ; keep : NO STATE { $$.action = 0; $$.options = NULL; } | KEEP STATE state_opt_spec { $$.action = PF_STATE_NORMAL; $$.options = $3; } | MODULATE STATE state_opt_spec { $$.action = PF_STATE_MODULATE; $$.options = $3; } | SYNPROXY STATE state_opt_spec { $$.action = PF_STATE_SYNPROXY; $$.options = $3; } ; flush : /* empty */ { $$ = 0; } | FLUSH { $$ = PF_FLUSH; } | FLUSH GLOBAL { $$ = PF_FLUSH | PF_FLUSH_GLOBAL; } ; state_opt_spec : '(' state_opt_list ')' { $$ = $2; } | /* empty */ { $$ = NULL; } ; state_opt_list : state_opt_item { $$ = $1; } | state_opt_list comma state_opt_item { $1->tail->next = $3; $1->tail = $3; $$ = $1; } ; state_opt_item : MAXIMUM NUMBER { if ($2 < 0 || $2 > UINT_MAX) { yyerror("only positive values permitted"); YYERROR; } $$ = calloc(1, sizeof(struct node_state_opt)); if ($$ == NULL) err(1, "state_opt_item: calloc"); $$->type = PF_STATE_OPT_MAX; $$->data.max_states = $2; $$->next = NULL; $$->tail = $$; } | NOSYNC { $$ = calloc(1, sizeof(struct node_state_opt)); if ($$ == NULL) err(1, "state_opt_item: calloc"); $$->type = PF_STATE_OPT_NOSYNC; $$->next = NULL; $$->tail = $$; } | MAXSRCSTATES NUMBER { if ($2 < 0 || $2 > 
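/*
 * With the revised range check in the tos rule above, "tos 0" now
 * parses (0 is a legitimate TOS byte), while an unrecognized keyword
 * is flagged with the out-of-range sentinel 256 instead of the
 * previously ambiguous 0.  Illustrative uses of the rule:
 *
 *   pass out proto tcp to port 80 tos lowdelay
 *   pass out proto tcp tos 0
 */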
UINT_MAX) { yyerror("only positive values permitted"); YYERROR; } $$ = calloc(1, sizeof(struct node_state_opt)); if ($$ == NULL) err(1, "state_opt_item: calloc"); $$->type = PF_STATE_OPT_MAX_SRC_STATES; $$->data.max_src_states = $2; $$->next = NULL; $$->tail = $$; } | MAXSRCCONN NUMBER { if ($2 < 0 || $2 > UINT_MAX) { yyerror("only positive values permitted"); YYERROR; } $$ = calloc(1, sizeof(struct node_state_opt)); if ($$ == NULL) err(1, "state_opt_item: calloc"); $$->type = PF_STATE_OPT_MAX_SRC_CONN; $$->data.max_src_conn = $2; $$->next = NULL; $$->tail = $$; } | MAXSRCCONNRATE NUMBER '/' NUMBER { if ($2 < 0 || $2 > UINT_MAX || $4 < 0 || $4 > UINT_MAX) { yyerror("only positive values permitted"); YYERROR; } $$ = calloc(1, sizeof(struct node_state_opt)); if ($$ == NULL) err(1, "state_opt_item: calloc"); $$->type = PF_STATE_OPT_MAX_SRC_CONN_RATE; $$->data.max_src_conn_rate.limit = $2; $$->data.max_src_conn_rate.seconds = $4; $$->next = NULL; $$->tail = $$; } | OVERLOAD '<' STRING '>' flush { if (strlen($3) >= PF_TABLE_NAME_SIZE) { yyerror("table name '%s' too long", $3); free($3); YYERROR; } $$ = calloc(1, sizeof(struct node_state_opt)); if ($$ == NULL) err(1, "state_opt_item: calloc"); if (strlcpy($$->data.overload.tblname, $3, PF_TABLE_NAME_SIZE) >= PF_TABLE_NAME_SIZE) errx(1, "state_opt_item: strlcpy"); free($3); $$->type = PF_STATE_OPT_OVERLOAD; $$->data.overload.flush = $5; $$->next = NULL; $$->tail = $$; } | MAXSRCNODES NUMBER { if ($2 < 0 || $2 > UINT_MAX) { yyerror("only positive values permitted"); YYERROR; } $$ = calloc(1, sizeof(struct node_state_opt)); if ($$ == NULL) err(1, "state_opt_item: calloc"); $$->type = PF_STATE_OPT_MAX_SRC_NODES; $$->data.max_src_nodes = $2; $$->next = NULL; $$->tail = $$; } | sourcetrack { $$ = calloc(1, sizeof(struct node_state_opt)); if ($$ == NULL) err(1, "state_opt_item: calloc"); $$->type = PF_STATE_OPT_SRCTRACK; $$->data.src_track = $1; $$->next = NULL; $$->tail = $$; } | statelock { $$ = calloc(1, sizeof(struct node_state_opt)); if ($$ == NULL) err(1, "state_opt_item: calloc"); $$->type = PF_STATE_OPT_STATELOCK; $$->data.statelock = $1; $$->next = NULL; $$->tail = $$; } | SLOPPY { $$ = calloc(1, sizeof(struct node_state_opt)); if ($$ == NULL) err(1, "state_opt_item: calloc"); $$->type = PF_STATE_OPT_SLOPPY; $$->next = NULL; $$->tail = $$; } | STRING NUMBER { int i; if ($2 < 0 || $2 > UINT_MAX) { yyerror("only positive values permitted"); YYERROR; } for (i = 0; pf_timeouts[i].name && strcmp(pf_timeouts[i].name, $1); ++i) ; /* nothing */ if (!pf_timeouts[i].name) { yyerror("illegal timeout name %s", $1); free($1); YYERROR; } if (strchr(pf_timeouts[i].name, '.') == NULL) { yyerror("illegal state timeout %s", $1); free($1); YYERROR; } free($1); $$ = calloc(1, sizeof(struct node_state_opt)); if ($$ == NULL) err(1, "state_opt_item: calloc"); $$->type = PF_STATE_OPT_TIMEOUT; $$->data.timeout.number = pf_timeouts[i].timeout; $$->data.timeout.seconds = $2; $$->next = NULL; $$->tail = $$; } ; label : LABEL STRING { $$ = $2; } ; qname : QUEUE STRING { $$.qname = $2; $$.pqname = NULL; } | QUEUE '(' STRING ')' { $$.qname = $3; $$.pqname = NULL; } | QUEUE '(' STRING comma STRING ')' { $$.qname = $3; $$.pqname = $5; } ; no : /* empty */ { $$ = 0; } | NO { $$ = 1; } ; portstar : numberstring { if (parseport($1, &$$, PPORT_RANGE|PPORT_STAR) == -1) { free($1); YYERROR; } free($1); } ; redirspec : host { $$ = $1; } | '{' optnl redir_host_list '}' { $$ = $3; } ; redir_host_list : host optnl { $$ = $1; } | redir_host_list comma host optnl { $1->tail->next = $3; 
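/*
 * The state options parsed above compose as in (illustrative table
 * name and numbers):
 *
 *   pass in proto tcp to port 22 keep state \
 *       (max-src-conn 10, max-src-conn-rate 5/30, \
 *        overload <bruteforce> flush global, if-bound)
 *
 * Per-rule timeouts such as "tcp.established 3600" are also accepted
 * as state options; names without a dot are rejected by the
 * strchr(name, '.') check above.
 */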
$1->tail = $3->tail; $$ = $1; } ; redirpool : /* empty */ { $$ = NULL; } | ARROW redirspec { $$ = calloc(1, sizeof(struct redirection)); if ($$ == NULL) err(1, "redirection: calloc"); $$->host = $2; $$->rport.a = $$->rport.b = $$->rport.t = 0; } | ARROW redirspec PORT portstar { $$ = calloc(1, sizeof(struct redirection)); if ($$ == NULL) err(1, "redirection: calloc"); $$->host = $2; $$->rport = $4; } ; hashkey : /* empty */ { $$ = calloc(1, sizeof(struct pf_poolhashkey)); if ($$ == NULL) err(1, "hashkey: calloc"); $$->key32[0] = arc4random(); $$->key32[1] = arc4random(); $$->key32[2] = arc4random(); $$->key32[3] = arc4random(); } | string { if (!strncmp($1, "0x", 2)) { if (strlen($1) != 34) { free($1); yyerror("hex key must be 128 bits " "(32 hex digits) long"); YYERROR; } $$ = calloc(1, sizeof(struct pf_poolhashkey)); if ($$ == NULL) err(1, "hashkey: calloc"); if (sscanf($1, "0x%8x%8x%8x%8x", &$$->key32[0], &$$->key32[1], &$$->key32[2], &$$->key32[3]) != 4) { free($$); free($1); yyerror("invalid hex key"); YYERROR; } } else { MD5_CTX context; $$ = calloc(1, sizeof(struct pf_poolhashkey)); if ($$ == NULL) err(1, "hashkey: calloc"); MD5Init(&context); MD5Update(&context, (unsigned char *)$1, strlen($1)); MD5Final((unsigned char *)$$, &context); HTONL($$->key32[0]); HTONL($$->key32[1]); HTONL($$->key32[2]); HTONL($$->key32[3]); } free($1); } ; pool_opts : { bzero(&pool_opts, sizeof pool_opts); } pool_opts_l { $$ = pool_opts; } | /* empty */ { bzero(&pool_opts, sizeof pool_opts); $$ = pool_opts; } ; pool_opts_l : pool_opts_l pool_opt | pool_opt ; pool_opt : BITMASK { if (pool_opts.type) { yyerror("pool type cannot be redefined"); YYERROR; } pool_opts.type = PF_POOL_BITMASK; } | RANDOM { if (pool_opts.type) { yyerror("pool type cannot be redefined"); YYERROR; } pool_opts.type = PF_POOL_RANDOM; } | SOURCEHASH hashkey { if (pool_opts.type) { yyerror("pool type cannot be redefined"); YYERROR; } pool_opts.type = PF_POOL_SRCHASH; pool_opts.key = $2; } | ROUNDROBIN { if (pool_opts.type) { yyerror("pool type cannot be redefined"); YYERROR; } pool_opts.type = PF_POOL_ROUNDROBIN; } | STATICPORT { if (pool_opts.staticport) { yyerror("static-port cannot be redefined"); YYERROR; } pool_opts.staticport = 1; } | STICKYADDRESS { if (filter_opts.marker & POM_STICKYADDRESS) { yyerror("sticky-address cannot be redefined"); YYERROR; } pool_opts.marker |= POM_STICKYADDRESS; pool_opts.opts |= PF_POOL_STICKYADDR; } ; redirection : /* empty */ { $$ = NULL; } | ARROW host { $$ = calloc(1, sizeof(struct redirection)); if ($$ == NULL) err(1, "redirection: calloc"); $$->host = $2; $$->rport.a = $$->rport.b = $$->rport.t = 0; } | ARROW host PORT portstar { $$ = calloc(1, sizeof(struct redirection)); if ($$ == NULL) err(1, "redirection: calloc"); $$->host = $2; $$->rport = $4; } ; natpasslog : /* empty */ { $$.b1 = $$.b2 = 0; $$.w2 = 0; } | PASS { $$.b1 = 1; $$.b2 = 0; $$.w2 = 0; } | PASS log { $$.b1 = 1; $$.b2 = $2.log; $$.w2 = $2.logif; } | log { $$.b1 = 0; $$.b2 = $1.log; $$.w2 = $1.logif; } ; nataction : no NAT natpasslog { if ($1 && $3.b1) { yyerror("\"pass\" not valid with \"no\""); YYERROR; } if ($1) $$.b1 = PF_NONAT; else $$.b1 = PF_NAT; $$.b2 = $3.b1; $$.w = $3.b2; $$.w2 = $3.w2; } | no RDR natpasslog { if ($1 && $3.b1) { yyerror("\"pass\" not valid with \"no\""); YYERROR; } if ($1) $$.b1 = PF_NORDR; else $$.b1 = PF_RDR; $$.b2 = $3.b1; $$.w = $3.b2; $$.w2 = $3.w2; } ; natrule : nataction interface af proto fromto tag tagged rtable redirpool pool_opts { struct pf_rule r; if (check_rulestate(PFCTL_STATE_NAT)) 
YYERROR; memset(&r, 0, sizeof(r)); r.action = $1.b1; r.natpass = $1.b2; r.log = $1.w; r.logif = $1.w2; r.af = $3; if (!r.af) { if ($5.src.host && $5.src.host->af && !$5.src.host->ifindex) r.af = $5.src.host->af; else if ($5.dst.host && $5.dst.host->af && !$5.dst.host->ifindex) r.af = $5.dst.host->af; } if ($6 != NULL) if (strlcpy(r.tagname, $6, PF_TAG_NAME_SIZE) >= PF_TAG_NAME_SIZE) { yyerror("tag too long, max %u chars", PF_TAG_NAME_SIZE - 1); YYERROR; } if ($7.name) if (strlcpy(r.match_tagname, $7.name, PF_TAG_NAME_SIZE) >= PF_TAG_NAME_SIZE) { yyerror("tag too long, max %u chars", PF_TAG_NAME_SIZE - 1); YYERROR; } r.match_tag_not = $7.neg; r.rtableid = $8; if (r.action == PF_NONAT || r.action == PF_NORDR) { if ($9 != NULL) { yyerror("translation rule with 'no' " "does not need '->'"); YYERROR; } } else { if ($9 == NULL || $9->host == NULL) { yyerror("translation rule requires '-> " "address'"); YYERROR; } if (!r.af && ! $9->host->ifindex) r.af = $9->host->af; remove_invalid_hosts(&$9->host, &r.af); if (invalid_redirect($9->host, r.af)) YYERROR; if (check_netmask($9->host, r.af)) YYERROR; r.rpool.proxy_port[0] = ntohs($9->rport.a); switch (r.action) { case PF_RDR: if (!$9->rport.b && $9->rport.t && $5.dst.port != NULL) { r.rpool.proxy_port[1] = ntohs($9->rport.a) + (ntohs( $5.dst.port->port[1]) - ntohs( $5.dst.port->port[0])); } else r.rpool.proxy_port[1] = ntohs($9->rport.b); break; case PF_NAT: r.rpool.proxy_port[1] = ntohs($9->rport.b); if (!r.rpool.proxy_port[0] && !r.rpool.proxy_port[1]) { r.rpool.proxy_port[0] = PF_NAT_PROXY_PORT_LOW; r.rpool.proxy_port[1] = PF_NAT_PROXY_PORT_HIGH; } else if (!r.rpool.proxy_port[1]) r.rpool.proxy_port[1] = r.rpool.proxy_port[0]; break; default: break; } r.rpool.opts = $10.type; if ((r.rpool.opts & PF_POOL_TYPEMASK) == PF_POOL_NONE && ($9->host->next != NULL || $9->host->addr.type == PF_ADDR_TABLE || DYNIF_MULTIADDR($9->host->addr))) r.rpool.opts = PF_POOL_ROUNDROBIN; if ((r.rpool.opts & PF_POOL_TYPEMASK) != PF_POOL_ROUNDROBIN && disallow_table($9->host, "tables are only " "supported in round-robin redirection " "pools")) YYERROR; if ((r.rpool.opts & PF_POOL_TYPEMASK) != PF_POOL_ROUNDROBIN && disallow_alias($9->host, "interface (%s) " "is only supported in round-robin " "redirection pools")) YYERROR; if ($9->host->next != NULL) { if ((r.rpool.opts & PF_POOL_TYPEMASK) != PF_POOL_ROUNDROBIN) { yyerror("only round-robin " "valid for multiple " "redirection addresses"); YYERROR; } } } if ($10.key != NULL) memcpy(&r.rpool.key, $10.key, sizeof(struct pf_poolhashkey)); if ($10.opts) r.rpool.opts |= $10.opts; if ($10.staticport) { if (r.action != PF_NAT) { yyerror("the 'static-port' option is " "only valid with nat rules"); YYERROR; } if (r.rpool.proxy_port[0] != PF_NAT_PROXY_PORT_LOW && r.rpool.proxy_port[1] != PF_NAT_PROXY_PORT_HIGH) { yyerror("the 'static-port' option can't" " be used when specifying a port" " range"); YYERROR; } r.rpool.proxy_port[0] = 0; r.rpool.proxy_port[1] = 0; } expand_rule(&r, $2, $9 == NULL ? 
NULL : $9->host, $4, $5.src_os, $5.src.host, $5.src.port, $5.dst.host, $5.dst.port, 0, 0, 0, ""); free($9); } ; binatrule : no BINAT natpasslog interface af proto FROM host toipspec tag tagged rtable redirection { struct pf_rule binat; struct pf_pooladdr *pa; if (check_rulestate(PFCTL_STATE_NAT)) YYERROR; if (disallow_urpf_failed($9, "\"urpf-failed\" is not " "permitted as a binat destination")) YYERROR; memset(&binat, 0, sizeof(binat)); if ($1 && $3.b1) { yyerror("\"pass\" not valid with \"no\""); YYERROR; } if ($1) binat.action = PF_NOBINAT; else binat.action = PF_BINAT; binat.natpass = $3.b1; binat.log = $3.b2; binat.logif = $3.w2; binat.af = $5; if (!binat.af && $8 != NULL && $8->af) binat.af = $8->af; if (!binat.af && $9 != NULL && $9->af) binat.af = $9->af; if (!binat.af && $13 != NULL && $13->host) binat.af = $13->host->af; if (!binat.af) { yyerror("address family (inet/inet6) " "undefined"); YYERROR; } if ($4 != NULL) { memcpy(binat.ifname, $4->ifname, sizeof(binat.ifname)); binat.ifnot = $4->not; free($4); } if ($10 != NULL) if (strlcpy(binat.tagname, $10, PF_TAG_NAME_SIZE) >= PF_TAG_NAME_SIZE) { yyerror("tag too long, max %u chars", PF_TAG_NAME_SIZE - 1); YYERROR; } if ($11.name) if (strlcpy(binat.match_tagname, $11.name, PF_TAG_NAME_SIZE) >= PF_TAG_NAME_SIZE) { yyerror("tag too long, max %u chars", PF_TAG_NAME_SIZE - 1); YYERROR; } binat.match_tag_not = $11.neg; binat.rtableid = $12; if ($6 != NULL) { binat.proto = $6->proto; free($6); } if ($8 != NULL && disallow_table($8, "invalid use of " "table <%s> as the source address of a binat rule")) YYERROR; if ($8 != NULL && disallow_alias($8, "invalid use of " "interface (%s) as the source address of a binat " "rule")) YYERROR; if ($13 != NULL && $13->host != NULL && disallow_table( $13->host, "invalid use of table <%s> as the " "redirect address of a binat rule")) YYERROR; if ($13 != NULL && $13->host != NULL && disallow_alias( $13->host, "invalid use of interface (%s) as the " "redirect address of a binat rule")) YYERROR; if ($8 != NULL) { if ($8->next) { yyerror("multiple binat ip addresses"); YYERROR; } if ($8->addr.type == PF_ADDR_DYNIFTL) $8->af = binat.af; if ($8->af != binat.af) { yyerror("binat ip versions must match"); YYERROR; } if (check_netmask($8, binat.af)) YYERROR; memcpy(&binat.src.addr, &$8->addr, sizeof(binat.src.addr)); free($8); } if ($9 != NULL) { if ($9->next) { yyerror("multiple binat ip addresses"); YYERROR; } if ($9->af != binat.af && $9->af) { yyerror("binat ip versions must match"); YYERROR; } if (check_netmask($9, binat.af)) YYERROR; memcpy(&binat.dst.addr, &$9->addr, sizeof(binat.dst.addr)); binat.dst.neg = $9->not; free($9); } if (binat.action == PF_NOBINAT) { if ($13 != NULL) { yyerror("'no binat' rule does not need" " '->'"); YYERROR; } } else { if ($13 == NULL || $13->host == NULL) { yyerror("'binat' rule requires" " '-> address'"); YYERROR; } remove_invalid_hosts(&$13->host, &binat.af); if (invalid_redirect($13->host, binat.af)) YYERROR; if ($13->host->next != NULL) { yyerror("binat rule must redirect to " "a single address"); YYERROR; } if (check_netmask($13->host, binat.af)) YYERROR; if (!PF_AZERO(&binat.src.addr.v.a.mask, binat.af) && !PF_AEQ(&binat.src.addr.v.a.mask, &$13->host->addr.v.a.mask, binat.af)) { yyerror("'binat' source mask and " "redirect mask must be the same"); YYERROR; } TAILQ_INIT(&binat.rpool.list); pa = calloc(1, sizeof(struct pf_pooladdr)); if (pa == NULL) err(1, "binat: calloc"); pa->addr = $13->host->addr; pa->ifname[0] = 0; TAILQ_INSERT_TAIL(&binat.rpool.list, pa, 
entries); free($13); } pfctl_add_rule(pf, &binat, ""); } ; tag : /* empty */ { $$ = NULL; } | TAG STRING { $$ = $2; } ; tagged : /* empty */ { $$.neg = 0; $$.name = NULL; } | not TAGGED string { $$.neg = $1; $$.name = $3; } ; rtable : /* empty */ { $$ = -1; } | RTABLE NUMBER { if ($2 < 0 || $2 > rt_tableid_max()) { yyerror("invalid rtable id"); YYERROR; } $$ = $2; } ; route_host : STRING { $$ = calloc(1, sizeof(struct node_host)); if ($$ == NULL) err(1, "route_host: calloc"); $$->ifname = $1; set_ipmask($$, 128); $$->next = NULL; $$->tail = $$; } | '(' STRING host ')' { $$ = $3; $$->ifname = $2; } ; route_host_list : route_host optnl { $$ = $1; } | route_host_list comma route_host optnl { if ($1->af == 0) $1->af = $3->af; if ($1->af != $3->af) { yyerror("all pool addresses must be in the " "same address family"); YYERROR; } $1->tail->next = $3; $1->tail = $3->tail; $$ = $1; } ; routespec : route_host { $$ = $1; } | '{' optnl route_host_list '}' { $$ = $3; } ; route : /* empty */ { $$.host = NULL; $$.rt = 0; $$.pool_opts = 0; } | FASTROUTE { $$.host = NULL; $$.rt = PF_FASTROUTE; $$.pool_opts = 0; } | ROUTETO routespec pool_opts { $$.host = $2; $$.rt = PF_ROUTETO; $$.pool_opts = $3.type | $3.opts; if ($3.key != NULL) $$.key = $3.key; } | REPLYTO routespec pool_opts { $$.host = $2; $$.rt = PF_REPLYTO; $$.pool_opts = $3.type | $3.opts; if ($3.key != NULL) $$.key = $3.key; } | DUPTO routespec pool_opts { $$.host = $2; $$.rt = PF_DUPTO; $$.pool_opts = $3.type | $3.opts; if ($3.key != NULL) $$.key = $3.key; } ; timeout_spec : STRING NUMBER { if (check_rulestate(PFCTL_STATE_OPTION)) { free($1); YYERROR; } if ($2 < 0 || $2 > UINT_MAX) { yyerror("only positive values permitted"); YYERROR; } if (pfctl_set_timeout(pf, $1, $2, 0) != 0) { yyerror("unknown timeout %s", $1); free($1); YYERROR; } free($1); } ; timeout_list : timeout_list comma timeout_spec optnl | timeout_spec optnl ; limit_spec : STRING NUMBER { if (check_rulestate(PFCTL_STATE_OPTION)) { free($1); YYERROR; } if ($2 < 0 || $2 > UINT_MAX) { yyerror("only positive values permitted"); YYERROR; } if (pfctl_set_limit(pf, $1, $2) != 0) { yyerror("unable to set limit %s %u", $1, $2); free($1); YYERROR; } free($1); } ; limit_list : limit_list comma limit_spec optnl | limit_spec optnl ; comma : ',' | /* empty */ ; yesno : NO { $$ = 0; } | STRING { if (!strcmp($1, "yes")) $$ = 1; else { yyerror("invalid value '%s', expected 'yes' " "or 'no'", $1); free($1); YYERROR; } free($1); } ; unaryop : '=' { $$ = PF_OP_EQ; } | '!' '=' { $$ = PF_OP_NE; } | '<' '=' { $$ = PF_OP_LE; } | '<' { $$ = PF_OP_LT; } | '>' '=' { $$ = PF_OP_GE; } | '>' { $$ = PF_OP_GT; } ; %% int yyerror(const char *fmt, ...) 
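/*
 * Policy routing and the option lists above, e.g. (illustrative):
 *
 *   pass in on em0 route-to (em1 192.0.2.1) from 10.0.0.0/8
 *   set timeout { tcp.established 3600, udp.first 30 }
 *   set limit states 20000
 *
 * The "(ifname host)" form of route_host pins the next hop to a
 * specific outgoing interface.
 */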
{ va_list ap; file->errors++; va_start(ap, fmt); fprintf(stderr, "%s:%d: ", file->name, yylval.lineno); vfprintf(stderr, fmt, ap); fprintf(stderr, "\n"); va_end(ap); return (0); } int disallow_table(struct node_host *h, const char *fmt) { for (; h != NULL; h = h->next) if (h->addr.type == PF_ADDR_TABLE) { yyerror(fmt, h->addr.v.tblname); return (1); } return (0); } int disallow_urpf_failed(struct node_host *h, const char *fmt) { for (; h != NULL; h = h->next) if (h->addr.type == PF_ADDR_URPFFAILED) { yyerror(fmt); return (1); } return (0); } int disallow_alias(struct node_host *h, const char *fmt) { for (; h != NULL; h = h->next) if (DYNIF_MULTIADDR(h->addr)) { yyerror(fmt, h->addr.v.tblname); return (1); } return (0); } int rule_consistent(struct pf_rule *r, int anchor_call) { int problems = 0; switch (r->action) { case PF_PASS: case PF_DROP: case PF_SCRUB: case PF_NOSCRUB: problems = filter_consistent(r, anchor_call); break; case PF_NAT: case PF_NONAT: problems = nat_consistent(r); break; case PF_RDR: case PF_NORDR: problems = rdr_consistent(r); break; case PF_BINAT: case PF_NOBINAT: default: break; } return (problems); } int filter_consistent(struct pf_rule *r, int anchor_call) { int problems = 0; if (r->proto != IPPROTO_TCP && r->proto != IPPROTO_UDP && (r->src.port_op || r->dst.port_op)) { yyerror("port only applies to tcp/udp"); problems++; } if (r->proto != IPPROTO_ICMP && r->proto != IPPROTO_ICMPV6 && (r->type || r->code)) { yyerror("icmp-type/code only applies to icmp"); problems++; } if (!r->af && (r->type || r->code)) { yyerror("must indicate address family with icmp-type/code"); problems++; } if (r->overload_tblname[0] && r->max_src_conn == 0 && r->max_src_conn_rate.seconds == 0) { yyerror("'overload' requires 'max-src-conn' " "or 'max-src-conn-rate'"); problems++; } if ((r->proto == IPPROTO_ICMP && r->af == AF_INET6) || (r->proto == IPPROTO_ICMPV6 && r->af == AF_INET)) { yyerror("proto %s doesn't match address family %s", r->proto == IPPROTO_ICMP ? "icmp" : "icmp6", r->af == AF_INET ? "inet" : "inet6"); problems++; } if (r->allow_opts && r->action != PF_PASS) { yyerror("allow-opts can only be specified for pass rules"); problems++; } if (r->rule_flag & PFRULE_FRAGMENT && (r->src.port_op || r->dst.port_op || r->flagset || r->type || r->code)) { yyerror("fragments can be filtered only on IP header fields"); problems++; } if (r->rule_flag & PFRULE_RETURNRST && r->proto != IPPROTO_TCP) { yyerror("return-rst can only be applied to TCP rules"); problems++; } if (r->max_src_nodes && !(r->rule_flag & PFRULE_RULESRCTRACK)) { yyerror("max-src-nodes requires 'source-track rule'"); problems++; } if (r->action == PF_DROP && r->keep_state) { yyerror("keep state on block rules doesn't make sense"); problems++; } if (r->rule_flag & PFRULE_STATESLOPPY && (r->keep_state == PF_STATE_MODULATE || r->keep_state == PF_STATE_SYNPROXY)) { yyerror("sloppy state matching cannot be used with " "synproxy state or modulate state"); problems++; } return (-problems); } int nat_consistent(struct pf_rule *r) { return (0); /* yeah! 
*/ } int rdr_consistent(struct pf_rule *r) { int problems = 0; if (r->proto != IPPROTO_TCP && r->proto != IPPROTO_UDP) { if (r->src.port_op) { yyerror("src port only applies to tcp/udp"); problems++; } if (r->dst.port_op) { yyerror("dst port only applies to tcp/udp"); problems++; } if (r->rpool.proxy_port[0]) { yyerror("rpool port only applies to tcp/udp"); problems++; } } if (r->dst.port_op && r->dst.port_op != PF_OP_EQ && r->dst.port_op != PF_OP_RRG) { yyerror("invalid port operator for rdr destination port"); problems++; } return (-problems); } int process_tabledef(char *name, struct table_opts *opts) { struct pfr_buffer ab; struct node_tinit *ti; bzero(&ab, sizeof(ab)); ab.pfrb_type = PFRB_ADDRS; SIMPLEQ_FOREACH(ti, &opts->init_nodes, entries) { if (ti->file) if (pfr_buf_load(&ab, ti->file, 0, append_addr)) { if (errno) yyerror("cannot load \"%s\": %s", ti->file, strerror(errno)); else yyerror("file \"%s\" contains bad data", ti->file); goto _error; } if (ti->host) if (append_addr_host(&ab, ti->host, 0, 0)) { yyerror("cannot create address buffer: %s", strerror(errno)); goto _error; } } if (pf->opts & PF_OPT_VERBOSE) print_tabledef(name, opts->flags, opts->init_addr, &opts->init_nodes); if (!(pf->opts & PF_OPT_NOACTION) && pfctl_define_table(name, opts->flags, opts->init_addr, pf->anchor->name, &ab, pf->anchor->ruleset.tticket)) { yyerror("cannot define table %s: %s", name, pfr_strerror(errno)); goto _error; } pf->tdirty = 1; pfr_buf_clear(&ab); return (0); _error: pfr_buf_clear(&ab); return (-1); } struct keywords { const char *k_name; int k_val; }; /* macro gore, but you should've seen the prior indentation nightmare... */ #define FREE_LIST(T,r) \ do { \ T *p, *node = r; \ while (node != NULL) { \ p = node; \ node = node->next; \ free(p); \ } \ } while (0) #define LOOP_THROUGH(T,n,r,C) \ do { \ T *n; \ if (r == NULL) { \ r = calloc(1, sizeof(T)); \ if (r == NULL) \ err(1, "LOOP: calloc"); \ r->next = NULL; \ } \ n = r; \ while (n != NULL) { \ do { \ C; \ } while (0); \ n = n->next; \ } \ } while (0) void expand_label_str(char *label, size_t len, const char *srch, const char *repl) { char *tmp; char *p, *q; if ((tmp = calloc(1, len)) == NULL) err(1, "expand_label_str: calloc"); p = q = label; while ((q = strstr(p, srch)) != NULL) { *q = '\0'; if ((strlcat(tmp, p, len) >= len) || (strlcat(tmp, repl, len) >= len)) errx(1, "expand_label: label too long"); q += strlen(srch); p = q; } if (strlcat(tmp, p, len) >= len) errx(1, "expand_label: label too long"); strlcpy(label, tmp, len); /* always fits */ free(tmp); } void expand_label_if(const char *name, char *label, size_t len, const char *ifname) { if (strstr(label, name) != NULL) { if (!*ifname) expand_label_str(label, len, name, "any"); else expand_label_str(label, len, name, ifname); } } void expand_label_addr(const char *name, char *label, size_t len, sa_family_t af, struct node_host *h) { char tmp[64], tmp_not[66]; if (strstr(label, name) != NULL) { switch (h->addr.type) { case PF_ADDR_DYNIFTL: snprintf(tmp, sizeof(tmp), "(%s)", h->addr.v.ifname); break; case PF_ADDR_TABLE: snprintf(tmp, sizeof(tmp), "<%s>", h->addr.v.tblname); break; case PF_ADDR_NOROUTE: snprintf(tmp, sizeof(tmp), "no-route"); break; case PF_ADDR_URPFFAILED: snprintf(tmp, sizeof(tmp), "urpf-failed"); break; case PF_ADDR_ADDRMASK: if (!af || (PF_AZERO(&h->addr.v.a.addr, af) && PF_AZERO(&h->addr.v.a.mask, af))) snprintf(tmp, sizeof(tmp), "any"); else { char a[48]; int bits; if (inet_ntop(af, &h->addr.v.a.addr, a, sizeof(a)) == NULL) snprintf(tmp, sizeof(tmp), "?"); 
else { bits = unmask(&h->addr.v.a.mask, af); if ((af == AF_INET && bits < 32) || (af == AF_INET6 && bits < 128)) snprintf(tmp, sizeof(tmp), "%s/%d", a, bits); else snprintf(tmp, sizeof(tmp), "%s", a); } } break; default: snprintf(tmp, sizeof(tmp), "?"); break; } if (h->not) { snprintf(tmp_not, sizeof(tmp_not), "! %s", tmp); expand_label_str(label, len, name, tmp_not); } else expand_label_str(label, len, name, tmp); } } void expand_label_port(const char *name, char *label, size_t len, struct node_port *port) { char a1[6], a2[6], op[13] = ""; if (strstr(label, name) != NULL) { snprintf(a1, sizeof(a1), "%u", ntohs(port->port[0])); snprintf(a2, sizeof(a2), "%u", ntohs(port->port[1])); if (!port->op) ; else if (port->op == PF_OP_IRG) snprintf(op, sizeof(op), "%s><%s", a1, a2); else if (port->op == PF_OP_XRG) snprintf(op, sizeof(op), "%s<>%s", a1, a2); else if (port->op == PF_OP_EQ) snprintf(op, sizeof(op), "%s", a1); else if (port->op == PF_OP_NE) snprintf(op, sizeof(op), "!=%s", a1); else if (port->op == PF_OP_LT) snprintf(op, sizeof(op), "<%s", a1); else if (port->op == PF_OP_LE) snprintf(op, sizeof(op), "<=%s", a1); else if (port->op == PF_OP_GT) snprintf(op, sizeof(op), ">%s", a1); else if (port->op == PF_OP_GE) snprintf(op, sizeof(op), ">=%s", a1); expand_label_str(label, len, name, op); } } void expand_label_proto(const char *name, char *label, size_t len, u_int8_t proto) { struct protoent *pe; char n[4]; if (strstr(label, name) != NULL) { pe = getprotobynumber(proto); if (pe != NULL) expand_label_str(label, len, name, pe->p_name); else { snprintf(n, sizeof(n), "%u", proto); expand_label_str(label, len, name, n); } } } void expand_label_nr(const char *name, char *label, size_t len) { char n[11]; if (strstr(label, name) != NULL) { snprintf(n, sizeof(n), "%u", pf->anchor->match); expand_label_str(label, len, name, n); } } void expand_label(char *label, size_t len, const char *ifname, sa_family_t af, struct node_host *src_host, struct node_port *src_port, struct node_host *dst_host, struct node_port *dst_port, u_int8_t proto) { expand_label_if("$if", label, len, ifname); expand_label_addr("$srcaddr", label, len, af, src_host); expand_label_addr("$dstaddr", label, len, af, dst_host); expand_label_port("$srcport", label, len, src_port); expand_label_port("$dstport", label, len, dst_port); expand_label_proto("$proto", label, len, proto); expand_label_nr("$nr", label, len); } int expand_altq(struct pf_altq *a, struct node_if *interfaces, struct node_queue *nqueues, struct node_queue_bw bwspec, struct node_queue_opt *opts) { struct pf_altq pa, pb; char qname[PF_QNAME_SIZE]; struct node_queue *n; struct node_queue_bw bw; int errs = 0; if ((pf->loadopt & PFCTL_FLAG_ALTQ) == 0) { FREE_LIST(struct node_if, interfaces); if (nqueues) FREE_LIST(struct node_queue, nqueues); return (0); } LOOP_THROUGH(struct node_if, interface, interfaces, memcpy(&pa, a, sizeof(struct pf_altq)); if (strlcpy(pa.ifname, interface->ifname, sizeof(pa.ifname)) >= sizeof(pa.ifname)) errx(1, "expand_altq: strlcpy"); if (interface->not) { yyerror("altq on ! 
is not supported"); errs++; } else { if (eval_pfaltq(pf, &pa, &bwspec, opts)) errs++; else if (pfctl_add_altq(pf, &pa)) errs++; if (pf->opts & PF_OPT_VERBOSE) { print_altq(&pf->paltq->altq, 0, &bwspec, opts); if (nqueues && nqueues->tail) { printf("queue { "); LOOP_THROUGH(struct node_queue, queue, nqueues, printf("%s ", queue->queue); ); printf("}"); } printf("\n"); } if (pa.scheduler == ALTQT_CBQ || pa.scheduler == ALTQT_HFSC) { /* now create a root queue */ memset(&pb, 0, sizeof(struct pf_altq)); if (strlcpy(qname, "root_", sizeof(qname)) >= sizeof(qname)) errx(1, "expand_altq: strlcpy"); if (strlcat(qname, interface->ifname, sizeof(qname)) >= sizeof(qname)) errx(1, "expand_altq: strlcat"); if (strlcpy(pb.qname, qname, sizeof(pb.qname)) >= sizeof(pb.qname)) errx(1, "expand_altq: strlcpy"); if (strlcpy(pb.ifname, interface->ifname, sizeof(pb.ifname)) >= sizeof(pb.ifname)) errx(1, "expand_altq: strlcpy"); pb.qlimit = pa.qlimit; pb.scheduler = pa.scheduler; bw.bw_absolute = pa.ifbandwidth; bw.bw_percent = 0; if (eval_pfqueue(pf, &pb, &bw, opts)) errs++; else if (pfctl_add_altq(pf, &pb)) errs++; } LOOP_THROUGH(struct node_queue, queue, nqueues, n = calloc(1, sizeof(struct node_queue)); if (n == NULL) err(1, "expand_altq: calloc"); if (pa.scheduler == ALTQT_CBQ || pa.scheduler == ALTQT_HFSC) if (strlcpy(n->parent, qname, sizeof(n->parent)) >= sizeof(n->parent)) errx(1, "expand_altq: strlcpy"); if (strlcpy(n->queue, queue->queue, sizeof(n->queue)) >= sizeof(n->queue)) errx(1, "expand_altq: strlcpy"); if (strlcpy(n->ifname, interface->ifname, sizeof(n->ifname)) >= sizeof(n->ifname)) errx(1, "expand_altq: strlcpy"); n->scheduler = pa.scheduler; n->next = NULL; n->tail = n; if (queues == NULL) queues = n; else { queues->tail->next = n; queues->tail = n; } ); } ); FREE_LIST(struct node_if, interfaces); if (nqueues) FREE_LIST(struct node_queue, nqueues); return (errs); } int expand_queue(struct pf_altq *a, struct node_if *interfaces, struct node_queue *nqueues, struct node_queue_bw bwspec, struct node_queue_opt *opts) { struct node_queue *n, *nq; struct pf_altq pa; u_int8_t found = 0; u_int8_t errs = 0; if ((pf->loadopt & PFCTL_FLAG_ALTQ) == 0) { FREE_LIST(struct node_queue, nqueues); return (0); } if (queues == NULL) { yyerror("queue %s has no parent", a->qname); FREE_LIST(struct node_queue, nqueues); return (1); } LOOP_THROUGH(struct node_if, interface, interfaces, LOOP_THROUGH(struct node_queue, tqueue, queues, if (!strncmp(a->qname, tqueue->queue, PF_QNAME_SIZE) && (interface->ifname[0] == 0 || (!interface->not && !strncmp(interface->ifname, tqueue->ifname, IFNAMSIZ)) || (interface->not && strncmp(interface->ifname, tqueue->ifname, IFNAMSIZ)))) { /* found ourself in queues */ found++; memcpy(&pa, a, sizeof(struct pf_altq)); if (pa.scheduler != ALTQT_NONE && pa.scheduler != tqueue->scheduler) { yyerror("exactly one scheduler type " "per interface allowed"); return (1); } pa.scheduler = tqueue->scheduler; /* scheduler dependent error checking */ switch (pa.scheduler) { case ALTQT_PRIQ: if (nqueues != NULL) { yyerror("priq queues cannot " "have child queues"); return (1); } if (bwspec.bw_absolute > 0 || bwspec.bw_percent < 100) { yyerror("priq doesn't take " "bandwidth"); return (1); } break; default: break; } if (strlcpy(pa.ifname, tqueue->ifname, sizeof(pa.ifname)) >= sizeof(pa.ifname)) errx(1, "expand_queue: strlcpy"); if (strlcpy(pa.parent, tqueue->parent, sizeof(pa.parent)) >= sizeof(pa.parent)) errx(1, "expand_queue: strlcpy"); if (eval_pfqueue(pf, &pa, &bwspec, opts)) errs++; else if 
(pfctl_add_altq(pf, &pa)) errs++; for (nq = nqueues; nq != NULL; nq = nq->next) { if (!strcmp(a->qname, nq->queue)) { yyerror("queue cannot have " "itself as child"); errs++; continue; } n = calloc(1, sizeof(struct node_queue)); if (n == NULL) err(1, "expand_queue: calloc"); if (strlcpy(n->parent, a->qname, sizeof(n->parent)) >= sizeof(n->parent)) errx(1, "expand_queue strlcpy"); if (strlcpy(n->queue, nq->queue, sizeof(n->queue)) >= sizeof(n->queue)) errx(1, "expand_queue strlcpy"); if (strlcpy(n->ifname, tqueue->ifname, sizeof(n->ifname)) >= sizeof(n->ifname)) errx(1, "expand_queue strlcpy"); n->scheduler = tqueue->scheduler; n->next = NULL; n->tail = n; if (queues == NULL) queues = n; else { queues->tail->next = n; queues->tail = n; } } if ((pf->opts & PF_OPT_VERBOSE) && ( (found == 1 && interface->ifname[0] == 0) || (found > 0 && interface->ifname[0] != 0))) { print_queue(&pf->paltq->altq, 0, &bwspec, interface->ifname[0] != 0, opts); if (nqueues && nqueues->tail) { printf("{ "); LOOP_THROUGH(struct node_queue, queue, nqueues, printf("%s ", queue->queue); ); printf("}"); } printf("\n"); } } ); ); FREE_LIST(struct node_queue, nqueues); FREE_LIST(struct node_if, interfaces); if (!found) { yyerror("queue %s has no parent", a->qname); errs++; } if (errs) return (1); else return (0); } void expand_rule(struct pf_rule *r, struct node_if *interfaces, struct node_host *rpool_hosts, struct node_proto *protos, struct node_os *src_oses, struct node_host *src_hosts, struct node_port *src_ports, struct node_host *dst_hosts, struct node_port *dst_ports, struct node_uid *uids, struct node_gid *gids, struct node_icmp *icmp_types, const char *anchor_call) { sa_family_t af = r->af; int added = 0, error = 0; char ifname[IF_NAMESIZE]; char label[PF_RULE_LABEL_SIZE]; char tagname[PF_TAG_NAME_SIZE]; char match_tagname[PF_TAG_NAME_SIZE]; struct pf_pooladdr *pa; struct node_host *h; u_int8_t flags, flagset, keep_state; if (strlcpy(label, r->label, sizeof(label)) >= sizeof(label)) errx(1, "expand_rule: strlcpy"); if (strlcpy(tagname, r->tagname, sizeof(tagname)) >= sizeof(tagname)) errx(1, "expand_rule: strlcpy"); if (strlcpy(match_tagname, r->match_tagname, sizeof(match_tagname)) >= sizeof(match_tagname)) errx(1, "expand_rule: strlcpy"); flags = r->flags; flagset = r->flagset; keep_state = r->keep_state; LOOP_THROUGH(struct node_if, interface, interfaces, LOOP_THROUGH(struct node_proto, proto, protos, LOOP_THROUGH(struct node_icmp, icmp_type, icmp_types, LOOP_THROUGH(struct node_host, src_host, src_hosts, LOOP_THROUGH(struct node_port, src_port, src_ports, LOOP_THROUGH(struct node_os, src_os, src_oses, LOOP_THROUGH(struct node_host, dst_host, dst_hosts, LOOP_THROUGH(struct node_port, dst_port, dst_ports, LOOP_THROUGH(struct node_uid, uid, uids, LOOP_THROUGH(struct node_gid, gid, gids, r->af = af; /* for link-local IPv6 address, interface must match up */ if ((r->af && src_host->af && r->af != src_host->af) || (r->af && dst_host->af && r->af != dst_host->af) || (src_host->af && dst_host->af && src_host->af != dst_host->af) || (src_host->ifindex && dst_host->ifindex && src_host->ifindex != dst_host->ifindex) || (src_host->ifindex && *interface->ifname && src_host->ifindex != if_nametoindex(interface->ifname)) || (dst_host->ifindex && *interface->ifname && dst_host->ifindex != if_nametoindex(interface->ifname))) continue; if (!r->af && src_host->af) r->af = src_host->af; else if (!r->af && dst_host->af) r->af = dst_host->af; if (*interface->ifname) strlcpy(r->ifname, interface->ifname, sizeof(r->ifname)); else if 
(if_indextoname(src_host->ifindex, ifname)) strlcpy(r->ifname, ifname, sizeof(r->ifname)); else if (if_indextoname(dst_host->ifindex, ifname)) strlcpy(r->ifname, ifname, sizeof(r->ifname)); else memset(r->ifname, '\0', sizeof(r->ifname)); if (strlcpy(r->label, label, sizeof(r->label)) >= sizeof(r->label)) errx(1, "expand_rule: strlcpy"); if (strlcpy(r->tagname, tagname, sizeof(r->tagname)) >= sizeof(r->tagname)) errx(1, "expand_rule: strlcpy"); if (strlcpy(r->match_tagname, match_tagname, sizeof(r->match_tagname)) >= sizeof(r->match_tagname)) errx(1, "expand_rule: strlcpy"); expand_label(r->label, PF_RULE_LABEL_SIZE, r->ifname, r->af, src_host, src_port, dst_host, dst_port, proto->proto); expand_label(r->tagname, PF_TAG_NAME_SIZE, r->ifname, r->af, src_host, src_port, dst_host, dst_port, proto->proto); expand_label(r->match_tagname, PF_TAG_NAME_SIZE, r->ifname, r->af, src_host, src_port, dst_host, dst_port, proto->proto); error += check_netmask(src_host, r->af); error += check_netmask(dst_host, r->af); r->ifnot = interface->not; r->proto = proto->proto; r->src.addr = src_host->addr; r->src.neg = src_host->not; r->src.port[0] = src_port->port[0]; r->src.port[1] = src_port->port[1]; r->src.port_op = src_port->op; r->dst.addr = dst_host->addr; r->dst.neg = dst_host->not; r->dst.port[0] = dst_port->port[0]; r->dst.port[1] = dst_port->port[1]; r->dst.port_op = dst_port->op; r->uid.op = uid->op; r->uid.uid[0] = uid->uid[0]; r->uid.uid[1] = uid->uid[1]; r->gid.op = gid->op; r->gid.gid[0] = gid->gid[0]; r->gid.gid[1] = gid->gid[1]; r->type = icmp_type->type; r->code = icmp_type->code; if ((keep_state == PF_STATE_MODULATE || keep_state == PF_STATE_SYNPROXY) && r->proto && r->proto != IPPROTO_TCP) r->keep_state = PF_STATE_NORMAL; else r->keep_state = keep_state; if (r->proto && r->proto != IPPROTO_TCP) { r->flags = 0; r->flagset = 0; } else { r->flags = flags; r->flagset = flagset; } if (icmp_type->proto && r->proto != icmp_type->proto) { yyerror("icmp-type mismatch"); error++; } if (src_os && src_os->os) { r->os_fingerprint = pfctl_get_fingerprint(src_os->os); if ((pf->opts & PF_OPT_VERBOSE2) && r->os_fingerprint == PF_OSFP_NOMATCH) fprintf(stderr, "warning: unknown '%s' OS fingerprint\n", src_os->os); } else { r->os_fingerprint = PF_OSFP_ANY; } TAILQ_INIT(&r->rpool.list); for (h = rpool_hosts; h != NULL; h = h->next) { pa = calloc(1, sizeof(struct pf_pooladdr)); if (pa == NULL) err(1, "expand_rule: calloc"); pa->addr = h->addr; if (h->ifname != NULL) { if (strlcpy(pa->ifname, h->ifname, sizeof(pa->ifname)) >= sizeof(pa->ifname)) errx(1, "expand_rule: strlcpy"); } else pa->ifname[0] = 0; TAILQ_INSERT_TAIL(&r->rpool.list, pa, entries); } if (rule_consistent(r, anchor_call[0]) < 0 || error) yyerror("skipping rule due to errors"); else { r->nr = pf->astack[pf->asd]->match++; pfctl_add_rule(pf, r, anchor_call); added++; } )))))))))); FREE_LIST(struct node_if, interfaces); FREE_LIST(struct node_proto, protos); FREE_LIST(struct node_host, src_hosts); FREE_LIST(struct node_port, src_ports); FREE_LIST(struct node_os, src_oses); FREE_LIST(struct node_host, dst_hosts); FREE_LIST(struct node_port, dst_ports); FREE_LIST(struct node_uid, uids); FREE_LIST(struct node_gid, gids); FREE_LIST(struct node_icmp, icmp_types); FREE_LIST(struct node_host, rpool_hosts); if (!added) yyerror("rule expands to no valid combination"); } int expand_skip_interface(struct node_if *interfaces) { int errs = 0; if (!interfaces || (!interfaces->next && !interfaces->not && !strcmp(interfaces->ifname, "none"))) { if (pf->opts & 
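/*
 * expand_rule() above emits the cross product of every list
 * dimension; a single line such as (illustrative)
 *
 *   pass in on { em0, em1 } proto { tcp, udp } to port { 22, 80 }
 *
 * expands to 2 * 2 * 2 = 8 kernel rules, one per
 * interface/protocol/port combination, via the nested LOOP_THROUGH
 * invocations.
 */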
PF_OPT_VERBOSE) printf("set skip on none\n"); errs = pfctl_set_interface_flags(pf, "", PFI_IFLAG_SKIP, 0); return (errs); } if (pf->opts & PF_OPT_VERBOSE) printf("set skip on {"); LOOP_THROUGH(struct node_if, interface, interfaces, if (pf->opts & PF_OPT_VERBOSE) printf(" %s", interface->ifname); if (interface->not) { yyerror("skip on ! is not supported"); errs++; } else errs += pfctl_set_interface_flags(pf, interface->ifname, PFI_IFLAG_SKIP, 1); ); if (pf->opts & PF_OPT_VERBOSE) printf(" }\n"); FREE_LIST(struct node_if, interfaces); if (errs) return (1); else return (0); } #undef FREE_LIST #undef LOOP_THROUGH int check_rulestate(int desired_state) { if (require_order && (rulestate > desired_state)) { yyerror("Rules must be in order: options, normalization, " "queueing, translation, filtering"); return (1); } rulestate = desired_state; return (0); } int kw_cmp(const void *k, const void *e) { return (strcmp(k, ((const struct keywords *)e)->k_name)); } int lookup(char *s) { /* this has to be sorted always */ static const struct keywords keywords[] = { { "all", ALL}, { "allow-opts", ALLOWOPTS}, { "altq", ALTQ}, { "anchor", ANCHOR}, { "antispoof", ANTISPOOF}, { "any", ANY}, { "bandwidth", BANDWIDTH}, { "binat", BINAT}, { "binat-anchor", BINATANCHOR}, { "bitmask", BITMASK}, { "block", BLOCK}, { "block-policy", BLOCKPOLICY}, { "buckets", BUCKETS}, { "cbq", CBQ}, { "code", CODE}, { "codelq", CODEL}, { "crop", FRAGCROP}, { "debug", DEBUG}, { "divert-reply", DIVERTREPLY}, { "divert-to", DIVERTTO}, { "drop", DROP}, { "drop-ovl", FRAGDROP}, { "dup-to", DUPTO}, { "fairq", FAIRQ}, { "fastroute", FASTROUTE}, { "file", FILENAME}, { "fingerprints", FINGERPRINTS}, { "flags", FLAGS}, { "floating", FLOATING}, { "flush", FLUSH}, { "for", FOR}, { "fragment", FRAGMENT}, { "from", FROM}, { "global", GLOBAL}, { "group", GROUP}, { "hfsc", HFSC}, { "hogs", HOGS}, { "hostid", HOSTID}, { "icmp-type", ICMPTYPE}, { "icmp6-type", ICMP6TYPE}, { "if-bound", IFBOUND}, { "in", IN}, { "include", INCLUDE}, { "inet", INET}, { "inet6", INET6}, { "interval", INTERVAL}, { "keep", KEEP}, { "label", LABEL}, { "limit", LIMIT}, { "linkshare", LINKSHARE}, { "load", LOAD}, { "log", LOG}, { "loginterface", LOGINTERFACE}, { "max", MAXIMUM}, { "max-mss", MAXMSS}, { "max-src-conn", MAXSRCCONN}, { "max-src-conn-rate", MAXSRCCONNRATE}, { "max-src-nodes", MAXSRCNODES}, { "max-src-states", MAXSRCSTATES}, { "min-ttl", MINTTL}, { "modulate", MODULATE}, { "nat", NAT}, { "nat-anchor", NATANCHOR}, { "no", NO}, { "no-df", NODF}, { "no-route", NOROUTE}, { "no-sync", NOSYNC}, { "on", ON}, { "optimization", OPTIMIZATION}, { "os", OS}, { "out", OUT}, { "overload", OVERLOAD}, { "pass", PASS}, { "port", PORT}, { "prio", PRIO}, { "priority", PRIORITY}, { "priq", PRIQ}, { "probability", PROBABILITY}, { "proto", PROTO}, { "qlimit", QLIMIT}, { "queue", QUEUE}, { "quick", QUICK}, { "random", RANDOM}, { "random-id", RANDOMID}, { "rdr", RDR}, { "rdr-anchor", RDRANCHOR}, { "realtime", REALTIME}, { "reassemble", REASSEMBLE}, { "reply-to", REPLYTO}, { "require-order", REQUIREORDER}, { "return", RETURN}, { "return-icmp", RETURNICMP}, { "return-icmp6", RETURNICMP6}, { "return-rst", RETURNRST}, { "round-robin", ROUNDROBIN}, { "route", ROUTE}, { "route-to", ROUTETO}, { "rtable", RTABLE}, { "rule", RULE}, { "ruleset-optimization", RULESET_OPTIMIZATION}, { "scrub", SCRUB}, { "set", SET}, { "set-tos", SETTOS}, { "skip", SKIP}, { "sloppy", SLOPPY}, { "source-hash", SOURCEHASH}, { "source-track", SOURCETRACK}, { "state", STATE}, { "state-defaults", STATEDEFAULTS}, { 
"state-policy", STATEPOLICY}, { "static-port", STATICPORT}, { "sticky-address", STICKYADDRESS}, { "synproxy", SYNPROXY}, { "table", TABLE}, { "tag", TAG}, { "tagged", TAGGED}, { "target", TARGET}, { "tbrsize", TBRSIZE}, { "timeout", TIMEOUT}, { "to", TO}, { "tos", TOS}, { "ttl", TTL}, { "upperlimit", UPPERLIMIT}, { "urpf-failed", URPFFAILED}, { "user", USER}, }; const struct keywords *p; p = bsearch(s, keywords, sizeof(keywords)/sizeof(keywords[0]), sizeof(keywords[0]), kw_cmp); if (p) { if (debug > 1) fprintf(stderr, "%s: %d\n", s, p->k_val); return (p->k_val); } else { if (debug > 1) fprintf(stderr, "string: %s\n", s); return (STRING); } } #define MAXPUSHBACK 128 char *parsebuf; int parseindex; char pushback_buffer[MAXPUSHBACK]; int pushback_index = 0; int lgetc(int quotec) { int c, next; if (parsebuf) { /* Read character from the parsebuffer instead of input. */ if (parseindex >= 0) { c = parsebuf[parseindex++]; if (c != '\0') return (c); parsebuf = NULL; } else parseindex++; } if (pushback_index) return (pushback_buffer[--pushback_index]); if (quotec) { if ((c = getc(file->stream)) == EOF) { yyerror("reached end of file while parsing quoted string"); if (popfile() == EOF) return (EOF); return (quotec); } return (c); } while ((c = getc(file->stream)) == '\\') { next = getc(file->stream); if (next != '\n') { c = next; break; } yylval.lineno = file->lineno; file->lineno++; } while (c == EOF) { if (popfile() == EOF) return (EOF); c = getc(file->stream); } return (c); } int lungetc(int c) { if (c == EOF) return (EOF); if (parsebuf) { parseindex--; if (parseindex >= 0) return (c); } if (pushback_index < MAXPUSHBACK-1) return (pushback_buffer[pushback_index++] = c); else return (EOF); } int findeol(void) { int c; parsebuf = NULL; /* skip to either EOF or the first real EOL */ while (1) { if (pushback_index) c = pushback_buffer[--pushback_index]; else c = lgetc(0); if (c == '\n') { file->lineno++; break; } if (c == EOF) break; } return (ERROR); } int yylex(void) { char buf[8096]; char *p, *val; int quotec, next, c; int token; top: p = buf; while ((c = lgetc(0)) == ' ' || c == '\t') ; /* nothing */ yylval.lineno = file->lineno; if (c == '#') while ((c = lgetc(0)) != '\n' && c != EOF) ; /* nothing */ if (c == '$' && parsebuf == NULL) { while (1) { if ((c = lgetc(0)) == EOF) return (0); if (p + 1 >= buf + sizeof(buf) - 1) { yyerror("string too long"); return (findeol()); } if (isalnum(c) || c == '_') { *p++ = (char)c; continue; } *p = '\0'; lungetc(c); break; } val = symget(buf); if (val == NULL) { yyerror("macro '%s' not defined", buf); return (findeol()); } parsebuf = val; parseindex = 0; goto top; } switch (c) { case '\'': case '"': quotec = c; while (1) { if ((c = lgetc(quotec)) == EOF) return (0); if (c == '\n') { file->lineno++; continue; } else if (c == '\\') { if ((next = lgetc(quotec)) == EOF) return (0); if (next == quotec || c == ' ' || c == '\t') c = next; else if (next == '\n') continue; else lungetc(next); } else if (c == quotec) { *p = '\0'; break; } if (p + 1 >= buf + sizeof(buf) - 1) { yyerror("string too long"); return (findeol()); } *p++ = (char)c; } yylval.v.string = strdup(buf); if (yylval.v.string == NULL) err(1, "yylex: strdup"); return (STRING); case '<': next = lgetc(0); if (next == '>') { yylval.v.i = PF_OP_XRG; return (PORTBINARY); } lungetc(next); break; case '>': next = lgetc(0); if (next == '<') { yylval.v.i = PF_OP_IRG; return (PORTBINARY); } lungetc(next); break; case '-': next = lgetc(0); if (next == '>') return (ARROW); lungetc(next); break; } #define 
allowed_to_end_number(x) \ (isspace(x) || x == ')' || x ==',' || x == '/' || x == '}' || x == '=') if (c == '-' || isdigit(c)) { do { *p++ = c; if ((unsigned)(p-buf) >= sizeof(buf)) { yyerror("string too long"); return (findeol()); } } while ((c = lgetc(0)) != EOF && isdigit(c)); lungetc(c); if (p == buf + 1 && buf[0] == '-') goto nodigits; if (c == EOF || allowed_to_end_number(c)) { const char *errstr = NULL; *p = '\0'; yylval.v.number = strtonum(buf, LLONG_MIN, LLONG_MAX, &errstr); if (errstr) { yyerror("\"%s\" invalid number: %s", buf, errstr); return (findeol()); } return (NUMBER); } else { nodigits: while (p > buf + 1) lungetc(*--p); c = *--p; if (c == '-') return (c); } } #define allowed_in_string(x) \ (isalnum(x) || (ispunct(x) && x != '(' && x != ')' && \ x != '{' && x != '}' && x != '<' && x != '>' && \ x != '!' && x != '=' && x != '/' && x != '#' && \ x != ',')) if (isalnum(c) || c == ':' || c == '_') { do { *p++ = c; if ((unsigned)(p-buf) >= sizeof(buf)) { yyerror("string too long"); return (findeol()); } } while ((c = lgetc(0)) != EOF && (allowed_in_string(c))); lungetc(c); *p = '\0'; if ((token = lookup(buf)) == STRING) if ((yylval.v.string = strdup(buf)) == NULL) err(1, "yylex: strdup"); return (token); } if (c == '\n') { yylval.lineno = file->lineno; file->lineno++; } if (c == EOF) return (0); return (c); } int check_file_secrecy(int fd, const char *fname) { struct stat st; if (fstat(fd, &st)) { warn("cannot stat %s", fname); return (-1); } if (st.st_uid != 0 && st.st_uid != getuid()) { warnx("%s: owner not root or current user", fname); return (-1); } if (st.st_mode & (S_IRWXG | S_IRWXO)) { warnx("%s: group/world readable/writeable", fname); return (-1); } return (0); } struct file * pushfile(const char *name, int secret) { struct file *nfile; if ((nfile = calloc(1, sizeof(struct file))) == NULL || (nfile->name = strdup(name)) == NULL) { warn("malloc"); return (NULL); } if (TAILQ_FIRST(&files) == NULL && strcmp(nfile->name, "-") == 0) { nfile->stream = stdin; free(nfile->name); if ((nfile->name = strdup("stdin")) == NULL) { warn("strdup"); free(nfile); return (NULL); } } else if ((nfile->stream = fopen(nfile->name, "r")) == NULL) { warn("%s", nfile->name); free(nfile->name); free(nfile); return (NULL); } else if (secret && check_file_secrecy(fileno(nfile->stream), nfile->name)) { fclose(nfile->stream); free(nfile->name); free(nfile); return (NULL); } nfile->lineno = 1; TAILQ_INSERT_TAIL(&files, nfile, entry); return (nfile); } int popfile(void) { struct file *prev; if ((prev = TAILQ_PREV(file, files, entry)) != NULL) { prev->errors += file->errors; TAILQ_REMOVE(&files, file, entry); fclose(file->stream); free(file->name); free(file); file = prev; return (0); } return (EOF); } int parse_config(char *filename, struct pfctl *xpf) { int errors = 0; struct sym *sym; pf = xpf; errors = 0; rulestate = PFCTL_STATE_NONE; returnicmpdefault = (ICMP_UNREACH << 8) | ICMP_UNREACH_PORT; returnicmp6default = (ICMP6_DST_UNREACH << 8) | ICMP6_DST_UNREACH_NOPORT; blockpolicy = PFRULE_DROP; require_order = 1; if ((file = pushfile(filename, 0)) == NULL) { warn("cannot open the main config file!"); return (-1); } yyparse(); errors = file->errors; popfile(); /* Free macros and check which have not been used. */ while ((sym = TAILQ_FIRST(&symhead))) { if ((pf->opts & PF_OPT_VERBOSE2) && !sym->used) fprintf(stderr, "warning: macro '%s' not " "used\n", sym->nam); free(sym->nam); free(sym->val); TAILQ_REMOVE(&symhead, sym, entry); free(sym); } return (errors ? 
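/*
 * Macro expansion is handled by the '$' case in yylex() above, e.g.
 * (illustrative):
 *
 *   ext_if = "em0"
 *   block in on $ext_if all
 *
 * The stored value is re-lexed through parsebuf; note that the '$'
 * case only fires while parsebuf is NULL, so a macro body is not
 * itself expanded recursively.
 */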
-1 : 0); } int symset(const char *nam, const char *val, int persist) { struct sym *sym; for (sym = TAILQ_FIRST(&symhead); sym && strcmp(nam, sym->nam); sym = TAILQ_NEXT(sym, entry)) ; /* nothing */ if (sym != NULL) { if (sym->persist == 1) return (0); else { free(sym->nam); free(sym->val); TAILQ_REMOVE(&symhead, sym, entry); free(sym); } } if ((sym = calloc(1, sizeof(*sym))) == NULL) return (-1); sym->nam = strdup(nam); if (sym->nam == NULL) { free(sym); return (-1); } sym->val = strdup(val); if (sym->val == NULL) { free(sym->nam); free(sym); return (-1); } sym->used = 0; sym->persist = persist; TAILQ_INSERT_TAIL(&symhead, sym, entry); return (0); } int pfctl_cmdline_symset(char *s) { char *sym, *val; int ret; if ((val = strrchr(s, '=')) == NULL) return (-1); if ((sym = malloc(strlen(s) - strlen(val) + 1)) == NULL) err(1, "pfctl_cmdline_symset: malloc"); strlcpy(sym, s, strlen(s) - strlen(val) + 1); ret = symset(sym, val + 1, 1); free(sym); return (ret); } char * symget(const char *nam) { struct sym *sym; TAILQ_FOREACH(sym, &symhead, entry) if (strcmp(nam, sym->nam) == 0) { sym->used = 1; return (sym->val); } return (NULL); } void mv_rules(struct pf_ruleset *src, struct pf_ruleset *dst) { int i; struct pf_rule *r; for (i = 0; i < PF_RULESET_MAX; ++i) { while ((r = TAILQ_FIRST(src->rules[i].active.ptr)) != NULL) { TAILQ_REMOVE(src->rules[i].active.ptr, r, entries); TAILQ_INSERT_TAIL(dst->rules[i].active.ptr, r, entries); dst->anchor->match++; } src->anchor->match = 0; while ((r = TAILQ_FIRST(src->rules[i].inactive.ptr)) != NULL) { TAILQ_REMOVE(src->rules[i].inactive.ptr, r, entries); TAILQ_INSERT_TAIL(dst->rules[i].inactive.ptr, r, entries); } } } void decide_address_family(struct node_host *n, sa_family_t *af) { if (*af != 0 || n == NULL) return; *af = n->af; while ((n = n->next) != NULL) { if (n->af != *af) { *af = 0; return; } } } void remove_invalid_hosts(struct node_host **nh, sa_family_t *af) { struct node_host *n = *nh, *prev = NULL; while (n != NULL) { if (*af && n->af && n->af != *af) { /* unlink and free n */ struct node_host *next = n->next; /* adjust tail pointer */ if (n == (*nh)->tail) (*nh)->tail = prev; /* adjust previous node's next pointer */ if (prev == NULL) *nh = next; else prev->next = next; /* free node */ if (n->ifname != NULL) free(n->ifname); free(n); n = next; } else { if (n->af && !*af) *af = n->af; prev = n; n = n->next; } } } int invalid_redirect(struct node_host *nh, sa_family_t af) { if (!af) { struct node_host *n; /* tables and dyniftl are ok without an address family */ for (n = nh; n != NULL; n = n->next) { if (n->addr.type != PF_ADDR_TABLE && n->addr.type != PF_ADDR_DYNIFTL) { yyerror("address family not given and " "translation address expands to multiple " "address families"); return (1); } } } if (nh == NULL) { yyerror("no translation address with matching address family " "found."); return (1); } return (0); } int atoul(char *s, u_long *ulvalp) { u_long ulval; char *ep; errno = 0; ulval = strtoul(s, &ep, 0); if (s[0] == '\0' || *ep != '\0') return (-1); if (errno == ERANGE && ulval == ULONG_MAX) return (-1); *ulvalp = ulval; return (0); } int getservice(char *n) { struct servent *s; u_long ulval; if (atoul(n, &ulval) == 0) { if (ulval > 65535) { yyerror("illegal port value %lu", ulval); return (-1); } return (htons(ulval)); } else { s = getservbyname(n, "tcp"); if (s == NULL) s = getservbyname(n, "udp"); if (s == NULL) { yyerror("unknown port %s", n); return (-1); } return (s->s_port); } } int rule_label(struct pf_rule *r, char *s) { if (s) { if 
(strlcpy(r->label, s, sizeof(r->label)) >= sizeof(r->label)) { yyerror("rule label too long (max %d chars)", sizeof(r->label)-1); return (-1); } } return (0); } u_int16_t parseicmpspec(char *w, sa_family_t af) { const struct icmpcodeent *p; u_long ulval; u_int8_t icmptype; if (af == AF_INET) icmptype = returnicmpdefault >> 8; else icmptype = returnicmp6default >> 8; if (atoul(w, &ulval) == -1) { if ((p = geticmpcodebyname(icmptype, w, af)) == NULL) { yyerror("unknown icmp code %s", w); return (0); } ulval = p->code; } if (ulval > 255) { yyerror("invalid icmp code %lu", ulval); return (0); } return (icmptype << 8 | ulval); } int parseport(char *port, struct range *r, int extensions) { char *p = strchr(port, ':'); if (p == NULL) { if ((r->a = getservice(port)) == -1) return (-1); r->b = 0; r->t = PF_OP_NONE; return (0); } if ((extensions & PPORT_STAR) && !strcmp(p+1, "*")) { *p = 0; if ((r->a = getservice(port)) == -1) return (-1); r->b = 0; r->t = PF_OP_IRG; return (0); } if ((extensions & PPORT_RANGE)) { *p++ = 0; if ((r->a = getservice(port)) == -1 || (r->b = getservice(p)) == -1) return (-1); if (r->a == r->b) { r->b = 0; r->t = PF_OP_NONE; } else r->t = PF_OP_RRG; return (0); } return (-1); } int pfctl_load_anchors(int dev, struct pfctl *pf, struct pfr_buffer *trans) { struct loadanchors *la; TAILQ_FOREACH(la, &loadanchorshead, entries) { if (pf->opts & PF_OPT_VERBOSE) fprintf(stderr, "\nLoading anchor %s from %s\n", la->anchorname, la->filename); if (pfctl_rules(dev, la->filename, pf->opts, pf->optimize, la->anchorname, trans) == -1) return (-1); } return (0); } int rt_tableid_max(void) { #ifdef __FreeBSD__ int fibs; size_t l = sizeof(fibs); if (sysctlbyname("net.fibs", &fibs, &l, NULL, 0) == -1) fibs = 16; /* XXX RT_MAXFIBS, at least limit it some. */ /* * As the OpenBSD code only compares > and not >= we need to adjust * here given we only accept values of 0..n and want to avoid #ifdefs * in the grammar. */ return (fibs - 1); #else return (RT_TABLEID_MAX); #endif } Index: user/alc/PQ_LAUNDRY/sys/arm64/arm64/pmap.c =================================================================== --- user/alc/PQ_LAUNDRY/sys/arm64/arm64/pmap.c (revision 303666) +++ user/alc/PQ_LAUNDRY/sys/arm64/arm64/pmap.c (revision 303667) @@ -1,3595 +1,3595 @@ /*- * Copyright (c) 1991 Regents of the University of California. * All rights reserved. * Copyright (c) 1994 John S. Dyson * All rights reserved. * Copyright (c) 1994 David Greenman * All rights reserved. * Copyright (c) 2003 Peter Wemm * All rights reserved. * Copyright (c) 2005-2010 Alan L. Cox * All rights reserved. * Copyright (c) 2014 Andrew Turner * All rights reserved. * Copyright (c) 2014-2016 The FreeBSD Foundation * All rights reserved. * * This code is derived from software contributed to Berkeley by * the Systems Programming Group of the University of Utah Computer * Science Department and William Jolitz of UUNET Technologies Inc. * * This software was developed by Andrew Turner under sponsorship from * the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. 
All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)pmap.c 7.7 (Berkeley) 5/12/91 */ /*- * Copyright (c) 2003 Networks Associates Technology, Inc. * All rights reserved. * * This software was developed for the FreeBSD Project by Jake Burkholder, * Safeport Network Services, and Network Associates Laboratories, the * Security Research Division of Network Associates, Inc. under * DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA * CHATS research program. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); /* * Manages physical address maps. * * Since the information managed by this module is * also stored by the logical address mapping module, * this module may throw away valid virtual-to-physical * mappings at almost any time. However, invalidations * of virtual-to-physical mappings must be done as * requested. * * In order to cope with hardware architectures which * make virtual-to-physical map invalidates expensive, * this module may delay invalidate or reduced protection * operations until such time as they are actually * necessary. 
This module is given full information as * to which processors are currently using which maps, * and to when physical maps must be made correct. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define NL0PG (PAGE_SIZE/(sizeof (pd_entry_t))) #define NL1PG (PAGE_SIZE/(sizeof (pd_entry_t))) #define NL2PG (PAGE_SIZE/(sizeof (pd_entry_t))) #define NL3PG (PAGE_SIZE/(sizeof (pt_entry_t))) #define NUL0E L0_ENTRIES #define NUL1E (NUL0E * NL1PG) #define NUL2E (NUL1E * NL2PG) #if !defined(DIAGNOSTIC) #ifdef __GNUC_GNU_INLINE__ #define PMAP_INLINE __attribute__((__gnu_inline__)) inline #else #define PMAP_INLINE extern inline #endif #else #define PMAP_INLINE #endif /* * These are configured by the mair_el1 register. This is set up in locore.S */ #define DEVICE_MEMORY 0 #define UNCACHED_MEMORY 1 #define CACHED_MEMORY 2 #ifdef PV_STATS #define PV_STAT(x) do { x ; } while (0) #else #define PV_STAT(x) do { } while (0) #endif #define pmap_l2_pindex(v) ((v) >> L2_SHIFT) #define NPV_LIST_LOCKS MAXCPU #define PHYS_TO_PV_LIST_LOCK(pa) \ (&pv_list_locks[pa_index(pa) % NPV_LIST_LOCKS]) #define CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, pa) do { \ struct rwlock **_lockp = (lockp); \ struct rwlock *_new_lock; \ \ _new_lock = PHYS_TO_PV_LIST_LOCK(pa); \ if (_new_lock != *_lockp) { \ if (*_lockp != NULL) \ rw_wunlock(*_lockp); \ *_lockp = _new_lock; \ rw_wlock(*_lockp); \ } \ } while (0) #define CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m) \ CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, VM_PAGE_TO_PHYS(m)) #define RELEASE_PV_LIST_LOCK(lockp) do { \ struct rwlock **_lockp = (lockp); \ \ if (*_lockp != NULL) { \ rw_wunlock(*_lockp); \ *_lockp = NULL; \ } \ } while (0) #define VM_PAGE_TO_PV_LIST_LOCK(m) \ PHYS_TO_PV_LIST_LOCK(VM_PAGE_TO_PHYS(m)) struct pmap kernel_pmap_store; vm_offset_t virtual_avail; /* VA of first avail page (after kernel bss) */ vm_offset_t virtual_end; /* VA of last avail page (end of kernel AS) */ vm_offset_t kernel_vm_end = 0; struct msgbuf *msgbufp = NULL; static struct rwlock_padalign pvh_global_lock; vm_paddr_t dmap_phys_base; /* The start of the dmap region */ vm_paddr_t dmap_phys_max; /* The limit of the dmap region */ vm_offset_t dmap_max_addr; /* The virtual address limit of the dmap */ /* This code assumes all L1 DMAP entries will be used */ CTASSERT((DMAP_MIN_ADDRESS & ~L0_OFFSET) == DMAP_MIN_ADDRESS); CTASSERT((DMAP_MAX_ADDRESS & ~L0_OFFSET) == DMAP_MAX_ADDRESS); #define DMAP_TABLES ((DMAP_MAX_ADDRESS - DMAP_MIN_ADDRESS) >> L0_SHIFT) extern pt_entry_t pagetable_dmap[]; /* * Data for the pv entry allocation mechanism */ static TAILQ_HEAD(pch, pv_chunk) pv_chunks = TAILQ_HEAD_INITIALIZER(pv_chunks); static struct mtx pv_chunks_mutex; static struct rwlock pv_list_locks[NPV_LIST_LOCKS]; static void free_pv_chunk(struct pv_chunk *pc); static void free_pv_entry(pmap_t pmap, pv_entry_t pv); static pv_entry_t get_pv_entry(pmap_t pmap, struct rwlock **lockp); static vm_page_t reclaim_pv_chunk(pmap_t locked_pmap, struct rwlock **lockp); static void pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va); static pv_entry_t pmap_pvh_remove(struct md_page *pvh, pmap_t pmap, vm_offset_t va); static vm_page_t pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, vm_page_t mpte, struct rwlock **lockp); static int 
pmap_remove_l3(pmap_t pmap, pt_entry_t *l3, vm_offset_t sva, pd_entry_t ptepde, struct spglist *free, struct rwlock **lockp); static boolean_t pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va, vm_page_t m, struct rwlock **lockp); static vm_page_t _pmap_alloc_l3(pmap_t pmap, vm_pindex_t ptepindex, struct rwlock **lockp); static void _pmap_unwire_l3(pmap_t pmap, vm_offset_t va, vm_page_t m, struct spglist *free); static int pmap_unuse_l3(pmap_t, vm_offset_t, pd_entry_t, struct spglist *); /* * These load the old table data and store the new value. * They need to be atomic as the System MMU may write to the table at * the same time as the CPU. */ #define pmap_load_store(table, entry) atomic_swap_64(table, entry) #define pmap_set(table, mask) atomic_set_64(table, mask) #define pmap_load_clear(table) atomic_swap_64(table, 0) #define pmap_load(table) (*table) /********************/ /* Inline functions */ /********************/ static __inline void pagecopy(void *s, void *d) { memcpy(d, s, PAGE_SIZE); } #define pmap_l0_index(va) (((va) >> L0_SHIFT) & L0_ADDR_MASK) #define pmap_l1_index(va) (((va) >> L1_SHIFT) & Ln_ADDR_MASK) #define pmap_l2_index(va) (((va) >> L2_SHIFT) & Ln_ADDR_MASK) #define pmap_l3_index(va) (((va) >> L3_SHIFT) & Ln_ADDR_MASK) static __inline pd_entry_t * pmap_l0(pmap_t pmap, vm_offset_t va) { return (&pmap->pm_l0[pmap_l0_index(va)]); } static __inline pd_entry_t * pmap_l0_to_l1(pd_entry_t *l0, vm_offset_t va) { pd_entry_t *l1; l1 = (pd_entry_t *)PHYS_TO_DMAP(pmap_load(l0) & ~ATTR_MASK); return (&l1[pmap_l1_index(va)]); } static __inline pd_entry_t * pmap_l1(pmap_t pmap, vm_offset_t va) { pd_entry_t *l0; l0 = pmap_l0(pmap, va); if ((pmap_load(l0) & ATTR_DESCR_MASK) != L0_TABLE) return (NULL); return (pmap_l0_to_l1(l0, va)); } static __inline pd_entry_t * pmap_l1_to_l2(pd_entry_t *l1, vm_offset_t va) { pd_entry_t *l2; l2 = (pd_entry_t *)PHYS_TO_DMAP(pmap_load(l1) & ~ATTR_MASK); return (&l2[pmap_l2_index(va)]); } static __inline pd_entry_t * pmap_l2(pmap_t pmap, vm_offset_t va) { pd_entry_t *l1; l1 = pmap_l1(pmap, va); if ((pmap_load(l1) & ATTR_DESCR_MASK) != L1_TABLE) return (NULL); return (pmap_l1_to_l2(l1, va)); } static __inline pt_entry_t * pmap_l2_to_l3(pd_entry_t *l2, vm_offset_t va) { pt_entry_t *l3; l3 = (pd_entry_t *)PHYS_TO_DMAP(pmap_load(l2) & ~ATTR_MASK); return (&l3[pmap_l3_index(va)]); } /* * Returns the lowest valid pde for a given virtual address. * The next level may or may not point to a valid page or block. */ static __inline pd_entry_t * pmap_pde(pmap_t pmap, vm_offset_t va, int *level) { pd_entry_t *l0, *l1, *l2, desc; l0 = pmap_l0(pmap, va); desc = pmap_load(l0) & ATTR_DESCR_MASK; if (desc != L0_TABLE) { *level = -1; return (NULL); } l1 = pmap_l0_to_l1(l0, va); desc = pmap_load(l1) & ATTR_DESCR_MASK; if (desc != L1_TABLE) { *level = 0; return (l0); } l2 = pmap_l1_to_l2(l1, va); desc = pmap_load(l2) & ATTR_DESCR_MASK; if (desc != L2_TABLE) { *level = 1; return (l1); } *level = 2; return (l2); } /* * Returns the lowest valid pte block or table entry for a given virtual * address. If there are no valid entries return NULL and set the level to * the first invalid level. 
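 */

/*
 * Illustrative sketch: pmap_pde() above stops at the lowest valid
 * directory entry, and callers such as pmap_kenter() check the returned
 * level before descending.  A level of 2 means a valid L2 table entry
 * exists, so the L3 page table for va can be reached with
 * pmap_l2_to_l3().  Hypothetical helper, assuming only base-page (L3)
 * mappings are of interest:
 */
static __inline pt_entry_t *
pmap_l3_lookup_sketch(pmap_t pmap, vm_offset_t va)
{
	pd_entry_t *pde;
	int lvl;

	pde = pmap_pde(pmap, va, &lvl);
	if (pde == NULL || lvl != 2)
		return (NULL);		/* no L3 table to index into */
	return (pmap_l2_to_l3(pde, va));
}

/*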
*/ static __inline pt_entry_t * pmap_pte(pmap_t pmap, vm_offset_t va, int *level) { pd_entry_t *l1, *l2, desc; pt_entry_t *l3; l1 = pmap_l1(pmap, va); if (l1 == NULL) { *level = 0; return (NULL); } desc = pmap_load(l1) & ATTR_DESCR_MASK; if (desc == L1_BLOCK) { *level = 1; return (l1); } if (desc != L1_TABLE) { *level = 1; return (NULL); } l2 = pmap_l1_to_l2(l1, va); desc = pmap_load(l2) & ATTR_DESCR_MASK; if (desc == L2_BLOCK) { *level = 2; return (l2); } if (desc != L2_TABLE) { *level = 2; return (NULL); } *level = 3; l3 = pmap_l2_to_l3(l2, va); if ((pmap_load(l3) & ATTR_DESCR_MASK) != L3_PAGE) return (NULL); return (l3); } bool pmap_get_tables(pmap_t pmap, vm_offset_t va, pd_entry_t **l0, pd_entry_t **l1, pd_entry_t **l2, pt_entry_t **l3) { pd_entry_t *l0p, *l1p, *l2p; if (pmap->pm_l0 == NULL) return (false); l0p = pmap_l0(pmap, va); *l0 = l0p; if ((pmap_load(l0p) & ATTR_DESCR_MASK) != L0_TABLE) return (false); l1p = pmap_l0_to_l1(l0p, va); *l1 = l1p; if ((pmap_load(l1p) & ATTR_DESCR_MASK) == L1_BLOCK) { *l2 = NULL; *l3 = NULL; return (true); } if ((pmap_load(l1p) & ATTR_DESCR_MASK) != L1_TABLE) return (false); l2p = pmap_l1_to_l2(l1p, va); *l2 = l2p; if ((pmap_load(l2p) & ATTR_DESCR_MASK) == L2_BLOCK) { *l3 = NULL; return (true); } *l3 = pmap_l2_to_l3(l2p, va); return (true); } static __inline int pmap_is_current(pmap_t pmap) { return ((pmap == pmap_kernel()) || (pmap == curthread->td_proc->p_vmspace->vm_map.pmap)); } static __inline int pmap_l3_valid(pt_entry_t l3) { return ((l3 & ATTR_DESCR_MASK) == L3_PAGE); } static __inline int pmap_l3_valid_cacheable(pt_entry_t l3) { return (((l3 & ATTR_DESCR_MASK) == L3_PAGE) && ((l3 & ATTR_IDX_MASK) == ATTR_IDX(CACHED_MEMORY))); } #define PTE_SYNC(pte) cpu_dcache_wb_range((vm_offset_t)pte, sizeof(*pte)) /* * Checks if the page is dirty. We currently lack proper tracking of this on * arm64 so for now assume that if a page mapped as rw was accessed, it is dirty.
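 *
 * Concretely, a mapping counts as dirty when it is both writable and
 * has been accessed: (pte & (ATTR_AF | ATTR_AP_RW_BIT)) ==
 * (ATTR_AF | ATTR_AP(ATTR_AP_RW)), which is the test below.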
*/ static inline int pmap_page_dirty(pt_entry_t pte) { return ((pte & (ATTR_AF | ATTR_AP_RW_BIT)) == (ATTR_AF | ATTR_AP(ATTR_AP_RW))); } static __inline void pmap_resident_count_inc(pmap_t pmap, int count) { PMAP_LOCK_ASSERT(pmap, MA_OWNED); pmap->pm_stats.resident_count += count; } static __inline void pmap_resident_count_dec(pmap_t pmap, int count) { PMAP_LOCK_ASSERT(pmap, MA_OWNED); KASSERT(pmap->pm_stats.resident_count >= count, ("pmap %p resident count underflow %ld %d", pmap, pmap->pm_stats.resident_count, count)); pmap->pm_stats.resident_count -= count; } static pt_entry_t * pmap_early_page_idx(vm_offset_t l1pt, vm_offset_t va, u_int *l1_slot, u_int *l2_slot) { pt_entry_t *l2; pd_entry_t *l1; l1 = (pd_entry_t *)l1pt; *l1_slot = (va >> L1_SHIFT) & Ln_ADDR_MASK; /* Check locore has used a table L1 map */ KASSERT((l1[*l1_slot] & ATTR_DESCR_MASK) == L1_TABLE, ("Invalid bootstrap L1 table")); /* Find the address of the L2 table */ l2 = (pt_entry_t *)init_pt_va; *l2_slot = pmap_l2_index(va); return (l2); } static vm_paddr_t pmap_early_vtophys(vm_offset_t l1pt, vm_offset_t va) { u_int l1_slot, l2_slot; pt_entry_t *l2; l2 = pmap_early_page_idx(l1pt, va, &l1_slot, &l2_slot); return ((l2[l2_slot] & ~ATTR_MASK) + (va & L2_OFFSET)); } static void pmap_bootstrap_dmap(vm_offset_t kern_l1, vm_paddr_t min_pa, vm_paddr_t max_pa) { vm_offset_t va; vm_paddr_t pa; u_int l1_slot; pa = dmap_phys_base = min_pa & ~L1_OFFSET; va = DMAP_MIN_ADDRESS; for (; va < DMAP_MAX_ADDRESS && pa < max_pa; pa += L1_SIZE, va += L1_SIZE, l1_slot++) { l1_slot = ((va - DMAP_MIN_ADDRESS) >> L1_SHIFT); pmap_load_store(&pagetable_dmap[l1_slot], (pa & ~L1_OFFSET) | ATTR_DEFAULT | ATTR_IDX(CACHED_MEMORY) | L1_BLOCK); } /* Set the upper limit of the DMAP region */ dmap_phys_max = pa; dmap_max_addr = va; cpu_dcache_wb_range((vm_offset_t)pagetable_dmap, PAGE_SIZE * DMAP_TABLES); cpu_tlb_flushID(); } static vm_offset_t pmap_bootstrap_l2(vm_offset_t l1pt, vm_offset_t va, vm_offset_t l2_start) { vm_offset_t l2pt; vm_paddr_t pa; pd_entry_t *l1; u_int l1_slot; KASSERT((va & L1_OFFSET) == 0, ("Invalid virtual address")); l1 = (pd_entry_t *)l1pt; l1_slot = pmap_l1_index(va); l2pt = l2_start; for (; va < VM_MAX_KERNEL_ADDRESS; l1_slot++, va += L1_SIZE) { KASSERT(l1_slot < Ln_ENTRIES, ("Invalid L1 index")); pa = pmap_early_vtophys(l1pt, l2pt); pmap_load_store(&l1[l1_slot], (pa & ~Ln_TABLE_MASK) | L1_TABLE); l2pt += PAGE_SIZE; } /* Clean the L2 page table */ memset((void *)l2_start, 0, l2pt - l2_start); cpu_dcache_wb_range(l2_start, l2pt - l2_start); /* Flush the l1 table to ram */ cpu_dcache_wb_range((vm_offset_t)l1, PAGE_SIZE); return l2pt; } static vm_offset_t pmap_bootstrap_l3(vm_offset_t l1pt, vm_offset_t va, vm_offset_t l3_start) { vm_offset_t l2pt, l3pt; vm_paddr_t pa; pd_entry_t *l2; u_int l2_slot; KASSERT((va & L2_OFFSET) == 0, ("Invalid virtual address")); l2 = pmap_l2(kernel_pmap, va); l2 = (pd_entry_t *)rounddown2((uintptr_t)l2, PAGE_SIZE); l2pt = (vm_offset_t)l2; l2_slot = pmap_l2_index(va); l3pt = l3_start; for (; va < VM_MAX_KERNEL_ADDRESS; l2_slot++, va += L2_SIZE) { KASSERT(l2_slot < Ln_ENTRIES, ("Invalid L2 index")); pa = pmap_early_vtophys(l1pt, l3pt); pmap_load_store(&l2[l2_slot], (pa & ~Ln_TABLE_MASK) | L2_TABLE); l3pt += PAGE_SIZE; } /* Clean the L2 page table */ memset((void *)l3_start, 0, l3pt - l3_start); cpu_dcache_wb_range(l3_start, l3pt - l3_start); cpu_dcache_wb_range((vm_offset_t)l2, PAGE_SIZE); return l3pt; } /* * Bootstrap the system enough to run with virtual memory. 
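 */

/*
 * Illustrative sketch: the direct map created by pmap_bootstrap_dmap()
 * above exposes physical memory at a constant virtual offset, which is
 * what lets the table walkers use PHYS_TO_DMAP() rather than temporary
 * mappings.  Hypothetical helper showing the intended round-trip
 * invariant for any address inside the mapped range:
 */
static __inline bool
pmap_dmap_covers_sketch(vm_paddr_t pa)
{

	if (pa < dmap_phys_base || pa >= dmap_phys_max)
		return (false);		/* outside the direct map */
	return (DMAP_TO_PHYS(PHYS_TO_DMAP(pa)) == pa);
}

/*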
*/ void pmap_bootstrap(vm_offset_t l0pt, vm_offset_t l1pt, vm_paddr_t kernstart, vm_size_t kernlen) { u_int l1_slot, l2_slot, avail_slot, map_slot, used_map_slot; uint64_t kern_delta; pt_entry_t *l2; vm_offset_t va, freemempos; vm_offset_t dpcpu, msgbufpv; vm_paddr_t pa, max_pa, min_pa; int i; kern_delta = KERNBASE - kernstart; physmem = 0; printf("pmap_bootstrap %lx %lx %lx\n", l1pt, kernstart, kernlen); printf("%lx\n", l1pt); printf("%lx\n", (KERNBASE >> L1_SHIFT) & Ln_ADDR_MASK); /* Set this early so we can use the pagetable walking functions */ kernel_pmap_store.pm_l0 = (pd_entry_t *)l0pt; PMAP_LOCK_INIT(kernel_pmap); /* * Initialize the global pv list lock. */ rw_init(&pvh_global_lock, "pmap pv global"); /* Assume the address we were loaded to is a valid physical address */ min_pa = max_pa = KERNBASE - kern_delta; /* * Find the minimum physical address. physmap is sorted, * but may contain empty ranges. */ for (i = 0; i < (physmap_idx * 2); i += 2) { if (physmap[i] == physmap[i + 1]) continue; if (physmap[i] <= min_pa) min_pa = physmap[i]; if (physmap[i + 1] > max_pa) max_pa = physmap[i + 1]; } /* Create a direct map region early so we can use it for pa -> va */ pmap_bootstrap_dmap(l1pt, min_pa, max_pa); va = KERNBASE; pa = KERNBASE - kern_delta; /* * Start to initialise phys_avail by copying from physmap * up to the physical address KERNBASE points at. */ map_slot = avail_slot = 0; for (; map_slot < (physmap_idx * 2) && avail_slot < (PHYS_AVAIL_SIZE - 2); map_slot += 2) { if (physmap[map_slot] == physmap[map_slot + 1]) continue; if (physmap[map_slot] <= pa && physmap[map_slot + 1] > pa) break; phys_avail[avail_slot] = physmap[map_slot]; phys_avail[avail_slot + 1] = physmap[map_slot + 1]; physmem += (phys_avail[avail_slot + 1] - phys_avail[avail_slot]) >> PAGE_SHIFT; avail_slot += 2; } /* Add the memory before the kernel */ if (physmap[avail_slot] < pa && avail_slot < (PHYS_AVAIL_SIZE - 2)) { phys_avail[avail_slot] = physmap[map_slot]; phys_avail[avail_slot + 1] = pa; physmem += (phys_avail[avail_slot + 1] - phys_avail[avail_slot]) >> PAGE_SHIFT; avail_slot += 2; } used_map_slot = map_slot; /* * Read the page table to find out what is already mapped. * This assumes we have mapped a block of memory from KERNBASE * using a single L1 entry. */ l2 = pmap_early_page_idx(l1pt, KERNBASE, &l1_slot, &l2_slot); /* Sanity check the index, KERNBASE should be the first VA */ KASSERT(l2_slot == 0, ("The L2 index is non-zero")); /* Find how many pages we have mapped */ for (; l2_slot < Ln_ENTRIES; l2_slot++) { if ((l2[l2_slot] & ATTR_DESCR_MASK) == 0) break; /* Check locore used L2 blocks */ KASSERT((l2[l2_slot] & ATTR_DESCR_MASK) == L2_BLOCK, ("Invalid bootstrap L2 table")); KASSERT((l2[l2_slot] & ~ATTR_MASK) == pa, ("Incorrect PA in L2 table")); va += L2_SIZE; pa += L2_SIZE; } va = roundup2(va, L1_SIZE); freemempos = KERNBASE + kernlen; freemempos = roundup2(freemempos, PAGE_SIZE); /* Create the l2 tables up to VM_MAX_KERNEL_ADDRESS */ freemempos = pmap_bootstrap_l2(l1pt, va, freemempos); /* And the l3 tables for the early devmap */ freemempos = pmap_bootstrap_l3(l1pt, VM_MAX_KERNEL_ADDRESS - L2_SIZE, freemempos); cpu_tlb_flushID(); #define alloc_pages(var, np) \ (var) = freemempos; \ freemempos += (np * PAGE_SIZE); \ memset((char *)(var), 0, ((np) * PAGE_SIZE)); /* Allocate dynamic per-cpu area. */ alloc_pages(dpcpu, DPCPU_SIZE / PAGE_SIZE); dpcpu_init((void *)dpcpu, 0); /* Allocate memory for the msgbuf, e.g. 
for /sbin/dmesg */ alloc_pages(msgbufpv, round_page(msgbufsize) / PAGE_SIZE); msgbufp = (void *)msgbufpv; virtual_avail = roundup2(freemempos, L1_SIZE); virtual_end = VM_MAX_KERNEL_ADDRESS - L2_SIZE; kernel_vm_end = virtual_avail; - + pa = pmap_early_vtophys(l1pt, freemempos); /* Finish initialising physmap */ map_slot = used_map_slot; for (; avail_slot < (PHYS_AVAIL_SIZE - 2) && map_slot < (physmap_idx * 2); map_slot += 2) { if (physmap[map_slot] == physmap[map_slot + 1]) continue; /* Have we used the current range? */ if (physmap[map_slot + 1] <= pa) continue; /* Do we need to split the entry? */ if (physmap[map_slot] < pa) { phys_avail[avail_slot] = pa; phys_avail[avail_slot + 1] = physmap[map_slot + 1]; } else { phys_avail[avail_slot] = physmap[map_slot]; phys_avail[avail_slot + 1] = physmap[map_slot + 1]; } physmem += (phys_avail[avail_slot + 1] - phys_avail[avail_slot]) >> PAGE_SHIFT; avail_slot += 2; } phys_avail[avail_slot] = 0; phys_avail[avail_slot + 1] = 0; /* * Maxmem isn't the "maximum memory", it's one larger than the * highest page of the physical address space. It should be * called something like "Maxphyspage". */ Maxmem = atop(phys_avail[avail_slot - 1]); cpu_tlb_flushID(); } /* * Initialize a vm_page's machine-dependent fields. */ void pmap_page_init(vm_page_t m) { TAILQ_INIT(&m->md.pv_list); m->md.pv_memattr = VM_MEMATTR_WRITE_BACK; } /* * Initialize the pmap module. * Called by vm_init, to initialize any structures that the pmap * system needs to map virtual memory. */ void pmap_init(void) { int i; /* * Initialize the pv chunk list mutex. */ mtx_init(&pv_chunks_mutex, "pmap pv chunk list", NULL, MTX_DEF); /* * Initialize the pool of pv list locks. */ for (i = 0; i < NPV_LIST_LOCKS; i++) rw_init(&pv_list_locks[i], "pmap pv list"); } /* * Invalidate a single TLB entry. */ PMAP_INLINE void pmap_invalidate_page(pmap_t pmap, vm_offset_t va) { sched_pin(); __asm __volatile( "dsb ishst \n" "tlbi vaae1is, %0 \n" "dsb ish \n" "isb \n" : : "r"(va >> PAGE_SHIFT)); sched_unpin(); } PMAP_INLINE void pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) { vm_offset_t addr; sched_pin(); dsb(ishst); for (addr = sva; addr < eva; addr += PAGE_SIZE) { __asm __volatile( "tlbi vaae1is, %0" : : "r"(addr >> PAGE_SHIFT)); } __asm __volatile( "dsb ish \n" "isb \n"); sched_unpin(); } PMAP_INLINE void pmap_invalidate_all(pmap_t pmap) { sched_pin(); __asm __volatile( "dsb ishst \n" "tlbi vmalle1is \n" "dsb ish \n" "isb \n"); sched_unpin(); } /* * Routine: pmap_extract * Function: * Extract the physical page address associated * with the given map/virtual_address pair. */ -vm_paddr_t +vm_paddr_t pmap_extract(pmap_t pmap, vm_offset_t va) { pt_entry_t *pte, tpte; vm_paddr_t pa; int lvl; pa = 0; PMAP_LOCK(pmap); /* * Find the block or page map for this virtual address. pmap_pte * will return either a valid block/page entry, or NULL. 
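 *
 * The level returned in lvl says how large the mapping is, so the
 * switch below folds in the matching number of offset bits from va.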
*/ pte = pmap_pte(pmap, va, &lvl); if (pte != NULL) { tpte = pmap_load(pte); pa = tpte & ~ATTR_MASK; switch(lvl) { case 1: KASSERT((tpte & ATTR_DESCR_MASK) == L1_BLOCK, ("pmap_extract: Invalid L1 pte found: %lx", tpte & ATTR_DESCR_MASK)); pa |= (va & L1_OFFSET); break; case 2: KASSERT((tpte & ATTR_DESCR_MASK) == L2_BLOCK, ("pmap_extract: Invalid L2 pte found: %lx", tpte & ATTR_DESCR_MASK)); pa |= (va & L2_OFFSET); break; case 3: KASSERT((tpte & ATTR_DESCR_MASK) == L3_PAGE, ("pmap_extract: Invalid L3 pte found: %lx", tpte & ATTR_DESCR_MASK)); pa |= (va & L3_OFFSET); break; } } PMAP_UNLOCK(pmap); return (pa); } /* * Routine: pmap_extract_and_hold * Function: * Atomically extract and hold the physical page * with the given pmap and virtual address pair * if that mapping permits the given protection. */ vm_page_t pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot) { pt_entry_t *pte, tpte; vm_paddr_t pa; vm_page_t m; int lvl; pa = 0; m = NULL; PMAP_LOCK(pmap); retry: pte = pmap_pte(pmap, va, &lvl); if (pte != NULL) { tpte = pmap_load(pte); KASSERT(lvl > 0 && lvl <= 3, ("pmap_extract_and_hold: Invalid level %d", lvl)); CTASSERT(L1_BLOCK == L2_BLOCK); KASSERT((lvl == 3 && (tpte & ATTR_DESCR_MASK) == L3_PAGE) || (lvl < 3 && (tpte & ATTR_DESCR_MASK) == L1_BLOCK), ("pmap_extract_and_hold: Invalid pte at L%d: %lx", lvl, tpte & ATTR_DESCR_MASK)); if (((tpte & ATTR_AP_RW_BIT) == ATTR_AP(ATTR_AP_RW)) || ((prot & VM_PROT_WRITE) == 0)) { if (vm_page_pa_tryrelock(pmap, tpte & ~ATTR_MASK, &pa)) goto retry; m = PHYS_TO_VM_PAGE(tpte & ~ATTR_MASK); vm_page_hold(m); } } PA_UNLOCK_COND(pa); PMAP_UNLOCK(pmap); return (m); } vm_paddr_t pmap_kextract(vm_offset_t va) { pt_entry_t *pte, tpte; vm_paddr_t pa; int lvl; if (va >= DMAP_MIN_ADDRESS && va < DMAP_MAX_ADDRESS) { pa = DMAP_TO_PHYS(va); } else { pa = 0; pte = pmap_pte(kernel_pmap, va, &lvl); if (pte != NULL) { tpte = pmap_load(pte); pa = tpte & ~ATTR_MASK; switch(lvl) { case 1: KASSERT((tpte & ATTR_DESCR_MASK) == L1_BLOCK, ("pmap_kextract: Invalid L1 pte found: %lx", tpte & ATTR_DESCR_MASK)); pa |= (va & L1_OFFSET); break; case 2: KASSERT((tpte & ATTR_DESCR_MASK) == L2_BLOCK, ("pmap_kextract: Invalid L2 pte found: %lx", tpte & ATTR_DESCR_MASK)); pa |= (va & L2_OFFSET); break; case 3: KASSERT((tpte & ATTR_DESCR_MASK) == L3_PAGE, ("pmap_kextract: Invalid L3 pte found: %lx", tpte & ATTR_DESCR_MASK)); pa |= (va & L3_OFFSET); break; } } } return (pa); } /*************************************************** * Low level mapping routines..... ***************************************************/ static void pmap_kenter(vm_offset_t sva, vm_size_t size, vm_paddr_t pa, int mode) { pd_entry_t *pde; pt_entry_t *pte; vm_offset_t va; int lvl; KASSERT((pa & L3_OFFSET) == 0, ("pmap_kenter: Invalid physical address")); KASSERT((sva & L3_OFFSET) == 0, ("pmap_kenter: Invalid virtual address")); KASSERT((size & PAGE_MASK) == 0, ("pmap_kenter: Mapping is not page-sized")); va = sva; while (size != 0) { pde = pmap_pde(kernel_pmap, va, &lvl); KASSERT(pde != NULL, ("pmap_kenter: Invalid page entry, va: 0x%lx", va)); KASSERT(lvl == 2, ("pmap_kenter: Invalid level %d", lvl)); pte = pmap_l2_to_l3(pde, va); pmap_load_store(pte, (pa & ~L3_OFFSET) | ATTR_DEFAULT | ATTR_IDX(mode) | L3_PAGE); PTE_SYNC(pte); va += PAGE_SIZE; pa += PAGE_SIZE; size -= PAGE_SIZE; } pmap_invalidate_range(kernel_pmap, sva, va); } void pmap_kenter_device(vm_offset_t sva, vm_size_t size, vm_paddr_t pa) { pmap_kenter(sva, size, pa, DEVICE_MEMORY); } /* * Remove a page from the kernel pagetables. 
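 */

/*
 * Illustrative sketch: pmap_kenter_device() above and
 * pmap_kremove_device() below are used in pairs to create and tear down
 * an uncached device register window.  Hypothetical usage; va must come
 * from a KVA allocator and every argument must be page-aligned:
 */
static __inline void
pmap_device_window_sketch(vm_offset_t va, vm_paddr_t pa)
{

	pmap_kenter_device(va, PAGE_SIZE, pa);	/* map one device page */
	/* ... access the device registers through va ... */
	pmap_kremove_device(va, PAGE_SIZE);	/* unmap and invalidate */
}

/*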
*/ PMAP_INLINE void pmap_kremove(vm_offset_t va) { pt_entry_t *pte; int lvl; pte = pmap_pte(kernel_pmap, va, &lvl); KASSERT(pte != NULL, ("pmap_kremove: Invalid address")); KASSERT(lvl == 3, ("pmap_kremove: Invalid pte level %d", lvl)); if (pmap_l3_valid_cacheable(pmap_load(pte))) cpu_dcache_wb_range(va, L3_SIZE); pmap_load_clear(pte); PTE_SYNC(pte); pmap_invalidate_page(kernel_pmap, va); } void pmap_kremove_device(vm_offset_t sva, vm_size_t size) { pt_entry_t *pte; vm_offset_t va; int lvl; KASSERT((sva & L3_OFFSET) == 0, ("pmap_kremove_device: Invalid virtual address")); KASSERT((size & PAGE_MASK) == 0, ("pmap_kremove_device: Mapping is not page-sized")); va = sva; while (size != 0) { pte = pmap_pte(kernel_pmap, va, &lvl); KASSERT(pte != NULL, ("Invalid page table, va: 0x%lx", va)); KASSERT(lvl == 3, ("Invalid device pagetable level: %d != 3", lvl)); pmap_load_clear(pte); PTE_SYNC(pte); va += PAGE_SIZE; size -= PAGE_SIZE; } pmap_invalidate_range(kernel_pmap, sva, va); } /* * Used to map a range of physical addresses into kernel * virtual address space. * * The value passed in '*virt' is a suggested virtual address for * the mapping. Architectures which can support a direct-mapped * physical to virtual region can return the appropriate address * within that region, leaving '*virt' unchanged. Other * architectures should map the pages starting at '*virt' and * update '*virt' with the first usable address after the mapped * region. */ vm_offset_t pmap_map(vm_offset_t *virt, vm_paddr_t start, vm_paddr_t end, int prot) { return PHYS_TO_DMAP(start); } /* * Add a list of wired pages to the kva * this routine is only used for temporary * kernel mappings that do not need to have * page modification or references recorded. * Note that old mappings are simply written * over. The page *must* be wired. * Note: SMP coherent. Uses a ranged shootdown IPI. */ void pmap_qenter(vm_offset_t sva, vm_page_t *ma, int count) { pd_entry_t *pde; pt_entry_t *pte, pa; vm_offset_t va; vm_page_t m; int i, lvl; va = sva; for (i = 0; i < count; i++) { pde = pmap_pde(kernel_pmap, va, &lvl); KASSERT(pde != NULL, ("pmap_qenter: Invalid page entry, va: 0x%lx", va)); KASSERT(lvl == 2, ("pmap_qenter: Invalid level %d", lvl)); m = ma[i]; pa = VM_PAGE_TO_PHYS(m) | ATTR_DEFAULT | ATTR_AP(ATTR_AP_RW) | ATTR_IDX(m->md.pv_memattr) | L3_PAGE; pte = pmap_l2_to_l3(pde, va); pmap_load_store(pte, pa); PTE_SYNC(pte); va += L3_SIZE; } pmap_invalidate_range(kernel_pmap, sva, va); } /* * This routine tears out page mappings from the * kernel -- it is meant only for temporary mappings. */ void pmap_qremove(vm_offset_t sva, int count) { pt_entry_t *pte; vm_offset_t va; int lvl; KASSERT(sva >= VM_MIN_KERNEL_ADDRESS, ("usermode va %lx", sva)); va = sva; while (count-- > 0) { pte = pmap_pte(kernel_pmap, va, &lvl); KASSERT(lvl == 3, ("Invalid device pagetable level: %d != 3", lvl)); if (pte != NULL) { if (pmap_l3_valid_cacheable(pmap_load(pte))) cpu_dcache_wb_range(va, L3_SIZE); pmap_load_clear(pte); PTE_SYNC(pte); } va += PAGE_SIZE; } pmap_invalidate_range(kernel_pmap, sva, va); } /*************************************************** * Page table page management routines..... ***************************************************/ static __inline void pmap_free_zero_pages(struct spglist *free) { vm_page_t m; while ((m = SLIST_FIRST(free)) != NULL) { SLIST_REMOVE_HEAD(free, plinks.s.ss); /* Preserve the page's PG_ZERO setting. */ vm_page_free_toq(m); } } /* * Schedule the specified unused page table page to be freed. 
Specifically, * add the page to the specified list of pages that will be released to the * physical memory manager after the TLB has been updated. */ static __inline void pmap_add_delayed_free_list(vm_page_t m, struct spglist *free, boolean_t set_PG_ZERO) { if (set_PG_ZERO) m->flags |= PG_ZERO; else m->flags &= ~PG_ZERO; SLIST_INSERT_HEAD(free, m, plinks.s.ss); } - + /* * Decrements a page table page's wire count, which is used to record the * number of valid page table entries within the page. If the wire count * drops to zero, then the page table page is unmapped. Returns TRUE if the * page table page was unmapped and FALSE otherwise. */ static inline boolean_t pmap_unwire_l3(pmap_t pmap, vm_offset_t va, vm_page_t m, struct spglist *free) { --m->wire_count; if (m->wire_count == 0) { _pmap_unwire_l3(pmap, va, m, free); return (TRUE); } else return (FALSE); } static void _pmap_unwire_l3(pmap_t pmap, vm_offset_t va, vm_page_t m, struct spglist *free) { PMAP_LOCK_ASSERT(pmap, MA_OWNED); /* * unmap the page table page */ if (m->pindex >= (NUL2E + NUL1E)) { /* l1 page */ pd_entry_t *l0; l0 = pmap_l0(pmap, va); pmap_load_clear(l0); PTE_SYNC(l0); } else if (m->pindex >= NUL2E) { /* l2 page */ pd_entry_t *l1; l1 = pmap_l1(pmap, va); pmap_load_clear(l1); PTE_SYNC(l1); } else { /* l3 page */ pd_entry_t *l2; l2 = pmap_l2(pmap, va); pmap_load_clear(l2); PTE_SYNC(l2); } pmap_resident_count_dec(pmap, 1); if (m->pindex < NUL2E) { /* We just released an l3, unhold the matching l2 */ pd_entry_t *l1, tl1; vm_page_t l2pg; l1 = pmap_l1(pmap, va); tl1 = pmap_load(l1); l2pg = PHYS_TO_VM_PAGE(tl1 & ~ATTR_MASK); pmap_unwire_l3(pmap, va, l2pg, free); } else if (m->pindex < (NUL2E + NUL1E)) { /* We just released an l2, unhold the matching l1 */ pd_entry_t *l0, tl0; vm_page_t l1pg; l0 = pmap_l0(pmap, va); tl0 = pmap_load(l0); l1pg = PHYS_TO_VM_PAGE(tl0 & ~ATTR_MASK); pmap_unwire_l3(pmap, va, l1pg, free); } pmap_invalidate_page(pmap, va); /* * This is a release store so that the ordinary store unmapping * the page table page is globally performed before TLB shoot- * down is begun. */ atomic_subtract_rel_int(&vm_cnt.v_wire_count, 1); - /* + /* * Put page on a list so that it is released after * *ALL* TLB shootdown is done */ pmap_add_delayed_free_list(m, free, TRUE); } /* * After removing an l3 entry, this routine is used to * conditionally free the page, and manage the hold/wire counts. */ static int pmap_unuse_l3(pmap_t pmap, vm_offset_t va, pd_entry_t ptepde, struct spglist *free) { vm_page_t mpte; if (va >= VM_MAXUSER_ADDRESS) return (0); KASSERT(ptepde != 0, ("pmap_unuse_pt: ptepde != 0")); mpte = PHYS_TO_VM_PAGE(ptepde & ~ATTR_MASK); return (pmap_unwire_l3(pmap, va, mpte, free)); } void pmap_pinit0(pmap_t pmap) { PMAP_LOCK_INIT(pmap); bzero(&pmap->pm_stats, sizeof(pmap->pm_stats)); pmap->pm_l0 = kernel_pmap->pm_l0; } int pmap_pinit(pmap_t pmap) { vm_paddr_t l0phys; vm_page_t l0pt; /* * allocate the l0 page */ while ((l0pt = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL) VM_WAIT; l0phys = VM_PAGE_TO_PHYS(l0pt); pmap->pm_l0 = (pd_entry_t *)PHYS_TO_DMAP(l0phys); if ((l0pt->flags & PG_ZERO) == 0) pagezero(pmap->pm_l0); bzero(&pmap->pm_stats, sizeof(pmap->pm_stats)); return (1); } /* * This routine is called if the desired page table page does not exist. * * If page table page allocation fails, this routine may sleep before * returning NULL. It sleeps only if a lock pointer was given. 
* * Note: If a page allocation fails at page table level two or three, * one or two pages may be held during the wait, only to be released * afterwards. This conservative approach is easily argued to avoid * race conditions. */ static vm_page_t _pmap_alloc_l3(pmap_t pmap, vm_pindex_t ptepindex, struct rwlock **lockp) { vm_page_t m, l1pg, l2pg; PMAP_LOCK_ASSERT(pmap, MA_OWNED); /* * Allocate a page table page. */ if ((m = vm_page_alloc(NULL, ptepindex, VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL) { if (lockp != NULL) { RELEASE_PV_LIST_LOCK(lockp); PMAP_UNLOCK(pmap); rw_runlock(&pvh_global_lock); VM_WAIT; rw_rlock(&pvh_global_lock); PMAP_LOCK(pmap); } /* * Indicate the need to retry. While waiting, the page table * page may have been allocated. */ return (NULL); } if ((m->flags & PG_ZERO) == 0) pmap_zero_page(m); /* * Map the pagetable page into the process address space, if * it isn't already there. */ if (ptepindex >= (NUL2E + NUL1E)) { pd_entry_t *l0; vm_pindex_t l0index; l0index = ptepindex - (NUL2E + NUL1E); l0 = &pmap->pm_l0[l0index]; pmap_load_store(l0, VM_PAGE_TO_PHYS(m) | L0_TABLE); PTE_SYNC(l0); } else if (ptepindex >= NUL2E) { vm_pindex_t l0index, l1index; pd_entry_t *l0, *l1; pd_entry_t tl0; l1index = ptepindex - NUL2E; l0index = l1index >> L0_ENTRIES_SHIFT; l0 = &pmap->pm_l0[l0index]; tl0 = pmap_load(l0); if (tl0 == 0) { /* recurse for allocating page dir */ if (_pmap_alloc_l3(pmap, NUL2E + NUL1E + l0index, lockp) == NULL) { --m->wire_count; /* XXX: release mem barrier? */ atomic_subtract_int(&vm_cnt.v_wire_count, 1); vm_page_free_zero(m); return (NULL); } } else { l1pg = PHYS_TO_VM_PAGE(tl0 & ~ATTR_MASK); l1pg->wire_count++; } l1 = (pd_entry_t *)PHYS_TO_DMAP(pmap_load(l0) & ~ATTR_MASK); l1 = &l1[ptepindex & Ln_ADDR_MASK]; pmap_load_store(l1, VM_PAGE_TO_PHYS(m) | L1_TABLE); PTE_SYNC(l1); } else { vm_pindex_t l0index, l1index; pd_entry_t *l0, *l1, *l2; pd_entry_t tl0, tl1; l1index = ptepindex >> Ln_ENTRIES_SHIFT; l0index = l1index >> L0_ENTRIES_SHIFT; l0 = &pmap->pm_l0[l0index]; tl0 = pmap_load(l0); if (tl0 == 0) { /* recurse for allocating page dir */ if (_pmap_alloc_l3(pmap, NUL2E + l1index, lockp) == NULL) { --m->wire_count; atomic_subtract_int(&vm_cnt.v_wire_count, 1); vm_page_free_zero(m); return (NULL); } tl0 = pmap_load(l0); l1 = (pd_entry_t *)PHYS_TO_DMAP(tl0 & ~ATTR_MASK); l1 = &l1[l1index & Ln_ADDR_MASK]; } else { l1 = (pd_entry_t *)PHYS_TO_DMAP(tl0 & ~ATTR_MASK); l1 = &l1[l1index & Ln_ADDR_MASK]; tl1 = pmap_load(l1); if (tl1 == 0) { /* recurse for allocating page dir */ if (_pmap_alloc_l3(pmap, NUL2E + l1index, lockp) == NULL) { --m->wire_count; /* XXX: release mem barrier? */ atomic_subtract_int( &vm_cnt.v_wire_count, 1); vm_page_free_zero(m); return (NULL); } } else { l2pg = PHYS_TO_VM_PAGE(tl1 & ~ATTR_MASK); l2pg->wire_count++; } } l2 = (pd_entry_t *)PHYS_TO_DMAP(pmap_load(l1) & ~ATTR_MASK); l2 = &l2[ptepindex & Ln_ADDR_MASK]; pmap_load_store(l2, VM_PAGE_TO_PHYS(m) | L2_TABLE); PTE_SYNC(l2); } pmap_resident_count_inc(pmap, 1); return (m); } static vm_page_t pmap_alloc_l3(pmap_t pmap, vm_offset_t va, struct rwlock **lockp) { vm_pindex_t ptepindex; pd_entry_t *pde, tpde; vm_page_t m; int lvl; /* * Calculate pagetable page index */ ptepindex = pmap_l2_pindex(va); retry: /* * Get the page directory entry */ pde = pmap_pde(pmap, va, &lvl); /* * If the page table page is mapped, we just increment the hold count, * and activate it. If we get a level 2 pde it will point to a level 3 * table. 
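 *
 * A level below 2 means an intermediate table is missing;
 * _pmap_alloc_l3() allocates it (recursing for its parents) and, when a
 * lock pointer was given, may sleep, after which we retry the walk.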
*/ if (lvl == 2) { tpde = pmap_load(pde); if (tpde != 0) { m = PHYS_TO_VM_PAGE(tpde & ~ATTR_MASK); m->wire_count++; return (m); } } /* * Here if the pte page isn't mapped, or if it has been deallocated. */ m = _pmap_alloc_l3(pmap, ptepindex, lockp); if (m == NULL && lockp != NULL) goto retry; return (m); } /*************************************************** * Pmap allocation/deallocation routines. ***************************************************/ /* * Release any resources held by the given physical map. * Called when a pmap initialized by pmap_pinit is being released. * Should only be called if the map contains no valid mappings. */ void pmap_release(pmap_t pmap) { vm_page_t m; KASSERT(pmap->pm_stats.resident_count == 0, ("pmap_release: pmap resident count %ld != 0", pmap->pm_stats.resident_count)); m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pmap->pm_l0)); m->wire_count--; atomic_subtract_int(&vm_cnt.v_wire_count, 1); vm_page_free_zero(m); } #if 0 static int kvm_size(SYSCTL_HANDLER_ARGS) { unsigned long ksize = VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS; return sysctl_handle_long(oidp, &ksize, 0, req); } -SYSCTL_PROC(_vm, OID_AUTO, kvm_size, CTLTYPE_LONG|CTLFLAG_RD, +SYSCTL_PROC(_vm, OID_AUTO, kvm_size, CTLTYPE_LONG|CTLFLAG_RD, 0, 0, kvm_size, "LU", "Size of KVM"); static int kvm_free(SYSCTL_HANDLER_ARGS) { unsigned long kfree = VM_MAX_KERNEL_ADDRESS - kernel_vm_end; return sysctl_handle_long(oidp, &kfree, 0, req); } -SYSCTL_PROC(_vm, OID_AUTO, kvm_free, CTLTYPE_LONG|CTLFLAG_RD, +SYSCTL_PROC(_vm, OID_AUTO, kvm_free, CTLTYPE_LONG|CTLFLAG_RD, 0, 0, kvm_free, "LU", "Amount of KVM free"); #endif /* 0 */ /* * grow the number of kernel page table entries, if needed */ void pmap_growkernel(vm_offset_t addr) { vm_paddr_t paddr; vm_page_t nkpg; pd_entry_t *l0, *l1, *l2; mtx_assert(&kernel_map->system_mtx, MA_OWNED); addr = roundup2(addr, L2_SIZE); if (addr - 1 >= kernel_map->max_offset) addr = kernel_map->max_offset; while (kernel_vm_end < addr) { l0 = pmap_l0(kernel_pmap, kernel_vm_end); KASSERT(pmap_load(l0) != 0, ("pmap_growkernel: No level 0 kernel entry")); l1 = pmap_l0_to_l1(l0, kernel_vm_end); if (pmap_load(l1) == 0) { /* We need a new PDP entry */ nkpg = vm_page_alloc(NULL, kernel_vm_end >> L1_SHIFT, VM_ALLOC_INTERRUPT | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO); if (nkpg == NULL) panic("pmap_growkernel: no memory to grow kernel"); if ((nkpg->flags & PG_ZERO) == 0) pmap_zero_page(nkpg); paddr = VM_PAGE_TO_PHYS(nkpg); pmap_load_store(l1, paddr | L1_TABLE); PTE_SYNC(l1); continue; /* try again */ } l2 = pmap_l1_to_l2(l1, kernel_vm_end); if ((pmap_load(l2) & ATTR_AF) != 0) { kernel_vm_end = (kernel_vm_end + L2_SIZE) & ~L2_OFFSET; if (kernel_vm_end - 1 >= kernel_map->max_offset) { kernel_vm_end = kernel_map->max_offset; - break; + break; } continue; } nkpg = vm_page_alloc(NULL, kernel_vm_end >> L2_SHIFT, VM_ALLOC_INTERRUPT | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO); if (nkpg == NULL) panic("pmap_growkernel: no memory to grow kernel"); if ((nkpg->flags & PG_ZERO) == 0) pmap_zero_page(nkpg); paddr = VM_PAGE_TO_PHYS(nkpg); pmap_load_store(l2, paddr | L2_TABLE); PTE_SYNC(l2); pmap_invalidate_page(kernel_pmap, kernel_vm_end); kernel_vm_end = (kernel_vm_end + L2_SIZE) & ~L2_OFFSET; if (kernel_vm_end - 1 >= kernel_map->max_offset) { kernel_vm_end = kernel_map->max_offset; - break; + break; } } } /*************************************************** * page management routines. 
***************************************************/ CTASSERT(sizeof(struct pv_chunk) == PAGE_SIZE); CTASSERT(_NPCM == 3); CTASSERT(_NPCPV == 168); static __inline struct pv_chunk * pv_to_chunk(pv_entry_t pv) { return ((struct pv_chunk *)((uintptr_t)pv & ~(uintptr_t)PAGE_MASK)); } #define PV_PMAP(pv) (pv_to_chunk(pv)->pc_pmap) #define PC_FREE0 0xfffffffffffffffful #define PC_FREE1 0xfffffffffffffffful #define PC_FREE2 0x000000fffffffffful static const uint64_t pc_freemask[_NPCM] = { PC_FREE0, PC_FREE1, PC_FREE2 }; #if 0 #ifdef PV_STATS static int pc_chunk_count, pc_chunk_allocs, pc_chunk_frees, pc_chunk_tryfail; SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_count, CTLFLAG_RD, &pc_chunk_count, 0, "Current number of pv entry chunks"); SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_allocs, CTLFLAG_RD, &pc_chunk_allocs, 0, "Current number of pv entry chunks allocated"); SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_frees, CTLFLAG_RD, &pc_chunk_frees, 0, "Current number of pv entry chunks frees"); SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_tryfail, CTLFLAG_RD, &pc_chunk_tryfail, 0, "Number of times tried to get a chunk page but failed."); static long pv_entry_frees, pv_entry_allocs, pv_entry_count; static int pv_entry_spare; SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_frees, CTLFLAG_RD, &pv_entry_frees, 0, "Current number of pv entry frees"); SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_allocs, CTLFLAG_RD, &pv_entry_allocs, 0, "Current number of pv entry allocs"); SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_count, CTLFLAG_RD, &pv_entry_count, 0, "Current number of pv entries"); SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_spare, CTLFLAG_RD, &pv_entry_spare, 0, "Current number of spare pv entries"); #endif #endif /* 0 */ /* * We are in a serious low memory condition. Resort to * drastic measures to free some pages so we can allocate * another pv entry chunk. * * Returns NULL if PV entries were reclaimed from the specified pmap. * * We do not, however, unmap 2mpages because subsequent accesses will * allocate per-page pv entries until repromotion occurs, thereby * exacerbating the shortage of free pv entries. */ static vm_page_t reclaim_pv_chunk(pmap_t locked_pmap, struct rwlock **lockp) { panic("ARM64TODO: reclaim_pv_chunk"); } /* * free the pv_entry back to the free list */ static void free_pv_entry(pmap_t pmap, pv_entry_t pv) { struct pv_chunk *pc; int idx, field, bit; rw_assert(&pvh_global_lock, RA_LOCKED); PMAP_LOCK_ASSERT(pmap, MA_OWNED); PV_STAT(atomic_add_long(&pv_entry_frees, 1)); PV_STAT(atomic_add_int(&pv_entry_spare, 1)); PV_STAT(atomic_subtract_long(&pv_entry_count, 1)); pc = pv_to_chunk(pv); idx = pv - &pc->pc_pventry[0]; field = idx / 64; bit = idx % 64; pc->pc_map[field] |= 1ul << bit; if (pc->pc_map[0] != PC_FREE0 || pc->pc_map[1] != PC_FREE1 || pc->pc_map[2] != PC_FREE2) { /* 98% of the time, pc is already at the head of the list. 
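 *
 * get_pv_entry() always scans from the head of pm_pvchunk, so keeping
 * chunks that still have free slots there makes the common case cheap.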
*/ if (__predict_false(pc != TAILQ_FIRST(&pmap->pm_pvchunk))) { TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list); } return; } TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); free_pv_chunk(pc); } static void free_pv_chunk(struct pv_chunk *pc) { vm_page_t m; mtx_lock(&pv_chunks_mutex); TAILQ_REMOVE(&pv_chunks, pc, pc_lru); mtx_unlock(&pv_chunks_mutex); PV_STAT(atomic_subtract_int(&pv_entry_spare, _NPCPV)); PV_STAT(atomic_subtract_int(&pc_chunk_count, 1)); PV_STAT(atomic_add_int(&pc_chunk_frees, 1)); /* entire chunk is free, return it */ m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pc)); dump_drop_page(m->phys_addr); vm_page_unwire(m, PQ_NONE); vm_page_free(m); } /* * Returns a new PV entry, allocating a new PV chunk from the system when * needed. If this PV chunk allocation fails and a PV list lock pointer was * given, a PV chunk is reclaimed from an arbitrary pmap. Otherwise, NULL is * returned. * * The given PV list lock may be released. */ static pv_entry_t get_pv_entry(pmap_t pmap, struct rwlock **lockp) { int bit, field; pv_entry_t pv; struct pv_chunk *pc; vm_page_t m; rw_assert(&pvh_global_lock, RA_LOCKED); PMAP_LOCK_ASSERT(pmap, MA_OWNED); PV_STAT(atomic_add_long(&pv_entry_allocs, 1)); retry: pc = TAILQ_FIRST(&pmap->pm_pvchunk); if (pc != NULL) { for (field = 0; field < _NPCM; field++) { if (pc->pc_map[field]) { bit = ffsl(pc->pc_map[field]) - 1; break; } } if (field < _NPCM) { pv = &pc->pc_pventry[field * 64 + bit]; pc->pc_map[field] &= ~(1ul << bit); /* If this was the last item, move it to tail */ if (pc->pc_map[0] == 0 && pc->pc_map[1] == 0 && pc->pc_map[2] == 0) { TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc, pc_list); } PV_STAT(atomic_add_long(&pv_entry_count, 1)); PV_STAT(atomic_subtract_int(&pv_entry_spare, 1)); return (pv); } } /* No free items, allocate another chunk */ m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED); if (m == NULL) { if (lockp == NULL) { PV_STAT(pc_chunk_tryfail++); return (NULL); } m = reclaim_pv_chunk(pmap, lockp); if (m == NULL) goto retry; } PV_STAT(atomic_add_int(&pc_chunk_count, 1)); PV_STAT(atomic_add_int(&pc_chunk_allocs, 1)); dump_add_page(m->phys_addr); pc = (void *)PHYS_TO_DMAP(m->phys_addr); pc->pc_pmap = pmap; pc->pc_map[0] = PC_FREE0 & ~1ul; /* preallocated bit 0 */ pc->pc_map[1] = PC_FREE1; pc->pc_map[2] = PC_FREE2; mtx_lock(&pv_chunks_mutex); TAILQ_INSERT_TAIL(&pv_chunks, pc, pc_lru); mtx_unlock(&pv_chunks_mutex); pv = &pc->pc_pventry[0]; TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list); PV_STAT(atomic_add_long(&pv_entry_count, 1)); PV_STAT(atomic_add_int(&pv_entry_spare, _NPCPV - 1)); return (pv); } /* * First find and then remove the pv entry for the specified pmap and virtual * address from the specified pv list. Returns the pv entry if found and NULL * otherwise. This operation can be performed on pv lists for either 4KB or * 2MB page mappings. */ static __inline pv_entry_t pmap_pvh_remove(struct md_page *pvh, pmap_t pmap, vm_offset_t va) { pv_entry_t pv; rw_assert(&pvh_global_lock, RA_LOCKED); TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) { if (pmap == PV_PMAP(pv) && va == pv->pv_va) { TAILQ_REMOVE(&pvh->pv_list, pv, pv_next); pvh->pv_gen++; break; } } return (pv); } /* * First find and then destroy the pv entry for the specified pmap and virtual * address. This operation can be performed on pv lists for either 4KB or 2MB * page mappings. 
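 */

/*
 * Illustrative sketch: the chunk bitmap bookkeeping used by
 * free_pv_entry() and get_pv_entry() above.  A chunk tracks its _NPCPV
 * (168) entries in the three 64-bit words of pc_map, a set bit marking
 * a free slot.  Hypothetical helper computing the word and bit for one
 * entry:
 */
static __inline void
pv_chunk_slot_sketch(pv_entry_t pv, int *field, int *bit)
{
	struct pv_chunk *pc;
	int idx;

	pc = pv_to_chunk(pv);
	idx = pv - &pc->pc_pventry[0];
	*field = idx / 64;
	*bit = idx % 64;
}

/*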
*/ static void pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va) { pv_entry_t pv; pv = pmap_pvh_remove(pvh, pmap, va); KASSERT(pv != NULL, ("pmap_pvh_free: pv not found")); free_pv_entry(pmap, pv); } /* * Conditionally create the PV entry for a 4KB page mapping if the required * memory can be allocated without resorting to reclamation. */ static boolean_t pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va, vm_page_t m, struct rwlock **lockp) { pv_entry_t pv; rw_assert(&pvh_global_lock, RA_LOCKED); PMAP_LOCK_ASSERT(pmap, MA_OWNED); /* Pass NULL instead of the lock pointer to disable reclamation. */ if ((pv = get_pv_entry(pmap, NULL)) != NULL) { pv->pv_va = va; CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m); TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); m->md.pv_gen++; return (TRUE); } else return (FALSE); } /* * pmap_remove_l3: do the things to unmap a page in a process */ static int -pmap_remove_l3(pmap_t pmap, pt_entry_t *l3, vm_offset_t va, +pmap_remove_l3(pmap_t pmap, pt_entry_t *l3, vm_offset_t va, pd_entry_t l2e, struct spglist *free, struct rwlock **lockp) { pt_entry_t old_l3; vm_page_t m; PMAP_LOCK_ASSERT(pmap, MA_OWNED); if (pmap_is_current(pmap) && pmap_l3_valid_cacheable(pmap_load(l3))) cpu_dcache_wb_range(va, L3_SIZE); old_l3 = pmap_load_clear(l3); PTE_SYNC(l3); pmap_invalidate_page(pmap, va); if (old_l3 & ATTR_SW_WIRED) pmap->pm_stats.wired_count -= 1; pmap_resident_count_dec(pmap, 1); if (old_l3 & ATTR_SW_MANAGED) { m = PHYS_TO_VM_PAGE(old_l3 & ~ATTR_MASK); if (pmap_page_dirty(old_l3)) vm_page_dirty(m); if (old_l3 & ATTR_AF) vm_page_aflag_set(m, PGA_REFERENCED); CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m); pmap_pvh_free(&m->md, pmap, va); } return (pmap_unuse_l3(pmap, va, l2e, free)); } /* * Remove the given range of addresses from the specified map. * * It is assumed that the start and end are properly * rounded to the page size. */ void pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) { struct rwlock *lock; vm_offset_t va, va_next; pd_entry_t *l0, *l1, *l2; pt_entry_t l3_paddr, *l3; struct spglist free; int anyvalid; /* * Perform an unsynchronized read. This is, however, safe. */ if (pmap->pm_stats.resident_count == 0) return; anyvalid = 0; SLIST_INIT(&free); rw_rlock(&pvh_global_lock); PMAP_LOCK(pmap); lock = NULL; for (; sva < eva; sva = va_next) { if (pmap->pm_stats.resident_count == 0) break; l0 = pmap_l0(pmap, sva); if (pmap_load(l0) == 0) { va_next = (sva + L0_SIZE) & ~L0_OFFSET; if (va_next < sva) va_next = eva; continue; } l1 = pmap_l0_to_l1(l0, sva); if (pmap_load(l1) == 0) { va_next = (sva + L1_SIZE) & ~L1_OFFSET; if (va_next < sva) va_next = eva; continue; } /* * Calculate index for next page table. */ va_next = (sva + L2_SIZE) & ~L2_OFFSET; if (va_next < sva) va_next = eva; l2 = pmap_l1_to_l2(l1, sva); if (l2 == NULL) continue; l3_paddr = pmap_load(l2); /* * Weed out invalid mappings. */ if ((l3_paddr & ATTR_DESCR_MASK) != L2_TABLE) continue; /* * Limit our scan to either the end of the va represented * by the current page table page, or to the end of the * range being removed. 
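 *
 * va_next was already advanced to the next L2 boundary (or clamped to
 * eva on wrap), so this test only trims the final, partial page table.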
*/ if (va_next > eva) va_next = eva; va = va_next; for (l3 = pmap_l2_to_l3(l2, sva); sva != va_next; l3++, sva += L3_SIZE) { if (l3 == NULL) panic("l3 == NULL"); if (pmap_load(l3) == 0) { if (va != va_next) { pmap_invalidate_range(pmap, va, sva); va = va_next; } continue; } if (va == va_next) va = sva; if (pmap_remove_l3(pmap, l3, sva, l3_paddr, &free, &lock)) { sva += L3_SIZE; break; } } if (va != va_next) pmap_invalidate_range(pmap, va, sva); } if (lock != NULL) rw_wunlock(lock); if (anyvalid) pmap_invalidate_all(pmap); - rw_runlock(&pvh_global_lock); + rw_runlock(&pvh_global_lock); PMAP_UNLOCK(pmap); pmap_free_zero_pages(&free); } /* * Routine: pmap_remove_all * Function: * Removes this physical page from * all physical maps in which it resides. * Reflects back modify bits to the pager. * * Notes: * Original versions of this routine were very * inefficient because they iteratively called * pmap_remove (slow...) */ void pmap_remove_all(vm_page_t m) { pv_entry_t pv; pmap_t pmap; pd_entry_t *pde, tpde; pt_entry_t *pte, tpte; struct spglist free; int lvl; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_remove_all: page %p is not managed", m)); SLIST_INIT(&free); rw_wlock(&pvh_global_lock); while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) { pmap = PV_PMAP(pv); PMAP_LOCK(pmap); pmap_resident_count_dec(pmap, 1); pde = pmap_pde(pmap, pv->pv_va, &lvl); KASSERT(pde != NULL, ("pmap_remove_all: no page directory entry found")); KASSERT(lvl == 2, ("pmap_remove_all: invalid pde level %d", lvl)); tpde = pmap_load(pde); pte = pmap_l2_to_l3(pde, pv->pv_va); tpte = pmap_load(pte); if (pmap_is_current(pmap) && pmap_l3_valid_cacheable(tpte)) cpu_dcache_wb_range(pv->pv_va, L3_SIZE); pmap_load_clear(pte); PTE_SYNC(pte); pmap_invalidate_page(pmap, pv->pv_va); if (tpte & ATTR_SW_WIRED) pmap->pm_stats.wired_count--; if ((tpte & ATTR_AF) != 0) vm_page_aflag_set(m, PGA_REFERENCED); /* * Update the vm_page_t clean and reference bits. */ if (pmap_page_dirty(tpte)) vm_page_dirty(m); pmap_unuse_l3(pmap, pv->pv_va, tpde, &free); TAILQ_REMOVE(&m->md.pv_list, pv, pv_next); m->md.pv_gen++; free_pv_entry(pmap, pv); PMAP_UNLOCK(pmap); } vm_page_aflag_clear(m, PGA_WRITEABLE); rw_wunlock(&pvh_global_lock); pmap_free_zero_pages(&free); } /* * Set the physical protection on the * specified range of this map as requested. 
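 */

/*
 * Illustrative sketch: the overflow-safe boundary advance used by
 * pmap_remove() above and pmap_protect() below.  Rounding sva up to the
 * next L2 boundary can wrap to zero at the top of the address space, so
 * a wrapped result is clamped to the end of the range.  Hypothetical
 * helper:
 */
static __inline vm_offset_t
pmap_next_l2_boundary_sketch(vm_offset_t sva, vm_offset_t eva)
{
	vm_offset_t va_next;

	va_next = (sva + L2_SIZE) & ~L2_OFFSET;
	if (va_next < sva)		/* wrapped past the top of the AS */
		va_next = eva;
	return (va_next);
}

/*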
*/ void pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot) { vm_offset_t va, va_next; pd_entry_t *l0, *l1, *l2; pt_entry_t *l3p, l3; if ((prot & VM_PROT_READ) == VM_PROT_NONE) { pmap_remove(pmap, sva, eva); return; } if ((prot & VM_PROT_WRITE) == VM_PROT_WRITE) return; PMAP_LOCK(pmap); for (; sva < eva; sva = va_next) { l0 = pmap_l0(pmap, sva); if (pmap_load(l0) == 0) { va_next = (sva + L0_SIZE) & ~L0_OFFSET; if (va_next < sva) va_next = eva; continue; } l1 = pmap_l0_to_l1(l0, sva); if (pmap_load(l1) == 0) { va_next = (sva + L1_SIZE) & ~L1_OFFSET; if (va_next < sva) va_next = eva; continue; } va_next = (sva + L2_SIZE) & ~L2_OFFSET; if (va_next < sva) va_next = eva; l2 = pmap_l1_to_l2(l1, sva); if (l2 == NULL || (pmap_load(l2) & ATTR_DESCR_MASK) != L2_TABLE) continue; if (va_next > eva) va_next = eva; va = va_next; for (l3p = pmap_l2_to_l3(l2, sva); sva != va_next; l3p++, sva += L3_SIZE) { l3 = pmap_load(l3p); if (pmap_l3_valid(l3)) { pmap_set(l3p, ATTR_AP(ATTR_AP_RO)); PTE_SYNC(l3p); /* XXX: Use pmap_invalidate_range */ pmap_invalidate_page(pmap, va); } } } PMAP_UNLOCK(pmap); /* TODO: Only invalidate entries we are touching */ pmap_invalidate_all(pmap); } /* * Insert the given physical page (p) at * the specified virtual address (v) in the * target physical map with the protection requested. * * If specified, the page will be wired down, meaning * that the related pte can not be reclaimed. * * NB: This is the only routine which MAY NOT lazy-evaluate * or lose information. That is, this routine must actually * insert this page into the given map NOW. */ int pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, u_int flags, int8_t psind __unused) { struct rwlock *lock; pd_entry_t *pde; pt_entry_t new_l3, orig_l3; pt_entry_t *l3; pv_entry_t pv; vm_paddr_t opa, pa, l1_pa, l2_pa, l3_pa; vm_page_t mpte, om, l1_m, l2_m, l3_m; boolean_t nosleep; int lvl; va = trunc_page(va); if ((m->oflags & VPO_UNMANAGED) == 0 && !vm_page_xbusied(m)) VM_OBJECT_ASSERT_LOCKED(m->object); pa = VM_PAGE_TO_PHYS(m); new_l3 = (pt_entry_t)(pa | ATTR_DEFAULT | ATTR_IDX(m->md.pv_memattr) | L3_PAGE); if ((prot & VM_PROT_WRITE) == 0) new_l3 |= ATTR_AP(ATTR_AP_RO); if ((flags & PMAP_ENTER_WIRED) != 0) new_l3 |= ATTR_SW_WIRED; if ((va >> 63) == 0) new_l3 |= ATTR_AP(ATTR_AP_USER); CTR2(KTR_PMAP, "pmap_enter: %.16lx -> %.16lx", va, pa); mpte = NULL; lock = NULL; rw_rlock(&pvh_global_lock); PMAP_LOCK(pmap); if (va < VM_MAXUSER_ADDRESS) { nosleep = (flags & PMAP_ENTER_NOSLEEP) != 0; mpte = pmap_alloc_l3(pmap, va, nosleep ? 
NULL : &lock); if (mpte == NULL && nosleep) { CTR0(KTR_PMAP, "pmap_enter: mpte == NULL"); if (lock != NULL) rw_wunlock(lock); rw_runlock(&pvh_global_lock); PMAP_UNLOCK(pmap); return (KERN_RESOURCE_SHORTAGE); } pde = pmap_pde(pmap, va, &lvl); KASSERT(pde != NULL, ("pmap_enter: Invalid page entry, va: 0x%lx", va)); KASSERT(lvl == 2, ("pmap_enter: Invalid level %d", lvl)); l3 = pmap_l2_to_l3(pde, va); } else { pde = pmap_pde(pmap, va, &lvl); /* * If we get a level 2 pde it must point to a level 3 entry * otherwise we will need to create the intermediate tables */ if (lvl < 2) { switch(lvl) { default: case -1: /* Get the l0 pde to update */ pde = pmap_l0(pmap, va); KASSERT(pde != NULL, ("...")); l1_m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO); if (l1_m == NULL) panic("pmap_enter: l1 pte_m == NULL"); if ((l1_m->flags & PG_ZERO) == 0) pmap_zero_page(l1_m); l1_pa = VM_PAGE_TO_PHYS(l1_m); pmap_load_store(pde, l1_pa | L0_TABLE); PTE_SYNC(pde); /* FALLTHROUGH */ case 0: /* Get the l1 pde to update */ pde = pmap_l0_to_l1(pde, va); KASSERT(pde != NULL, ("...")); l2_m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO); if (l2_m == NULL) panic("pmap_enter: l2 pte_m == NULL"); if ((l2_m->flags & PG_ZERO) == 0) pmap_zero_page(l2_m); l2_pa = VM_PAGE_TO_PHYS(l2_m); pmap_load_store(pde, l2_pa | L1_TABLE); PTE_SYNC(pde); /* FALLTHROUGH */ case 1: /* Get the l2 pde to update */ pde = pmap_l1_to_l2(pde, va); l3_m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO); if (l3_m == NULL) panic("pmap_enter: l3 pte_m == NULL"); if ((l3_m->flags & PG_ZERO) == 0) pmap_zero_page(l3_m); l3_pa = VM_PAGE_TO_PHYS(l3_m); pmap_load_store(pde, l3_pa | L2_TABLE); PTE_SYNC(pde); break; } } l3 = pmap_l2_to_l3(pde, va); pmap_invalidate_page(pmap, va); } om = NULL; orig_l3 = pmap_load(l3); opa = orig_l3 & ~ATTR_MASK; /* * Is the specified virtual address already mapped? */ if (pmap_l3_valid(orig_l3)) { /* * Wiring change, just update stats. We don't worry about * wiring PT pages as they remain resident as long as there * are valid mappings in them. Hence, if a user page is wired, * the PT page will be also. */ if ((flags & PMAP_ENTER_WIRED) != 0 && (orig_l3 & ATTR_SW_WIRED) == 0) pmap->pm_stats.wired_count++; else if ((flags & PMAP_ENTER_WIRED) == 0 && (orig_l3 & ATTR_SW_WIRED) != 0) pmap->pm_stats.wired_count--; /* * Remove the extra PT page reference. */ if (mpte != NULL) { mpte->wire_count--; KASSERT(mpte->wire_count > 0, ("pmap_enter: missing reference to page table page," " va: 0x%lx", va)); } /* * Has the physical page changed? */ if (opa == pa) { /* * No, might be a protection or wiring change. */ if ((orig_l3 & ATTR_SW_MANAGED) != 0) { new_l3 |= ATTR_SW_MANAGED; if ((new_l3 & ATTR_AP(ATTR_AP_RW)) == ATTR_AP(ATTR_AP_RW)) { vm_page_aflag_set(m, PGA_WRITEABLE); } } goto validate; } /* Flush the cache, there might be uncommitted data in it */ if (pmap_is_current(pmap) && pmap_l3_valid_cacheable(orig_l3)) cpu_dcache_wb_range(va, L3_SIZE); } else { /* * Increment the counters. */ if ((new_l3 & ATTR_SW_WIRED) != 0) pmap->pm_stats.wired_count++; pmap_resident_count_inc(pmap, 1); } /* * Enter on the PV list if part of our managed memory.
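 *
 * (Descriptive note: a PV entry records a single (pmap, va) mapping of
 * the page; the per-page list built here is what lets routines such as
 * pmap_remove_all() and pmap_ts_referenced() find every mapping of a
 * page given only its vm_page.)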
*/ if ((m->oflags & VPO_UNMANAGED) == 0) { new_l3 |= ATTR_SW_MANAGED; pv = get_pv_entry(pmap, &lock); pv->pv_va = va; CHANGE_PV_LIST_LOCK_TO_PHYS(&lock, pa); TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); m->md.pv_gen++; if ((new_l3 & ATTR_AP_RW_BIT) == ATTR_AP(ATTR_AP_RW)) vm_page_aflag_set(m, PGA_WRITEABLE); } /* * Update the L3 entry. */ if (orig_l3 != 0) { validate: orig_l3 = pmap_load_store(l3, new_l3); PTE_SYNC(l3); opa = orig_l3 & ~ATTR_MASK; if (opa != pa) { if ((orig_l3 & ATTR_SW_MANAGED) != 0) { om = PHYS_TO_VM_PAGE(opa); if (pmap_page_dirty(orig_l3)) vm_page_dirty(om); if ((orig_l3 & ATTR_AF) != 0) vm_page_aflag_set(om, PGA_REFERENCED); CHANGE_PV_LIST_LOCK_TO_PHYS(&lock, opa); pmap_pvh_free(&om->md, pmap, va); } } else if (pmap_page_dirty(orig_l3)) { if ((orig_l3 & ATTR_SW_MANAGED) != 0) vm_page_dirty(m); } } else { pmap_load_store(l3, new_l3); PTE_SYNC(l3); } pmap_invalidate_page(pmap, va); if ((pmap != pmap_kernel()) && (pmap == &curproc->p_vmspace->vm_pmap)) cpu_icache_sync_range(va, PAGE_SIZE); if (lock != NULL) rw_wunlock(lock); rw_runlock(&pvh_global_lock); PMAP_UNLOCK(pmap); return (KERN_SUCCESS); } /* * Maps a sequence of resident pages belonging to the same object. * The sequence begins with the given page m_start. This page is * mapped at the given virtual address start. Each subsequent page is * mapped at a virtual address that is offset from start by the same * amount as the page is offset from m_start within the object. The * last page in the sequence is the page with the largest offset from * m_start that can be mapped at a virtual address less than the given * virtual address end. Not every virtual page between start and end * is mapped; only those for which a resident page exists with the * corresponding offset from m_start are mapped. */ void pmap_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end, vm_page_t m_start, vm_prot_t prot) { struct rwlock *lock; vm_offset_t va; vm_page_t m, mpte; vm_pindex_t diff, psize; VM_OBJECT_ASSERT_LOCKED(m_start->object); psize = atop(end - start); mpte = NULL; m = m_start; lock = NULL; rw_rlock(&pvh_global_lock); PMAP_LOCK(pmap); while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) { va = start + ptoa(diff); mpte = pmap_enter_quick_locked(pmap, va, m, prot, mpte, &lock); m = TAILQ_NEXT(m, listq); } if (lock != NULL) rw_wunlock(lock); rw_runlock(&pvh_global_lock); PMAP_UNLOCK(pmap); } /* * this code makes some *MAJOR* assumptions: * 1. Current pmap & pmap exists. * 2. Not wired. * 3. Read access. * 4. No page table pages. * but is *MUCH* faster than pmap_enter... */ void pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot) { struct rwlock *lock; lock = NULL; rw_rlock(&pvh_global_lock); PMAP_LOCK(pmap); (void)pmap_enter_quick_locked(pmap, va, m, prot, NULL, &lock); if (lock != NULL) rw_wunlock(lock); rw_runlock(&pvh_global_lock); PMAP_UNLOCK(pmap); } static vm_page_t pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, vm_page_t mpte, struct rwlock **lockp) { struct spglist free; pd_entry_t *pde; pt_entry_t *l3; vm_paddr_t pa; int lvl; KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva || (m->oflags & VPO_UNMANAGED) != 0, ("pmap_enter_quick_locked: managed mapping within the clean submap")); rw_assert(&pvh_global_lock, RA_LOCKED); PMAP_LOCK_ASSERT(pmap, MA_OWNED); CTR2(KTR_PMAP, "pmap_enter_quick_locked: %p %lx", pmap, va); /* * In the case that a page table page is not * resident, we are creating it here. 
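 *
 * (An illustrative note, not from the original file: pmap_l2_pindex()
 * is effectively va >> L2_SHIFT, so with 4K granules every address in
 * the same 2MB region yields the same index.  A repeat call within
 * that region therefore finds mpte->pindex == l2pindex below and only
 * bumps the wire count instead of walking the page table again.)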
*/ if (va < VM_MAXUSER_ADDRESS) { vm_pindex_t l2pindex; /* * Calculate pagetable page index */ l2pindex = pmap_l2_pindex(va); if (mpte && (mpte->pindex == l2pindex)) { mpte->wire_count++; } else { /* * Get the l2 entry */ pde = pmap_pde(pmap, va, &lvl); /* * If the page table page is mapped, we just increment * the hold count, and activate it. Otherwise, we * attempt to allocate a page table page. If this * attempt fails, we don't retry. Instead, we give up. */ if (lvl == 2 && pmap_load(pde) != 0) { mpte = PHYS_TO_VM_PAGE(pmap_load(pde) & ~ATTR_MASK); mpte->wire_count++; } else { /* * Pass NULL instead of the PV list lock * pointer, because we don't intend to sleep. */ mpte = _pmap_alloc_l3(pmap, l2pindex, NULL); if (mpte == NULL) return (mpte); } } l3 = (pt_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(mpte)); l3 = &l3[pmap_l3_index(va)]; } else { mpte = NULL; pde = pmap_pde(kernel_pmap, va, &lvl); KASSERT(pde != NULL, ("pmap_enter_quick_locked: Invalid page entry, va: 0x%lx", va)); KASSERT(lvl == 2, ("pmap_enter_quick_locked: Invalid level %d", lvl)); l3 = pmap_l2_to_l3(pde, va); } if (pmap_load(l3) != 0) { if (mpte != NULL) { mpte->wire_count--; mpte = NULL; } return (mpte); } /* * Enter on the PV list if part of our managed memory. */ if ((m->oflags & VPO_UNMANAGED) == 0 && !pmap_try_insert_pv_entry(pmap, va, m, lockp)) { if (mpte != NULL) { SLIST_INIT(&free); if (pmap_unwire_l3(pmap, va, mpte, &free)) { pmap_invalidate_page(pmap, va); pmap_free_zero_pages(&free); } mpte = NULL; } return (mpte); } /* * Increment counters */ pmap_resident_count_inc(pmap, 1); pa = VM_PAGE_TO_PHYS(m) | ATTR_DEFAULT | ATTR_IDX(m->md.pv_memattr) | ATTR_AP(ATTR_AP_RO) | L3_PAGE; /* * Now validate mapping with RO protection */ if ((m->oflags & VPO_UNMANAGED) == 0) pa |= ATTR_SW_MANAGED; pmap_load_store(l3, pa); PTE_SYNC(l3); pmap_invalidate_page(pmap, va); return (mpte); } /* * This code maps large physical mmap regions into the * processor address space. Note that some shortcuts * are taken, but the code works. */ void pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, vm_object_t object, vm_pindex_t pindex, vm_size_t size) { VM_OBJECT_ASSERT_WLOCKED(object); KASSERT(object->type == OBJT_DEVICE || object->type == OBJT_SG, ("pmap_object_init_pt: non-device object")); } /* * Clear the wired attribute from the mappings for the specified range of * addresses in the given pmap. Every valid mapping within that range * must have the wired attribute set. In contrast, invalid mappings * cannot have the wired attribute set, so they are ignored. * * The wired attribute of the page table entry is not a hardware feature, * so there is no need to invalidate any TLB entries.
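 *
 * (Descriptive note: ATTR_SW_WIRED is one of the software-use PTE bits
 * that the hardware table walker ignores, which is why the loop below
 * can clear it with a plain atomic_clear_long() and skip TLB
 * maintenance entirely.)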
*/ void pmap_unwire(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) { vm_offset_t va_next; pd_entry_t *l0, *l1, *l2; pt_entry_t *l3; boolean_t pv_lists_locked; pv_lists_locked = FALSE; PMAP_LOCK(pmap); for (; sva < eva; sva = va_next) { l0 = pmap_l0(pmap, sva); if (pmap_load(l0) == 0) { va_next = (sva + L0_SIZE) & ~L0_OFFSET; if (va_next < sva) va_next = eva; continue; } l1 = pmap_l0_to_l1(l0, sva); if (pmap_load(l1) == 0) { va_next = (sva + L1_SIZE) & ~L1_OFFSET; if (va_next < sva) va_next = eva; continue; } va_next = (sva + L2_SIZE) & ~L2_OFFSET; if (va_next < sva) va_next = eva; l2 = pmap_l1_to_l2(l1, sva); if (pmap_load(l2) == 0) continue; if (va_next > eva) va_next = eva; for (l3 = pmap_l2_to_l3(l2, sva); sva != va_next; l3++, sva += L3_SIZE) { if (pmap_load(l3) == 0) continue; if ((pmap_load(l3) & ATTR_SW_WIRED) == 0) panic("pmap_unwire: l3 %#jx is missing " "ATTR_SW_WIRED", (uintmax_t)pmap_load(l3)); /* * PG_W must be cleared atomically. Although the pmap * lock synchronizes access to PG_W, another processor * could be setting PG_M and/or PG_A concurrently. */ atomic_clear_long(l3, ATTR_SW_WIRED); pmap->pm_stats.wired_count--; } } if (pv_lists_locked) rw_runlock(&pvh_global_lock); PMAP_UNLOCK(pmap); } /* * Copy the range specified by src_addr/len * from the source map to the range dst_addr/len * in the destination map. * * This routine is only advisory and need not do anything. */ void pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len, vm_offset_t src_addr) { } /* * pmap_zero_page zeros the specified hardware page by mapping * the page into KVM and using bzero to clear its contents. */ void pmap_zero_page(vm_page_t m) { vm_offset_t va = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)); pagezero((void *)va); } /* - * pmap_zero_page_area zeros the specified hardware page by mapping + * pmap_zero_page_area zeros the specified hardware page by mapping * the page into KVM and using bzero to clear its contents. * * off and size may not cover an area beyond a single hardware page. */ void pmap_zero_page_area(vm_page_t m, int off, int size) { vm_offset_t va = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)); if (off == 0 && size == PAGE_SIZE) pagezero((void *)va); else bzero((char *)va + off, size); } /* - * pmap_zero_page_idle zeros the specified hardware page by mapping + * pmap_zero_page_idle zeros the specified hardware page by mapping * the page into KVM and using bzero to clear its contents. This * is intended to be called from the vm_pagezero process only and * outside of Giant. */ void pmap_zero_page_idle(vm_page_t m) { vm_offset_t va = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)); pagezero((void *)va); } /* * pmap_copy_page copies the specified (machine independent) * page by mapping the page into virtual memory and using * bcopy to copy the page, one machine dependent page at a * time. 
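 *
 * (Descriptive note: on this pmap the "mapping" step costs nothing in
 * practice, since PHYS_TO_DMAP() simply offsets the physical address
 * into the always-resident direct map; no temporary KVA has to be
 * allocated or invalidated.)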
*/ void pmap_copy_page(vm_page_t msrc, vm_page_t mdst) { vm_offset_t src = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(msrc)); vm_offset_t dst = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(mdst)); pagecopy((void *)src, (void *)dst); } int unmapped_buf_allowed = 1; void pmap_copy_pages(vm_page_t ma[], vm_offset_t a_offset, vm_page_t mb[], vm_offset_t b_offset, int xfersize) { void *a_cp, *b_cp; vm_page_t m_a, m_b; vm_paddr_t p_a, p_b; vm_offset_t a_pg_offset, b_pg_offset; int cnt; while (xfersize > 0) { a_pg_offset = a_offset & PAGE_MASK; m_a = ma[a_offset >> PAGE_SHIFT]; p_a = m_a->phys_addr; b_pg_offset = b_offset & PAGE_MASK; m_b = mb[b_offset >> PAGE_SHIFT]; p_b = m_b->phys_addr; cnt = min(xfersize, PAGE_SIZE - a_pg_offset); cnt = min(cnt, PAGE_SIZE - b_pg_offset); if (__predict_false(!PHYS_IN_DMAP(p_a))) { panic("!DMAP a %lx", p_a); } else { a_cp = (char *)PHYS_TO_DMAP(p_a) + a_pg_offset; } if (__predict_false(!PHYS_IN_DMAP(p_b))) { panic("!DMAP b %lx", p_b); } else { b_cp = (char *)PHYS_TO_DMAP(p_b) + b_pg_offset; } bcopy(a_cp, b_cp, cnt); a_offset += cnt; b_offset += cnt; xfersize -= cnt; } } vm_offset_t pmap_quick_enter_page(vm_page_t m) { return (PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m))); } void pmap_quick_remove_page(vm_offset_t addr) { } /* * Returns true if the pmap's pv is one of the first * 16 pvs linked to from this page. This count may * be changed upwards or downwards in the future; it * is only necessary that true be returned for a small * subset of pmaps for proper page aging. */ boolean_t pmap_page_exists_quick(pmap_t pmap, vm_page_t m) { struct rwlock *lock; pv_entry_t pv; int loops = 0; boolean_t rv; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_page_exists_quick: page %p is not managed", m)); rv = FALSE; rw_rlock(&pvh_global_lock); lock = VM_PAGE_TO_PV_LIST_LOCK(m); rw_rlock(lock); TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { if (PV_PMAP(pv) == pmap) { rv = TRUE; break; } loops++; if (loops >= 16) break; } rw_runlock(lock); rw_runlock(&pvh_global_lock); return (rv); } /* * pmap_page_wired_mappings: * * Return the number of managed mappings to the given physical page * that are wired. */ int pmap_page_wired_mappings(vm_page_t m) { struct rwlock *lock; pmap_t pmap; pt_entry_t *pte; pv_entry_t pv; int count, lvl, md_gen; if ((m->oflags & VPO_UNMANAGED) != 0) return (0); rw_rlock(&pvh_global_lock); lock = VM_PAGE_TO_PV_LIST_LOCK(m); rw_rlock(lock); restart: count = 0; TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { pmap = PV_PMAP(pv); if (!PMAP_TRYLOCK(pmap)) { md_gen = m->md.pv_gen; rw_runlock(lock); PMAP_LOCK(pmap); rw_rlock(lock); if (md_gen != m->md.pv_gen) { PMAP_UNLOCK(pmap); goto restart; } } pte = pmap_pte(pmap, pv->pv_va, &lvl); if (pte != NULL && (pmap_load(pte) & ATTR_SW_WIRED) != 0) count++; PMAP_UNLOCK(pmap); } rw_runlock(lock); rw_runlock(&pvh_global_lock); return (count); } /* * Destroy all managed, non-wired mappings in the given user-space * pmap. This pmap cannot be active on any processor besides the * caller. - * + * * This function cannot be applied to the kernel pmap. Moreover, it * is not intended for general use. It is only to be used during * process termination. Consequently, it can be implemented in ways * that make it faster than pmap_remove(). First, it can more quickly * destroy mappings by iterating over the pmap's collection of PV * entries, rather than searching the page table. Second, it doesn't * have to test and clear the page table entries atomically, because * no processor is currently accessing the user address space. 
In * particular, a page table entry's dirty bit won't change state once * this function starts. */ void pmap_remove_pages(pmap_t pmap) { pd_entry_t *pde; pt_entry_t *pte, tpte; struct spglist free; vm_page_t m; pv_entry_t pv; struct pv_chunk *pc, *npc; struct rwlock *lock; int64_t bit; uint64_t inuse, bitmask; int allfree, field, freed, idx, lvl; vm_paddr_t pa; lock = NULL; SLIST_INIT(&free); rw_rlock(&pvh_global_lock); PMAP_LOCK(pmap); TAILQ_FOREACH_SAFE(pc, &pmap->pm_pvchunk, pc_list, npc) { allfree = 1; freed = 0; for (field = 0; field < _NPCM; field++) { inuse = ~pc->pc_map[field] & pc_freemask[field]; while (inuse != 0) { bit = ffsl(inuse) - 1; bitmask = 1UL << bit; idx = field * 64 + bit; pv = &pc->pc_pventry[idx]; inuse &= ~bitmask; pde = pmap_pde(pmap, pv->pv_va, &lvl); KASSERT(pde != NULL, ("Attempting to remove an unmapped page")); KASSERT(lvl == 2, ("Invalid page directory level: %d", lvl)); pte = pmap_l2_to_l3(pde, pv->pv_va); KASSERT(pte != NULL, ("Attempting to remove an unmapped page")); tpte = pmap_load(pte); /* * We cannot remove wired pages from a process' mapping at this time */ if (tpte & ATTR_SW_WIRED) { allfree = 0; continue; } pa = tpte & ~ATTR_MASK; m = PHYS_TO_VM_PAGE(pa); KASSERT(m->phys_addr == pa, ("vm_page_t %p phys_addr mismatch %016jx %016jx", m, (uintmax_t)m->phys_addr, (uintmax_t)tpte)); KASSERT((m->flags & PG_FICTITIOUS) != 0 || m < &vm_page_array[vm_page_array_size], ("pmap_remove_pages: bad pte %#jx", (uintmax_t)tpte)); /* XXX: assumes tpte is level 3 */ if (pmap_is_current(pmap) && pmap_l3_valid_cacheable(tpte)) cpu_dcache_wb_range(pv->pv_va, L3_SIZE); pmap_load_clear(pte); PTE_SYNC(pte); pmap_invalidate_page(pmap, pv->pv_va); /* * Update the vm_page_t clean/reference bits. */ if ((tpte & ATTR_AP_RW_BIT) == ATTR_AP(ATTR_AP_RW)) vm_page_dirty(m); CHANGE_PV_LIST_LOCK_TO_VM_PAGE(&lock, m); /* Mark free */ pc->pc_map[field] |= bitmask; pmap_resident_count_dec(pmap, 1); TAILQ_REMOVE(&m->md.pv_list, pv, pv_next); m->md.pv_gen++; pmap_unuse_l3(pmap, pv->pv_va, pmap_load(pde), &free); freed++; } } PV_STAT(atomic_add_long(&pv_entry_frees, freed)); PV_STAT(atomic_add_int(&pv_entry_spare, freed)); PV_STAT(atomic_subtract_long(&pv_entry_count, freed)); if (allfree) { TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); free_pv_chunk(pc); } } pmap_invalidate_all(pmap); if (lock != NULL) rw_wunlock(lock); rw_runlock(&pvh_global_lock); PMAP_UNLOCK(pmap); pmap_free_zero_pages(&free); } /* * This is used to check if a page has been accessed or modified. As we * don't have a bit to see if it has been modified we have to assume it * has been if the page is read/write. 
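 *
 * (A concrete illustration, not from the original file: to test
 * "modified" the code below builds mask = ATTR_AP_RW_BIT and
 * value = ATTR_AP(ATTR_AP_RW), so a mapping counts as modified while
 * its AP bits still permit writes; to test "accessed" it builds
 * mask = ATTR_AF | ATTR_DESCR_MASK and value = ATTR_AF | L3_PAGE,
 * i.e. a valid 4KB page whose access flag is set.)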
*/ static boolean_t pmap_page_test_mappings(vm_page_t m, boolean_t accessed, boolean_t modified) { struct rwlock *lock; pv_entry_t pv; pt_entry_t *pte, mask, value; pmap_t pmap; int lvl, md_gen; boolean_t rv; rv = FALSE; rw_rlock(&pvh_global_lock); lock = VM_PAGE_TO_PV_LIST_LOCK(m); rw_rlock(lock); restart: TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { pmap = PV_PMAP(pv); if (!PMAP_TRYLOCK(pmap)) { md_gen = m->md.pv_gen; rw_runlock(lock); PMAP_LOCK(pmap); rw_rlock(lock); if (md_gen != m->md.pv_gen) { PMAP_UNLOCK(pmap); goto restart; } } pte = pmap_pte(pmap, pv->pv_va, &lvl); KASSERT(lvl == 3, ("pmap_page_test_mappings: Invalid level %d", lvl)); mask = 0; value = 0; if (modified) { mask |= ATTR_AP_RW_BIT; value |= ATTR_AP(ATTR_AP_RW); } if (accessed) { mask |= ATTR_AF | ATTR_DESCR_MASK; value |= ATTR_AF | L3_PAGE; } rv = (pmap_load(pte) & mask) == value; PMAP_UNLOCK(pmap); if (rv) goto out; } out: rw_runlock(lock); rw_runlock(&pvh_global_lock); return (rv); } /* * pmap_is_modified: * * Return whether or not the specified physical page was modified * in any physical maps. */ boolean_t pmap_is_modified(vm_page_t m) { KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_is_modified: page %p is not managed", m)); /* * If the page is not exclusive busied, then PGA_WRITEABLE cannot be * concurrently set while the object is locked. Thus, if PGA_WRITEABLE * is clear, no PTEs can have PG_M set. */ VM_OBJECT_ASSERT_WLOCKED(m->object); if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0) return (FALSE); return (pmap_page_test_mappings(m, FALSE, TRUE)); } /* * pmap_is_prefaultable: * * Return whether or not the specified virtual address is eligible * for prefault. */ boolean_t pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr) { pt_entry_t *pte; boolean_t rv; int lvl; rv = FALSE; PMAP_LOCK(pmap); pte = pmap_pte(pmap, addr, &lvl); if (pte != NULL && pmap_load(pte) != 0) { rv = TRUE; } PMAP_UNLOCK(pmap); return (rv); } /* * pmap_is_referenced: * * Return whether or not the specified physical page was referenced * in any physical maps. */ boolean_t pmap_is_referenced(vm_page_t m) { KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_is_referenced: page %p is not managed", m)); return (pmap_page_test_mappings(m, TRUE, FALSE)); } /* * Clear the write and modified bits in each of the given page's mappings. */ void pmap_remove_write(vm_page_t m) { pmap_t pmap; struct rwlock *lock; pv_entry_t pv; pt_entry_t oldpte, *pte; int lvl, md_gen; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_remove_write: page %p is not managed", m)); /* * If the page is not exclusive busied, then PGA_WRITEABLE cannot be * set by another thread while the object is locked. Thus, * if PGA_WRITEABLE is clear, no page table entries need updating. 
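 *
 * (Descriptive note: in the loop below the read-only downgrade is
 * installed with atomic_cmpset_long() and retried on failure, since
 * another CPU may change the PTE between the load and the store; a
 * mapping that was still writable and has the access flag set is
 * reported dirty before its TLB entry is invalidated.)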
*/ VM_OBJECT_ASSERT_WLOCKED(m->object); if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0) return; rw_rlock(&pvh_global_lock); lock = VM_PAGE_TO_PV_LIST_LOCK(m); retry_pv_loop: rw_wlock(lock); TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { pmap = PV_PMAP(pv); if (!PMAP_TRYLOCK(pmap)) { md_gen = m->md.pv_gen; rw_wunlock(lock); PMAP_LOCK(pmap); rw_wlock(lock); if (md_gen != m->md.pv_gen) { PMAP_UNLOCK(pmap); rw_wunlock(lock); goto retry_pv_loop; } } pte = pmap_pte(pmap, pv->pv_va, &lvl); retry: oldpte = pmap_load(pte); if ((oldpte & ATTR_AP_RW_BIT) == ATTR_AP(ATTR_AP_RW)) { if (!atomic_cmpset_long(pte, oldpte, oldpte | ATTR_AP(ATTR_AP_RO))) goto retry; if ((oldpte & ATTR_AF) != 0) vm_page_dirty(m); pmap_invalidate_page(pmap, pv->pv_va); } PMAP_UNLOCK(pmap); } rw_wunlock(lock); vm_page_aflag_clear(m, PGA_WRITEABLE); rw_runlock(&pvh_global_lock); } static __inline boolean_t safe_to_clear_referenced(pmap_t pmap, pt_entry_t pte) { return (FALSE); } #define PMAP_TS_REFERENCED_MAX 5 /* * pmap_ts_referenced: * * Return a count of reference bits for a page, clearing those bits. * It is not necessary for every reference bit to be cleared, but it * is necessary that 0 only be returned when there are truly no * reference bits set. * * XXX: The exact number of bits to check and clear is a matter that * should be tested and standardized at some point in the future for * optimal aging of shared pages. */ int pmap_ts_referenced(vm_page_t m) { pv_entry_t pv, pvf; pmap_t pmap; struct rwlock *lock; pd_entry_t *pde, tpde; pt_entry_t *pte, tpte; vm_paddr_t pa; int cleared, md_gen, not_cleared, lvl; struct spglist free; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_ts_referenced: page %p is not managed", m)); SLIST_INIT(&free); cleared = 0; pa = VM_PAGE_TO_PHYS(m); lock = PHYS_TO_PV_LIST_LOCK(pa); rw_rlock(&pvh_global_lock); rw_wlock(lock); retry: not_cleared = 0; if ((pvf = TAILQ_FIRST(&m->md.pv_list)) == NULL) goto out; pv = pvf; do { if (pvf == NULL) pvf = pv; pmap = PV_PMAP(pv); if (!PMAP_TRYLOCK(pmap)) { md_gen = m->md.pv_gen; rw_wunlock(lock); PMAP_LOCK(pmap); rw_wlock(lock); if (md_gen != m->md.pv_gen) { PMAP_UNLOCK(pmap); goto retry; } } pde = pmap_pde(pmap, pv->pv_va, &lvl); KASSERT(pde != NULL, ("pmap_ts_referenced: no l2 table found")); KASSERT(lvl == 2, ("pmap_ts_referenced: invalid pde level %d", lvl)); tpde = pmap_load(pde); KASSERT((tpde & ATTR_DESCR_MASK) == L2_TABLE, ("pmap_ts_referenced: found an invalid l2 table")); pte = pmap_l2_to_l3(pde, pv->pv_va); tpte = pmap_load(pte); if ((tpte & ATTR_AF) != 0) { if (safe_to_clear_referenced(pmap, tpte)) { /* * TODO: We don't handle the access flag * at all. We need to be able to set it in * the exception handler. */ panic("ARM64TODO: safe_to_clear_referenced\n"); } else if ((tpte & ATTR_SW_WIRED) == 0) { /* * Wired pages cannot be paged out so * doing accessed bit emulation for * them is wasted effort. We do the * hard work for unwired pages only. */ pmap_remove_l3(pmap, pte, pv->pv_va, tpde, &free, &lock); pmap_invalidate_page(pmap, pv->pv_va); cleared++; if (pvf == pv) pvf = NULL; pv = NULL; KASSERT(lock == VM_PAGE_TO_PV_LIST_LOCK(m), ("inconsistent pv lock %p %p for page %p", lock, VM_PAGE_TO_PV_LIST_LOCK(m), m)); } else not_cleared++; } PMAP_UNLOCK(pmap); /* Rotate the PV list if it has more than one entry. 
*/ if (pv != NULL && TAILQ_NEXT(pv, pv_next) != NULL) { TAILQ_REMOVE(&m->md.pv_list, pv, pv_next); TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); m->md.pv_gen++; } } while ((pv = TAILQ_FIRST(&m->md.pv_list)) != pvf && cleared + not_cleared < PMAP_TS_REFERENCED_MAX); out: rw_wunlock(lock); rw_runlock(&pvh_global_lock); pmap_free_zero_pages(&free); return (cleared + not_cleared); } /* * Apply the given advice to the specified range of addresses within the * given pmap. Depending on the advice, clear the referenced and/or * modified flags in each mapping and set the mapped page's dirty field. */ void pmap_advise(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, int advice) { } /* * Clear the modify bits on the specified physical page. */ void pmap_clear_modify(vm_page_t m) { KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_clear_modify: page %p is not managed", m)); VM_OBJECT_ASSERT_WLOCKED(m->object); KASSERT(!vm_page_xbusied(m), ("pmap_clear_modify: page %p is exclusive busied", m)); /* * If the page is not PGA_WRITEABLE, then no PTEs can have PG_M set. * If the object containing the page is locked and the page is not * exclusive busied, then PGA_WRITEABLE cannot be concurrently set. */ if ((m->aflags & PGA_WRITEABLE) == 0) return; /* ARM64TODO: We lack support for tracking if a page is modified */ } void * pmap_mapbios(vm_paddr_t pa, vm_size_t size) { return ((void *)PHYS_TO_DMAP(pa)); } void pmap_unmapbios(vm_paddr_t pa, vm_size_t size) { } /* * Sets the memory attribute for the specified page. */ void pmap_page_set_memattr(vm_page_t m, vm_memattr_t ma) { m->md.pv_memattr = ma; /* * ARM64TODO: Implement the below (from the amd64 pmap) * If "m" is a normal page, update its direct mapping. This update * can be relied upon to perform any cache operations that are * required for data coherence. 
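 *
 * (Until that is implemented, the check below panics for a normal page
 * covered by the DMAP rather than silently leave a direct-map alias
 * with a stale memory attribute.)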
*/ if ((m->flags & PG_FICTITIOUS) == 0 && PHYS_IN_DMAP(VM_PAGE_TO_PHYS(m))) panic("ARM64TODO: pmap_page_set_memattr"); } /* * perform the pmap work for mincore */ int pmap_mincore(pmap_t pmap, vm_offset_t addr, vm_paddr_t *locked_pa) { pd_entry_t *l1p, l1; pd_entry_t *l2p, l2; pt_entry_t *l3p, l3; vm_paddr_t pa; bool managed; int val; PMAP_LOCK(pmap); retry: pa = 0; val = 0; managed = false; l1p = pmap_l1(pmap, addr); if (l1p == NULL) /* No l1 */ goto done; l1 = pmap_load(l1p); if ((l1 & ATTR_DESCR_MASK) == L1_INVAL) goto done; if ((l1 & ATTR_DESCR_MASK) == L1_BLOCK) { pa = (l1 & ~ATTR_MASK) | (addr & L1_OFFSET); managed = (l1 & ATTR_SW_MANAGED) == ATTR_SW_MANAGED; val = MINCORE_SUPER | MINCORE_INCORE; if (pmap_page_dirty(l1)) val |= MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER; if ((l1 & ATTR_AF) == ATTR_AF) val |= MINCORE_REFERENCED | MINCORE_REFERENCED_OTHER; goto done; } l2p = pmap_l1_to_l2(l1p, addr); if (l2p == NULL) /* No l2 */ goto done; l2 = pmap_load(l2p); if ((l2 & ATTR_DESCR_MASK) == L2_INVAL) goto done; if ((l2 & ATTR_DESCR_MASK) == L2_BLOCK) { pa = (l2 & ~ATTR_MASK) | (addr & L2_OFFSET); managed = (l2 & ATTR_SW_MANAGED) == ATTR_SW_MANAGED; val = MINCORE_SUPER | MINCORE_INCORE; if (pmap_page_dirty(l2)) val |= MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER; if ((l2 & ATTR_AF) == ATTR_AF) val |= MINCORE_REFERENCED | MINCORE_REFERENCED_OTHER; goto done; } l3p = pmap_l2_to_l3(l2p, addr); if (l3p == NULL) /* No l3 */ goto done; l3 = pmap_load(l3p); if ((l3 & ATTR_DESCR_MASK) == L3_INVAL) goto done; if ((l3 & ATTR_DESCR_MASK) == L3_PAGE) { pa = (l3 & ~ATTR_MASK) | (addr & L3_OFFSET); managed = (l3 & ATTR_SW_MANAGED) == ATTR_SW_MANAGED; val = MINCORE_INCORE; if (pmap_page_dirty(l3)) val |= MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER; if ((l3 & ATTR_AF) == ATTR_AF) val |= MINCORE_REFERENCED | MINCORE_REFERENCED_OTHER; } done: if ((val & (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER)) != (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER) && managed) { /* Ensure that "PHYS_TO_VM_PAGE(pa)->object" doesn't change. */ if (vm_page_pa_tryrelock(pmap, pa, locked_pa)) goto retry; } else PA_UNLOCK_COND(*locked_pa); PMAP_UNLOCK(pmap); return (val); } void pmap_activate(struct thread *td) { pmap_t pmap; critical_enter(); pmap = vmspace_pmap(td->td_proc->p_vmspace); td->td_pcb->pcb_l0addr = vtophys(pmap->pm_l0); __asm __volatile("msr ttbr0_el1, %0" : : "r"(td->td_pcb->pcb_l0addr)); pmap_invalidate_all(pmap); critical_exit(); } void pmap_sync_icache(pmap_t pmap, vm_offset_t va, vm_size_t sz) { if (va >= VM_MIN_KERNEL_ADDRESS) { cpu_icache_sync_range(va, sz); } else { u_int len, offset; vm_paddr_t pa; /* Find the length of data in this page to flush */ offset = va & PAGE_MASK; len = imin(PAGE_SIZE - offset, sz); while (sz != 0) { /* Extract the physical address & find it in the DMAP */ pa = pmap_extract(pmap, va); if (pa != 0) cpu_icache_sync_range(PHYS_TO_DMAP(pa), len); /* Move to the next page */ sz -= len; va += len; /* Set the length for the next iteration */ len = imin(PAGE_SIZE, sz); } } } /* * Increase the starting virtual address of the given mapping if a * different alignment might result in more superpage mappings. */ void pmap_align_superpage(vm_object_t object, vm_ooffset_t offset, vm_offset_t *addr, vm_size_t size) { } /** * Get the kernel virtual address of a set of physical pages. If there are * physical addresses not covered by the DMAP perform a transient mapping * that will be removed when calling pmap_unmap_io_transient.
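 *
 * A typical caller pairs the two routines (an illustrative sketch, not
 * code from this file; "m" and "va" are hypothetical locals):
 *
 *	vm_offset_t va;
 *	boolean_t mapped;
 *
 *	mapped = pmap_map_io_transient(&m, &va, 1, FALSE);
 *	... access the page through (void *)va ...
 *	if (mapped)
 *		pmap_unmap_io_transient(&m, &va, 1, FALSE);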
* * \param page The pages the caller wishes to obtain the virtual * address on the kernel memory map. * \param vaddr On return contains the kernel virtual memory address * of the pages passed in the page parameter. * \param count Number of pages passed in. * \param can_fault TRUE if the thread using the mapped pages can take * page faults, FALSE otherwise. * * \returns TRUE if the caller must call pmap_unmap_io_transient when * finished or FALSE otherwise. * */ boolean_t pmap_map_io_transient(vm_page_t page[], vm_offset_t vaddr[], int count, boolean_t can_fault) { vm_paddr_t paddr; boolean_t needs_mapping; int error, i; /* * Allocate any KVA space that we need, this is done in a separate * loop to prevent calling vmem_alloc while pinned. */ needs_mapping = FALSE; for (i = 0; i < count; i++) { paddr = VM_PAGE_TO_PHYS(page[i]); if (__predict_false(!PHYS_IN_DMAP(paddr))) { error = vmem_alloc(kernel_arena, PAGE_SIZE, M_BESTFIT | M_WAITOK, &vaddr[i]); KASSERT(error == 0, ("vmem_alloc failed: %d", error)); needs_mapping = TRUE; } else { vaddr[i] = PHYS_TO_DMAP(paddr); } } /* Exit early if everything is covered by the DMAP */ if (!needs_mapping) return (FALSE); if (!can_fault) sched_pin(); for (i = 0; i < count; i++) { paddr = VM_PAGE_TO_PHYS(page[i]); if (!PHYS_IN_DMAP(paddr)) { panic( "pmap_map_io_transient: TODO: Map out of DMAP data"); } } return (needs_mapping); } void pmap_unmap_io_transient(vm_page_t page[], vm_offset_t vaddr[], int count, boolean_t can_fault) { vm_paddr_t paddr; int i; if (!can_fault) sched_unpin(); for (i = 0; i < count; i++) { paddr = VM_PAGE_TO_PHYS(page[i]); if (!PHYS_IN_DMAP(paddr)) { panic("ARM64TODO: pmap_unmap_io_transient: Unmap data"); } } } Index: user/alc/PQ_LAUNDRY/sys/boot/fdt/dts/riscv/spike.dts =================================================================== --- user/alc/PQ_LAUNDRY/sys/boot/fdt/dts/riscv/spike.dts (revision 303666) +++ user/alc/PQ_LAUNDRY/sys/boot/fdt/dts/riscv/spike.dts (revision 303667) @@ -1,109 +1,110 @@ /*- - * Copyright (c) 2015 Ruslan Bukin + * Copyright (c) 2015-2016 Ruslan Bukin * All rights reserved. * * Portions of this software were developed by SRI International and the * University of Cambridge Computer Laboratory under DARPA/AFRL contract * FA8750-10-C-0237 ("CTSRD"), as part of the DARPA CRASH research programme. * * Portions of this software were developed by the University of Cambridge * Computer Laboratory as part of the CTSRD Project, with support from the * UK Higher Education Innovation Fund (HEIF). * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ /dts-v1/; / { - model = "UC Berkeley Spike Simulator RV64I"; - compatible = "riscv,rv64i"; + model = "UC Berkeley Spike Simulator RV64"; + compatible = "riscv,rv64"; #address-cells = <1>; #size-cells = <1>; #interrupt-cells = <1>; cpus { #address-cells = <1>; #size-cells = <0>; cpu@0 { device_type = "cpu"; - compatible = "riscv,rv64i"; - reg = <0x40002000>; + compatible = "riscv,rv64"; + reg = <0x40001000>; }; cpu@1 { device_type = "cpu"; - compatible = "riscv,rv64i"; - reg = <0x4000a000>; + compatible = "riscv,rv64"; + reg = <0x40002000>; }; }; aliases { console0 = &console0; }; memory { device_type = "memory"; - reg = <0x0 0x40000000>; /* 1GB at 0x0 */ + reg = <0x80000000 0x40000000>; /* 1GB at 0x80000000 */ }; soc { - #address-cells = <2>; - #size-cells = <2>; + #address-cells = <1>; + #size-cells = <1>; #interrupt-cells = <1>; compatible = "simple-bus"; ranges; pic0: pic@0 { compatible = "riscv,pic"; interrupt-controller; }; timer0: timer@0 { compatible = "riscv,timer"; - interrupts = < 1 >; + reg = < 0x40000000 0x100 >; + interrupts = < 5 >; interrupt-parent = < &pic0 >; clock-frequency = < 1000000 >; }; htif0: htif@0 { compatible = "riscv,htif"; - interrupts = < 0 >; + interrupts = < 1 >; interrupt-parent = < &pic0 >; console0: console@0 { compatible = "htif,console"; status = "okay"; }; }; }; chosen { bootargs = "-v"; stdin = "console0"; stdout = "console0"; }; }; Index: user/alc/PQ_LAUNDRY/sys/cddl/contrib/opensolaris/uts/common/sys/dtrace.h =================================================================== --- user/alc/PQ_LAUNDRY/sys/cddl/contrib/opensolaris/uts/common/sys/dtrace.h (revision 303666) +++ user/alc/PQ_LAUNDRY/sys/cddl/contrib/opensolaris/uts/common/sys/dtrace.h (revision 303667) @@ -1,2514 +1,2514 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ /* * Copyright (c) 2013, Joyent, Inc. All rights reserved. * Copyright (c) 2013 by Delphix. All rights reserved. 
*/ #ifndef _SYS_DTRACE_H #define _SYS_DTRACE_H #pragma ident "%Z%%M% %I% %E% SMI" #ifdef __cplusplus extern "C" { #endif /* * DTrace Dynamic Tracing Software: Kernel Interfaces * * Note: The contents of this file are private to the implementation of the * Solaris system and DTrace subsystem and are subject to change at any time * without notice. Applications and drivers using these interfaces will fail * to run on future releases. These interfaces should not be used for any * purpose except those expressly outlined in dtrace(7D) and libdtrace(3LIB). * Please refer to the "Solaris Dynamic Tracing Guide" for more information. */ #ifndef _ASM #include #include #include #ifdef illumos #include #else #include #include #include #include #include typedef int model_t; #endif #include #ifdef illumos #include #include #else #include #endif /* * DTrace Universal Constants and Typedefs */ #define DTRACE_CPUALL -1 /* all CPUs */ #define DTRACE_IDNONE 0 /* invalid probe identifier */ #define DTRACE_EPIDNONE 0 /* invalid enabled probe identifier */ #define DTRACE_AGGIDNONE 0 /* invalid aggregation identifier */ #define DTRACE_AGGVARIDNONE 0 /* invalid aggregation variable ID */ #define DTRACE_CACHEIDNONE 0 /* invalid predicate cache */ #define DTRACE_PROVNONE 0 /* invalid provider identifier */ #define DTRACE_METAPROVNONE 0 /* invalid meta-provider identifier */ #define DTRACE_ARGNONE -1 /* invalid argument index */ #define DTRACE_PROVNAMELEN 64 #define DTRACE_MODNAMELEN 64 #define DTRACE_FUNCNAMELEN 192 #define DTRACE_NAMELEN 64 #define DTRACE_FULLNAMELEN (DTRACE_PROVNAMELEN + DTRACE_MODNAMELEN + \ DTRACE_FUNCNAMELEN + DTRACE_NAMELEN + 4) #define DTRACE_ARGTYPELEN 128 typedef uint32_t dtrace_id_t; /* probe identifier */ typedef uint32_t dtrace_epid_t; /* enabled probe identifier */ typedef uint32_t dtrace_aggid_t; /* aggregation identifier */ typedef int64_t dtrace_aggvarid_t; /* aggregation variable identifier */ typedef uint16_t dtrace_actkind_t; /* action kind */ typedef int64_t dtrace_optval_t; /* option value */ typedef uint32_t dtrace_cacheid_t; /* predicate cache identifier */ typedef enum dtrace_probespec { DTRACE_PROBESPEC_NONE = -1, DTRACE_PROBESPEC_PROVIDER = 0, DTRACE_PROBESPEC_MOD, DTRACE_PROBESPEC_FUNC, DTRACE_PROBESPEC_NAME } dtrace_probespec_t; /* * DTrace Intermediate Format (DIF) * * The following definitions describe the DTrace Intermediate Format (DIF), a * RISC-like instruction set and program encoding used to represent * predicates and actions that can be bound to DTrace probes. The constants * below defining the number of available registers are suggested minimums; the * compiler should use DTRACEIOC_CONF to dynamically obtain the number of * registers provided by the current DTrace implementation.
*/ #define DIF_VERSION_1 1 /* DIF version 1: Solaris 10 Beta */ #define DIF_VERSION_2 2 /* DIF version 2: Solaris 10 FCS */ #define DIF_VERSION DIF_VERSION_2 /* latest DIF instruction set version */ #define DIF_DIR_NREGS 8 /* number of DIF integer registers */ #define DIF_DTR_NREGS 8 /* number of DIF tuple registers */ #define DIF_OP_OR 1 /* or r1, r2, rd */ #define DIF_OP_XOR 2 /* xor r1, r2, rd */ #define DIF_OP_AND 3 /* and r1, r2, rd */ #define DIF_OP_SLL 4 /* sll r1, r2, rd */ #define DIF_OP_SRL 5 /* srl r1, r2, rd */ #define DIF_OP_SUB 6 /* sub r1, r2, rd */ #define DIF_OP_ADD 7 /* add r1, r2, rd */ #define DIF_OP_MUL 8 /* mul r1, r2, rd */ #define DIF_OP_SDIV 9 /* sdiv r1, r2, rd */ #define DIF_OP_UDIV 10 /* udiv r1, r2, rd */ #define DIF_OP_SREM 11 /* srem r1, r2, rd */ #define DIF_OP_UREM 12 /* urem r1, r2, rd */ #define DIF_OP_NOT 13 /* not r1, rd */ #define DIF_OP_MOV 14 /* mov r1, rd */ #define DIF_OP_CMP 15 /* cmp r1, r2 */ #define DIF_OP_TST 16 /* tst r1 */ #define DIF_OP_BA 17 /* ba label */ #define DIF_OP_BE 18 /* be label */ #define DIF_OP_BNE 19 /* bne label */ #define DIF_OP_BG 20 /* bg label */ #define DIF_OP_BGU 21 /* bgu label */ #define DIF_OP_BGE 22 /* bge label */ #define DIF_OP_BGEU 23 /* bgeu label */ #define DIF_OP_BL 24 /* bl label */ #define DIF_OP_BLU 25 /* blu label */ #define DIF_OP_BLE 26 /* ble label */ #define DIF_OP_BLEU 27 /* bleu label */ #define DIF_OP_LDSB 28 /* ldsb [r1], rd */ #define DIF_OP_LDSH 29 /* ldsh [r1], rd */ #define DIF_OP_LDSW 30 /* ldsw [r1], rd */ #define DIF_OP_LDUB 31 /* ldub [r1], rd */ #define DIF_OP_LDUH 32 /* lduh [r1], rd */ #define DIF_OP_LDUW 33 /* lduw [r1], rd */ #define DIF_OP_LDX 34 /* ldx [r1], rd */ #define DIF_OP_RET 35 /* ret rd */ #define DIF_OP_NOP 36 /* nop */ #define DIF_OP_SETX 37 /* setx intindex, rd */ #define DIF_OP_SETS 38 /* sets strindex, rd */ #define DIF_OP_SCMP 39 /* scmp r1, r2 */ #define DIF_OP_LDGA 40 /* ldga var, ri, rd */ #define DIF_OP_LDGS 41 /* ldgs var, rd */ #define DIF_OP_STGS 42 /* stgs var, rs */ #define DIF_OP_LDTA 43 /* ldta var, ri, rd */ #define DIF_OP_LDTS 44 /* ldts var, rd */ #define DIF_OP_STTS 45 /* stts var, rs */ #define DIF_OP_SRA 46 /* sra r1, r2, rd */ #define DIF_OP_CALL 47 /* call subr, rd */ #define DIF_OP_PUSHTR 48 /* pushtr type, rs, rr */ #define DIF_OP_PUSHTV 49 /* pushtv type, rs, rv */ #define DIF_OP_POPTS 50 /* popts */ #define DIF_OP_FLUSHTS 51 /* flushts */ #define DIF_OP_LDGAA 52 /* ldgaa var, rd */ #define DIF_OP_LDTAA 53 /* ldtaa var, rd */ #define DIF_OP_STGAA 54 /* stgaa var, rs */ #define DIF_OP_STTAA 55 /* sttaa var, rs */ #define DIF_OP_LDLS 56 /* ldls var, rd */ #define DIF_OP_STLS 57 /* stls var, rs */ #define DIF_OP_ALLOCS 58 /* allocs r1, rd */ #define DIF_OP_COPYS 59 /* copys r1, r2, rd */ #define DIF_OP_STB 60 /* stb r1, [rd] */ #define DIF_OP_STH 61 /* sth r1, [rd] */ #define DIF_OP_STW 62 /* stw r1, [rd] */ #define DIF_OP_STX 63 /* stx r1, [rd] */ #define DIF_OP_ULDSB 64 /* uldsb [r1], rd */ #define DIF_OP_ULDSH 65 /* uldsh [r1], rd */ #define DIF_OP_ULDSW 66 /* uldsw [r1], rd */ #define DIF_OP_ULDUB 67 /* uldub [r1], rd */ #define DIF_OP_ULDUH 68 /* ulduh [r1], rd */ #define DIF_OP_ULDUW 69 /* ulduw [r1], rd */ #define DIF_OP_ULDX 70 /* uldx [r1], rd */ #define DIF_OP_RLDSB 71 /* rldsb [r1], rd */ #define DIF_OP_RLDSH 72 /* rldsh [r1], rd */ #define DIF_OP_RLDSW 73 /* rldsw [r1], rd */ #define DIF_OP_RLDUB 74 /* rldub [r1], rd */ #define DIF_OP_RLDUH 75 /* rlduh [r1], rd */ #define DIF_OP_RLDUW 76 /* rlduw [r1], rd */ #define DIF_OP_RLDX 77 /* rldx 
[r1], rd */ #define DIF_OP_XLATE 78 /* xlate xlrindex, rd */ #define DIF_OP_XLARG 79 /* xlarg xlrindex, rd */ #define DIF_INTOFF_MAX 0xffff /* highest integer table offset */ #define DIF_STROFF_MAX 0xffff /* highest string table offset */ #define DIF_REGISTER_MAX 0xff /* highest register number */ #define DIF_VARIABLE_MAX 0xffff /* highest variable identifier */ #define DIF_SUBROUTINE_MAX 0xffff /* highest subroutine code */ #define DIF_VAR_ARRAY_MIN 0x0000 /* lowest numbered array variable */ #define DIF_VAR_ARRAY_UBASE 0x0080 /* lowest user-defined array */ #define DIF_VAR_ARRAY_MAX 0x00ff /* highest numbered array variable */ #define DIF_VAR_OTHER_MIN 0x0100 /* lowest numbered scalar or assc */ #define DIF_VAR_OTHER_UBASE 0x0500 /* lowest user-defined scalar or assc */ #define DIF_VAR_OTHER_MAX 0xffff /* highest numbered scalar or assc */ #define DIF_VAR_ARGS 0x0000 /* arguments array */ #define DIF_VAR_REGS 0x0001 /* registers array */ #define DIF_VAR_UREGS 0x0002 /* user registers array */ #define DIF_VAR_CURTHREAD 0x0100 /* thread pointer */ #define DIF_VAR_TIMESTAMP 0x0101 /* timestamp */ #define DIF_VAR_VTIMESTAMP 0x0102 /* virtual timestamp */ #define DIF_VAR_IPL 0x0103 /* interrupt priority level */ #define DIF_VAR_EPID 0x0104 /* enabled probe ID */ #define DIF_VAR_ID 0x0105 /* probe ID */ #define DIF_VAR_ARG0 0x0106 /* first argument */ #define DIF_VAR_ARG1 0x0107 /* second argument */ #define DIF_VAR_ARG2 0x0108 /* third argument */ #define DIF_VAR_ARG3 0x0109 /* fourth argument */ #define DIF_VAR_ARG4 0x010a /* fifth argument */ #define DIF_VAR_ARG5 0x010b /* sixth argument */ #define DIF_VAR_ARG6 0x010c /* seventh argument */ #define DIF_VAR_ARG7 0x010d /* eighth argument */ #define DIF_VAR_ARG8 0x010e /* ninth argument */ #define DIF_VAR_ARG9 0x010f /* tenth argument */ #define DIF_VAR_STACKDEPTH 0x0110 /* stack depth */ #define DIF_VAR_CALLER 0x0111 /* caller */ #define DIF_VAR_PROBEPROV 0x0112 /* probe provider */ #define DIF_VAR_PROBEMOD 0x0113 /* probe module */ #define DIF_VAR_PROBEFUNC 0x0114 /* probe function */ #define DIF_VAR_PROBENAME 0x0115 /* probe name */ #define DIF_VAR_PID 0x0116 /* process ID */ #define DIF_VAR_TID 0x0117 /* (per-process) thread ID */ #define DIF_VAR_EXECNAME 0x0118 /* name of executable */ #define DIF_VAR_ZONENAME 0x0119 /* zone name associated with process */ #define DIF_VAR_WALLTIMESTAMP 0x011a /* wall-clock timestamp */ #define DIF_VAR_USTACKDEPTH 0x011b /* user-land stack depth */ #define DIF_VAR_UCALLER 0x011c /* user-level caller */ #define DIF_VAR_PPID 0x011d /* parent process ID */ #define DIF_VAR_UID 0x011e /* process user ID */ #define DIF_VAR_GID 0x011f /* process group ID */ #define DIF_VAR_ERRNO 0x0120 /* thread errno */ #define DIF_VAR_EXECARGS 0x0121 /* process arguments */ #ifndef illumos #define DIF_VAR_CPU 0x0200 #endif #define DIF_SUBR_RAND 0 #define DIF_SUBR_MUTEX_OWNED 1 #define DIF_SUBR_MUTEX_OWNER 2 #define DIF_SUBR_MUTEX_TYPE_ADAPTIVE 3 #define DIF_SUBR_MUTEX_TYPE_SPIN 4 #define DIF_SUBR_RW_READ_HELD 5 #define DIF_SUBR_RW_WRITE_HELD 6 #define DIF_SUBR_RW_ISWRITER 7 #define DIF_SUBR_COPYIN 8 #define DIF_SUBR_COPYINSTR 9 #define DIF_SUBR_SPECULATION 10 #define DIF_SUBR_PROGENYOF 11 #define DIF_SUBR_STRLEN 12 #define DIF_SUBR_COPYOUT 13 #define DIF_SUBR_COPYOUTSTR 14 #define DIF_SUBR_ALLOCA 15 #define DIF_SUBR_BCOPY 16 #define DIF_SUBR_COPYINTO 17 #define DIF_SUBR_MSGDSIZE 18 #define DIF_SUBR_MSGSIZE 19 #define DIF_SUBR_GETMAJOR 20 #define DIF_SUBR_GETMINOR 21 #define DIF_SUBR_DDI_PATHNAME 22 #define DIF_SUBR_STRJOIN 23 
#define DIF_SUBR_LLTOSTR 24 #define DIF_SUBR_BASENAME 25 #define DIF_SUBR_DIRNAME 26 #define DIF_SUBR_CLEANPATH 27 #define DIF_SUBR_STRCHR 28 #define DIF_SUBR_STRRCHR 29 #define DIF_SUBR_STRSTR 30 #define DIF_SUBR_STRTOK 31 #define DIF_SUBR_SUBSTR 32 #define DIF_SUBR_INDEX 33 #define DIF_SUBR_RINDEX 34 #define DIF_SUBR_HTONS 35 #define DIF_SUBR_HTONL 36 #define DIF_SUBR_HTONLL 37 #define DIF_SUBR_NTOHS 38 #define DIF_SUBR_NTOHL 39 #define DIF_SUBR_NTOHLL 40 #define DIF_SUBR_INET_NTOP 41 #define DIF_SUBR_INET_NTOA 42 #define DIF_SUBR_INET_NTOA6 43 #define DIF_SUBR_TOUPPER 44 #define DIF_SUBR_TOLOWER 45 #define DIF_SUBR_MEMREF 46 #define DIF_SUBR_TYPEREF 47 #define DIF_SUBR_SX_SHARED_HELD 48 #define DIF_SUBR_SX_EXCLUSIVE_HELD 49 #define DIF_SUBR_SX_ISEXCLUSIVE 50 #define DIF_SUBR_MEMSTR 51 #define DIF_SUBR_GETF 52 #define DIF_SUBR_JSON 53 #define DIF_SUBR_STRTOLL 54 #define DIF_SUBR_MAX 54 /* max subroutine value */ typedef uint32_t dif_instr_t; #define DIF_INSTR_OP(i) (((i) >> 24) & 0xff) #define DIF_INSTR_R1(i) (((i) >> 16) & 0xff) #define DIF_INSTR_R2(i) (((i) >> 8) & 0xff) #define DIF_INSTR_RD(i) ((i) & 0xff) #define DIF_INSTR_RS(i) ((i) & 0xff) #define DIF_INSTR_LABEL(i) ((i) & 0xffffff) #define DIF_INSTR_VAR(i) (((i) >> 8) & 0xffff) #define DIF_INSTR_INTEGER(i) (((i) >> 8) & 0xffff) #define DIF_INSTR_STRING(i) (((i) >> 8) & 0xffff) #define DIF_INSTR_SUBR(i) (((i) >> 8) & 0xffff) #define DIF_INSTR_TYPE(i) (((i) >> 16) & 0xff) #define DIF_INSTR_XLREF(i) (((i) >> 8) & 0xffff) #define DIF_INSTR_FMT(op, r1, r2, d) \ (((op) << 24) | ((r1) << 16) | ((r2) << 8) | (d)) #define DIF_INSTR_NOT(r1, d) (DIF_INSTR_FMT(DIF_OP_NOT, r1, 0, d)) #define DIF_INSTR_MOV(r1, d) (DIF_INSTR_FMT(DIF_OP_MOV, r1, 0, d)) #define DIF_INSTR_CMP(op, r1, r2) (DIF_INSTR_FMT(op, r1, r2, 0)) #define DIF_INSTR_TST(r1) (DIF_INSTR_FMT(DIF_OP_TST, r1, 0, 0)) #define DIF_INSTR_BRANCH(op, label) (((op) << 24) | (label)) #define DIF_INSTR_LOAD(op, r1, d) (DIF_INSTR_FMT(op, r1, 0, d)) #define DIF_INSTR_STORE(op, r1, d) (DIF_INSTR_FMT(op, r1, 0, d)) #define DIF_INSTR_SETX(i, d) ((DIF_OP_SETX << 24) | ((i) << 8) | (d)) #define DIF_INSTR_SETS(s, d) ((DIF_OP_SETS << 24) | ((s) << 8) | (d)) #define DIF_INSTR_RET(d) (DIF_INSTR_FMT(DIF_OP_RET, 0, 0, d)) #define DIF_INSTR_NOP (DIF_OP_NOP << 24) #define DIF_INSTR_LDA(op, v, r, d) (DIF_INSTR_FMT(op, v, r, d)) #define DIF_INSTR_LDV(op, v, d) (((op) << 24) | ((v) << 8) | (d)) #define DIF_INSTR_STV(op, v, rs) (((op) << 24) | ((v) << 8) | (rs)) #define DIF_INSTR_CALL(s, d) ((DIF_OP_CALL << 24) | ((s) << 8) | (d)) #define DIF_INSTR_PUSHTS(op, t, r2, rs) (DIF_INSTR_FMT(op, t, r2, rs)) #define DIF_INSTR_POPTS (DIF_OP_POPTS << 24) #define DIF_INSTR_FLUSHTS (DIF_OP_FLUSHTS << 24) #define DIF_INSTR_ALLOCS(r1, d) (DIF_INSTR_FMT(DIF_OP_ALLOCS, r1, 0, d)) #define DIF_INSTR_COPYS(r1, r2, d) (DIF_INSTR_FMT(DIF_OP_COPYS, r1, r2, d)) #define DIF_INSTR_XLATE(op, r, d) (((op) << 24) | ((r) << 8) | (d)) #define DIF_REG_R0 0 /* %r0 is always set to zero */ /* * A DTrace Intermediate Format Type (DIF Type) is used to represent the types * of variables, function and associative array arguments, and the return type * for each DIF object (shown below). It contains a description of the type, * its size in bytes, and a module identifier. 
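 */

/*
 * A worked example of the instruction encoding macros above
 * (illustrative, not part of the original header):
 * DIF_INSTR_FMT(DIF_OP_ADD, 1, 2, 3) packs "add %r1, %r2, %r3" as
 * (7 << 24) | (1 << 16) | (2 << 8) | 3 == 0x07010203; DIF_INSTR_OP()
 * then recovers the opcode by shifting the word right 24 bits, and
 * DIF_INSTR_RD() masks off the low byte.
 */

/*
 * The dtrace_diftype structure below holds one such type description: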
*/ typedef struct dtrace_diftype { uint8_t dtdt_kind; /* type kind (see below) */ uint8_t dtdt_ckind; /* type kind in CTF */ uint8_t dtdt_flags; /* type flags (see below) */ uint8_t dtdt_pad; /* reserved for future use */ uint32_t dtdt_size; /* type size in bytes (unless string) */ } dtrace_diftype_t; #define DIF_TYPE_CTF 0 /* type is a CTF type */ #define DIF_TYPE_STRING 1 /* type is a D string */ #define DIF_TF_BYREF 0x1 /* type is passed by reference */ #define DIF_TF_BYUREF 0x2 /* user type is passed by reference */ /* * A DTrace Intermediate Format variable record is used to describe each of the * variables referenced by a given DIF object. It contains an integer variable * identifier along with variable scope and properties, as shown below. The * size of this structure must be sizeof (int) aligned. */ typedef struct dtrace_difv { uint32_t dtdv_name; /* variable name index in dtdo_strtab */ uint32_t dtdv_id; /* variable reference identifier */ uint8_t dtdv_kind; /* variable kind (see below) */ uint8_t dtdv_scope; /* variable scope (see below) */ uint16_t dtdv_flags; /* variable flags (see below) */ dtrace_diftype_t dtdv_type; /* variable type (see above) */ } dtrace_difv_t; #define DIFV_KIND_ARRAY 0 /* variable is an array of quantities */ #define DIFV_KIND_SCALAR 1 /* variable is a scalar quantity */ #define DIFV_SCOPE_GLOBAL 0 /* variable has global scope */ #define DIFV_SCOPE_THREAD 1 /* variable has thread scope */ #define DIFV_SCOPE_LOCAL 2 /* variable has local scope */ #define DIFV_F_REF 0x1 /* variable is referenced by DIFO */ #define DIFV_F_MOD 0x2 /* variable is written by DIFO */ /* * DTrace Actions * * The upper byte determines the class of the action; the low bytes determine * the specific action within that class. The classes of actions are as * follows: * * [ no class ] <= May record process- or kernel-related data * DTRACEACT_PROC <= Only records process-related data * DTRACEACT_PROC_DESTRUCTIVE <= Potentially destructive to processes * DTRACEACT_KERNEL <= Only records kernel-related data * DTRACEACT_KERNEL_DESTRUCTIVE <= Potentially destructive to the kernel * DTRACEACT_SPECULATIVE <= Speculation-related action * DTRACEACT_AGGREGATION <= Aggregating action */ #define DTRACEACT_NONE 0 /* no action */ #define DTRACEACT_DIFEXPR 1 /* action is DIF expression */ #define DTRACEACT_EXIT 2 /* exit() action */ #define DTRACEACT_PRINTF 3 /* printf() action */ #define DTRACEACT_PRINTA 4 /* printa() action */ #define DTRACEACT_LIBACT 5 /* library-controlled action */ #define DTRACEACT_TRACEMEM 6 /* tracemem() action */ #define DTRACEACT_TRACEMEM_DYNSIZE 7 /* dynamic tracemem() size */ #define DTRACEACT_PRINTM 8 /* printm() action (BSD) */ #define DTRACEACT_PRINTT 9 /* printt() action (BSD) */ #define DTRACEACT_PROC 0x0100 #define DTRACEACT_USTACK (DTRACEACT_PROC + 1) #define DTRACEACT_JSTACK (DTRACEACT_PROC + 2) #define DTRACEACT_USYM (DTRACEACT_PROC + 3) #define DTRACEACT_UMOD (DTRACEACT_PROC + 4) #define DTRACEACT_UADDR (DTRACEACT_PROC + 5) #define DTRACEACT_PROC_DESTRUCTIVE 0x0200 #define DTRACEACT_STOP (DTRACEACT_PROC_DESTRUCTIVE + 1) #define DTRACEACT_RAISE (DTRACEACT_PROC_DESTRUCTIVE + 2) #define DTRACEACT_SYSTEM (DTRACEACT_PROC_DESTRUCTIVE + 3) #define DTRACEACT_FREOPEN (DTRACEACT_PROC_DESTRUCTIVE + 4) #define DTRACEACT_PROC_CONTROL 0x0300 #define DTRACEACT_KERNEL 0x0400 #define DTRACEACT_STACK (DTRACEACT_KERNEL + 1) #define DTRACEACT_SYM (DTRACEACT_KERNEL + 2) #define DTRACEACT_MOD (DTRACEACT_KERNEL + 3) #define DTRACEACT_KERNEL_DESTRUCTIVE 0x0500 #define DTRACEACT_BREAKPOINT (DTRACEACT_KERNEL_DESTRUCTIVE + 1) #define DTRACEACT_PANIC (DTRACEACT_KERNEL_DESTRUCTIVE + 2) #define DTRACEACT_CHILL (DTRACEACT_KERNEL_DESTRUCTIVE + 3) #define DTRACEACT_SPECULATIVE 0x0600 #define DTRACEACT_SPECULATE (DTRACEACT_SPECULATIVE + 1) #define DTRACEACT_COMMIT (DTRACEACT_SPECULATIVE + 2) #define DTRACEACT_DISCARD (DTRACEACT_SPECULATIVE + 3) #define DTRACEACT_CLASS(x) ((x) & 0xff00) #define DTRACEACT_ISDESTRUCTIVE(x) \ (DTRACEACT_CLASS(x) == DTRACEACT_PROC_DESTRUCTIVE || \ DTRACEACT_CLASS(x) == DTRACEACT_KERNEL_DESTRUCTIVE) #define DTRACEACT_ISSPECULATIVE(x) \ (DTRACEACT_CLASS(x) == DTRACEACT_SPECULATIVE) #define DTRACEACT_ISPRINTFLIKE(x) \ ((x) == DTRACEACT_PRINTF || (x) == DTRACEACT_PRINTA || \ (x) == DTRACEACT_SYSTEM || (x) == DTRACEACT_FREOPEN) /* * DTrace Aggregating Actions * * These are functions f(x) for which the following is true: * * f(f(x_0) U f(x_1) U ... U f(x_n)) = f(x_0 U x_1 U ... U x_n) * * where x_n is a set of arbitrary data. Aggregating actions are in their own * DTrace action class, DTRACEACT_AGGREGATION. The macros provided here allow * for easier processing of the aggregation argument and data payload for a few * aggregating actions (notably: quantize(), lquantize(), and ustack()). */ #define DTRACEACT_AGGREGATION 0x0700 #define DTRACEAGG_COUNT (DTRACEACT_AGGREGATION + 1) #define DTRACEAGG_MIN (DTRACEACT_AGGREGATION + 2) #define DTRACEAGG_MAX (DTRACEACT_AGGREGATION + 3) #define DTRACEAGG_AVG (DTRACEACT_AGGREGATION + 4) #define DTRACEAGG_SUM (DTRACEACT_AGGREGATION + 5) #define DTRACEAGG_STDDEV (DTRACEACT_AGGREGATION + 6) #define DTRACEAGG_QUANTIZE (DTRACEACT_AGGREGATION + 7) #define DTRACEAGG_LQUANTIZE (DTRACEACT_AGGREGATION + 8) #define DTRACEAGG_LLQUANTIZE (DTRACEACT_AGGREGATION + 9) #define DTRACEACT_ISAGG(x) \ (DTRACEACT_CLASS(x) == DTRACEACT_AGGREGATION) #define DTRACE_QUANTIZE_NBUCKETS \ (((sizeof (uint64_t) * NBBY) - 1) * 2 + 1) #define DTRACE_QUANTIZE_ZEROBUCKET ((sizeof (uint64_t) * NBBY) - 1) #define DTRACE_QUANTIZE_BUCKETVAL(buck) \ (int64_t)((buck) < DTRACE_QUANTIZE_ZEROBUCKET ? \ -(1LL << (DTRACE_QUANTIZE_ZEROBUCKET - 1 - (buck))) : \ (buck) == DTRACE_QUANTIZE_ZEROBUCKET ? 
0 : \ 1LL << ((buck) - DTRACE_QUANTIZE_ZEROBUCKET - 1)) #define DTRACE_LQUANTIZE_STEPSHIFT 48 #define DTRACE_LQUANTIZE_STEPMASK ((uint64_t)UINT16_MAX << 48) #define DTRACE_LQUANTIZE_LEVELSHIFT 32 #define DTRACE_LQUANTIZE_LEVELMASK ((uint64_t)UINT16_MAX << 32) #define DTRACE_LQUANTIZE_BASESHIFT 0 #define DTRACE_LQUANTIZE_BASEMASK UINT32_MAX #define DTRACE_LQUANTIZE_STEP(x) \ (uint16_t)(((x) & DTRACE_LQUANTIZE_STEPMASK) >> \ DTRACE_LQUANTIZE_STEPSHIFT) #define DTRACE_LQUANTIZE_LEVELS(x) \ (uint16_t)(((x) & DTRACE_LQUANTIZE_LEVELMASK) >> \ DTRACE_LQUANTIZE_LEVELSHIFT) #define DTRACE_LQUANTIZE_BASE(x) \ (int32_t)(((x) & DTRACE_LQUANTIZE_BASEMASK) >> \ DTRACE_LQUANTIZE_BASESHIFT) #define DTRACE_LLQUANTIZE_FACTORSHIFT 48 #define DTRACE_LLQUANTIZE_FACTORMASK ((uint64_t)UINT16_MAX << 48) #define DTRACE_LLQUANTIZE_LOWSHIFT 32 #define DTRACE_LLQUANTIZE_LOWMASK ((uint64_t)UINT16_MAX << 32) #define DTRACE_LLQUANTIZE_HIGHSHIFT 16 #define DTRACE_LLQUANTIZE_HIGHMASK ((uint64_t)UINT16_MAX << 16) #define DTRACE_LLQUANTIZE_NSTEPSHIFT 0 #define DTRACE_LLQUANTIZE_NSTEPMASK UINT16_MAX #define DTRACE_LLQUANTIZE_FACTOR(x) \ (uint16_t)(((x) & DTRACE_LLQUANTIZE_FACTORMASK) >> \ DTRACE_LLQUANTIZE_FACTORSHIFT) #define DTRACE_LLQUANTIZE_LOW(x) \ (uint16_t)(((x) & DTRACE_LLQUANTIZE_LOWMASK) >> \ DTRACE_LLQUANTIZE_LOWSHIFT) #define DTRACE_LLQUANTIZE_HIGH(x) \ (uint16_t)(((x) & DTRACE_LLQUANTIZE_HIGHMASK) >> \ DTRACE_LLQUANTIZE_HIGHSHIFT) #define DTRACE_LLQUANTIZE_NSTEP(x) \ (uint16_t)(((x) & DTRACE_LLQUANTIZE_NSTEPMASK) >> \ DTRACE_LLQUANTIZE_NSTEPSHIFT) #define DTRACE_USTACK_NFRAMES(x) (uint32_t)((x) & UINT32_MAX) #define DTRACE_USTACK_STRSIZE(x) (uint32_t)((x) >> 32) #define DTRACE_USTACK_ARG(x, y) \ ((((uint64_t)(y)) << 32) | ((x) & UINT32_MAX)) #ifndef _LP64 #if BYTE_ORDER == _BIG_ENDIAN #define DTRACE_PTR(type, name) uint32_t name##pad; type *name #else #define DTRACE_PTR(type, name) type *name; uint32_t name##pad #endif #else #define DTRACE_PTR(type, name) type *name #endif /* * DTrace Object Format (DOF) * * DTrace programs can be persistently encoded in the DOF format so that they * may be embedded in other programs (for example, in an ELF file) or in the * dtrace driver configuration file for use in anonymous tracing. The DOF * format is versioned and extensible so that it can be revised and so that * internal data structures can be modified or extended compatibly. All DOF * structures use fixed-size types, so the 32-bit and 64-bit representations * are identical and consumers can use either data model transparently. * * The file layout is structured as follows: * * +---------------+-------------------+----- ... ----+---- ... ------+ * | dof_hdr_t | dof_sec_t[ ... ] | loadable | non-loadable | * | (file header) | (section headers) | section data | section data | * +---------------+-------------------+----- ... ----+---- ... ------+ * |<------------ dof_hdr.dofh_loadsz --------------->| | * |<------------ dof_hdr.dofh_filesz ------------------------------->| * * The file header stores meta-data including a magic number, data model for * the instrumentation, data encoding, and properties of the DIF code within. * The header describes its own size and the size of the section headers. By * convention, an array of section headers follows the file header, and then * the data for all loadable sections and unloadable sections. This permits * consumer code to easily download the headers and all loadable data into the * DTrace driver in one contiguous chunk, omitting other extraneous sections. 
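To make this file layout concrete, here is a minimal user-level sketch (assuming <string.h> for memcmp) that validates the magic bytes and walks the section headers using the dof_hdr_t and dof_sec_t structures defined below; a real consumer performs many more validity checks than this.

static int
dof_count_loadable(const void *image)
{
	const dof_hdr_t *hdr = image;
	const char *base = image;
	uint32_t i, nloadable = 0;

	/* Reject anything that doesn't begin with "\177DOF". */
	if (memcmp(hdr->dofh_ident, DOF_MAG_STRING, DOF_MAG_STRLEN) != 0)
		return (-1);

	for (i = 0; i < hdr->dofh_secnum; i++) {
		const dof_sec_t *sec = (const dof_sec_t *)
		    (base + hdr->dofh_secoff + i * hdr->dofh_secsize);

		/* Loadable section data lies within dofh_loadsz. */
		if (sec->dofs_flags & DOF_SECF_LOAD)
			nloadable++;
	}
	return ((int)nloadable);
}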
* * The section headers describe the size, offset, alignment, and section type * for each section. Sections are described using a set of #defines that tell * the consumer what kind of data is expected. Sections can contain links to * other sections by storing a dof_secidx_t, an index into the section header * array, inside of the section data structures. The section header includes * an entry size so that sections with data arrays can grow their structures. * * The DOF data itself can contain many snippets of DIF (i.e. >1 DIFOs), which * are represented themselves as a collection of related DOF sections. This * permits us to change the set of sections associated with a DIFO over time, * and also permits us to encode DIFOs that contain different sets of sections. * When a DOF section wants to refer to a DIFO, it stores the dof_secidx_t of a * section of type DOF_SECT_DIFOHDR. This section's data is then an array of * dof_secidx_t's which in turn denote the sections associated with this DIFO. * * This loose coupling of the file structure (header and sections) to the * structure of the DTrace program itself (ECB descriptions, action * descriptions, and DIFOs) permits activities such as relocation processing * to occur in a single pass without having to understand D program structure. * * Finally, strings are always stored in ELF-style string tables along with a * string table section index and string table offset. Therefore strings in * DOF are always arbitrary-length and not bound to the current implementation. */ #define DOF_ID_SIZE 16 /* total size of dofh_ident[] in bytes */ typedef struct dof_hdr { uint8_t dofh_ident[DOF_ID_SIZE]; /* identification bytes (see below) */ uint32_t dofh_flags; /* file attribute flags (if any) */ uint32_t dofh_hdrsize; /* size of file header in bytes */ uint32_t dofh_secsize; /* size of section header in bytes */ uint32_t dofh_secnum; /* number of section headers */ uint64_t dofh_secoff; /* file offset of section headers */ uint64_t dofh_loadsz; /* file size of loadable portion */ uint64_t dofh_filesz; /* file size of entire DOF file */ uint64_t dofh_pad; /* reserved for future use */ } dof_hdr_t; #define DOF_ID_MAG0 0 /* first byte of magic number */ #define DOF_ID_MAG1 1 /* second byte of magic number */ #define DOF_ID_MAG2 2 /* third byte of magic number */ #define DOF_ID_MAG3 3 /* fourth byte of magic number */ #define DOF_ID_MODEL 4 /* DOF data model (see below) */ #define DOF_ID_ENCODING 5 /* DOF data encoding (see below) */ #define DOF_ID_VERSION 6 /* DOF file format major version (see below) */ #define DOF_ID_DIFVERS 7 /* DIF instruction set version */ #define DOF_ID_DIFIREG 8 /* DIF integer registers used by compiler */ #define DOF_ID_DIFTREG 9 /* DIF tuple registers used by compiler */ #define DOF_ID_PAD 10 /* start of padding bytes (all zeroes) */ #define DOF_MAG_MAG0 0x7F /* DOF_ID_MAG[0-3] */ #define DOF_MAG_MAG1 'D' #define DOF_MAG_MAG2 'O' #define DOF_MAG_MAG3 'F' #define DOF_MAG_STRING "\177DOF" #define DOF_MAG_STRLEN 4 #define DOF_MODEL_NONE 0 /* DOF_ID_MODEL */ #define DOF_MODEL_ILP32 1 #define DOF_MODEL_LP64 2 #ifdef _LP64 #define DOF_MODEL_NATIVE DOF_MODEL_LP64 #else #define DOF_MODEL_NATIVE DOF_MODEL_ILP32 #endif #define DOF_ENCODE_NONE 0 /* DOF_ID_ENCODING */ #define DOF_ENCODE_LSB 1 #define DOF_ENCODE_MSB 2 #if BYTE_ORDER == _BIG_ENDIAN #define DOF_ENCODE_NATIVE DOF_ENCODE_MSB #else #define DOF_ENCODE_NATIVE DOF_ENCODE_LSB #endif #define DOF_VERSION_1 1 /* DOF version 1: Solaris 10 FCS */ #define DOF_VERSION_2 2 /* DOF version 2: 
Solaris Express 6/06 */ #define DOF_VERSION DOF_VERSION_2 /* Latest DOF version */ #define DOF_FL_VALID 0 /* mask of all valid dofh_flags bits */ typedef uint32_t dof_secidx_t; /* section header table index type */ typedef uint32_t dof_stridx_t; /* string table index type */ #define DOF_SECIDX_NONE (-1U) /* null value for section indices */ #define DOF_STRIDX_NONE (-1U) /* null value for string indices */ typedef struct dof_sec { uint32_t dofs_type; /* section type (see below) */ uint32_t dofs_align; /* section data memory alignment */ uint32_t dofs_flags; /* section flags (if any) */ uint32_t dofs_entsize; /* size of section entry (if table) */ uint64_t dofs_offset; /* offset of section data within file */ uint64_t dofs_size; /* size of section data in bytes */ } dof_sec_t; #define DOF_SECT_NONE 0 /* null section */ #define DOF_SECT_COMMENTS 1 /* compiler comments */ #define DOF_SECT_SOURCE 2 /* D program source code */ #define DOF_SECT_ECBDESC 3 /* dof_ecbdesc_t */ #define DOF_SECT_PROBEDESC 4 /* dof_probedesc_t */ #define DOF_SECT_ACTDESC 5 /* dof_actdesc_t array */ #define DOF_SECT_DIFOHDR 6 /* dof_difohdr_t (variable length) */ #define DOF_SECT_DIF 7 /* uint32_t array of byte code */ #define DOF_SECT_STRTAB 8 /* string table */ #define DOF_SECT_VARTAB 9 /* dtrace_difv_t array */ #define DOF_SECT_RELTAB 10 /* dof_relodesc_t array */ #define DOF_SECT_TYPTAB 11 /* dtrace_diftype_t array */ #define DOF_SECT_URELHDR 12 /* dof_relohdr_t (user relocations) */ #define DOF_SECT_KRELHDR 13 /* dof_relohdr_t (kernel relocations) */ #define DOF_SECT_OPTDESC 14 /* dof_optdesc_t array */ #define DOF_SECT_PROVIDER 15 /* dof_provider_t */ #define DOF_SECT_PROBES 16 /* dof_probe_t array */ #define DOF_SECT_PRARGS 17 /* uint8_t array (probe arg mappings) */ #define DOF_SECT_PROFFS 18 /* uint32_t array (probe arg offsets) */ #define DOF_SECT_INTTAB 19 /* uint64_t array */ #define DOF_SECT_UTSNAME 20 /* struct utsname */ #define DOF_SECT_XLTAB 21 /* dof_xlref_t array */ #define DOF_SECT_XLMEMBERS 22 /* dof_xlmember_t array */ #define DOF_SECT_XLIMPORT 23 /* dof_xlator_t */ #define DOF_SECT_XLEXPORT 24 /* dof_xlator_t */ #define DOF_SECT_PREXPORT 25 /* dof_secidx_t array (exported objs) */ #define DOF_SECT_PRENOFFS 26 /* uint32_t array (enabled offsets) */ #define DOF_SECF_LOAD 1 /* section should be loaded */ #define DOF_SEC_ISLOADABLE(x) \ (((x) == DOF_SECT_ECBDESC) || ((x) == DOF_SECT_PROBEDESC) || \ ((x) == DOF_SECT_ACTDESC) || ((x) == DOF_SECT_DIFOHDR) || \ ((x) == DOF_SECT_DIF) || ((x) == DOF_SECT_STRTAB) || \ ((x) == DOF_SECT_VARTAB) || ((x) == DOF_SECT_RELTAB) || \ ((x) == DOF_SECT_TYPTAB) || ((x) == DOF_SECT_URELHDR) || \ ((x) == DOF_SECT_KRELHDR) || ((x) == DOF_SECT_OPTDESC) || \ ((x) == DOF_SECT_PROVIDER) || ((x) == DOF_SECT_PROBES) || \ ((x) == DOF_SECT_PRARGS) || ((x) == DOF_SECT_PROFFS) || \ ((x) == DOF_SECT_INTTAB) || ((x) == DOF_SECT_XLTAB) || \ ((x) == DOF_SECT_XLMEMBERS) || ((x) == DOF_SECT_XLIMPORT) || \ ((x) == DOF_SECT_XLEXPORT) || \ ((x) == DOF_SECT_PREXPORT) || ((x) == DOF_SECT_PRENOFFS)) typedef struct dof_ecbdesc { dof_secidx_t dofe_probes; /* link to DOF_SECT_PROBEDESC */ dof_secidx_t dofe_pred; /* link to DOF_SECT_DIFOHDR */ dof_secidx_t dofe_actions; /* link to DOF_SECT_ACTDESC */ uint32_t dofe_pad; /* reserved for future use */ uint64_t dofe_uarg; /* user-supplied library argument */ } dof_ecbdesc_t; typedef struct dof_probedesc { dof_secidx_t dofp_strtab; /* link to DOF_SECT_STRTAB section */ dof_stridx_t dofp_provider; /* provider string */
dof_stridx_t dofp_mod; /* module string */ dof_stridx_t dofp_func; /* function string */ dof_stridx_t dofp_name; /* name string */ uint32_t dofp_id; /* probe identifier (or zero) */ } dof_probedesc_t; typedef struct dof_actdesc { dof_secidx_t dofa_difo; /* link to DOF_SECT_DIFOHDR */ dof_secidx_t dofa_strtab; /* link to DOF_SECT_STRTAB section */ uint32_t dofa_kind; /* action kind (DTRACEACT_* constant) */ uint32_t dofa_ntuple; /* number of subsequent tuple actions */ uint64_t dofa_arg; /* kind-specific argument */ uint64_t dofa_uarg; /* user-supplied argument */ } dof_actdesc_t; typedef struct dof_difohdr { dtrace_diftype_t dofd_rtype; /* return type for this fragment */ dof_secidx_t dofd_links[1]; /* variable length array of indices */ } dof_difohdr_t; typedef struct dof_relohdr { dof_secidx_t dofr_strtab; /* link to DOF_SECT_STRTAB for names */ dof_secidx_t dofr_relsec; /* link to DOF_SECT_RELTAB for relos */ dof_secidx_t dofr_tgtsec; /* link to section we are relocating */ } dof_relohdr_t; typedef struct dof_relodesc { dof_stridx_t dofr_name; /* string name of relocation symbol */ uint32_t dofr_type; /* relo type (DOF_RELO_* constant) */ uint64_t dofr_offset; /* byte offset for relocation */ uint64_t dofr_data; /* additional type-specific data */ } dof_relodesc_t; #define DOF_RELO_NONE 0 /* empty relocation entry */ #define DOF_RELO_SETX 1 /* relocate setx value */ typedef struct dof_optdesc { uint32_t dofo_option; /* option identifier */ dof_secidx_t dofo_strtab; /* string table, if string option */ uint64_t dofo_value; /* option value or string index */ } dof_optdesc_t; typedef uint32_t dof_attr_t; /* encoded stability attributes */ #define DOF_ATTR(n, d, c) (((n) << 24) | ((d) << 16) | ((c) << 8)) #define DOF_ATTR_NAME(a) (((a) >> 24) & 0xff) #define DOF_ATTR_DATA(a) (((a) >> 16) & 0xff) #define DOF_ATTR_CLASS(a) (((a) >> 8) & 0xff) typedef struct dof_provider { dof_secidx_t dofpv_strtab; /* link to DOF_SECT_STRTAB section */ dof_secidx_t dofpv_probes; /* link to DOF_SECT_PROBES section */ dof_secidx_t dofpv_prargs; /* link to DOF_SECT_PRARGS section */ dof_secidx_t dofpv_proffs; /* link to DOF_SECT_PROFFS section */ dof_stridx_t dofpv_name; /* provider name string */ dof_attr_t dofpv_provattr; /* provider attributes */ dof_attr_t dofpv_modattr; /* module attributes */ dof_attr_t dofpv_funcattr; /* function attributes */ dof_attr_t dofpv_nameattr; /* name attributes */ dof_attr_t dofpv_argsattr; /* args attributes */ dof_secidx_t dofpv_prenoffs; /* link to DOF_SECT_PRENOFFS section */ } dof_provider_t; typedef struct dof_probe { uint64_t dofpr_addr; /* probe base address or offset */ dof_stridx_t dofpr_func; /* probe function string */ dof_stridx_t dofpr_name; /* probe name string */ dof_stridx_t dofpr_nargv; /* native argument type strings */ dof_stridx_t dofpr_xargv; /* translated argument type strings */ uint32_t dofpr_argidx; /* index of first argument mapping */ uint32_t dofpr_offidx; /* index of first offset entry */ uint8_t dofpr_nargc; /* native argument count */ uint8_t dofpr_xargc; /* translated argument count */ uint16_t dofpr_noffs; /* number of offset entries for probe */ uint32_t dofpr_enoffidx; /* index of first is-enabled offset */ uint16_t dofpr_nenoffs; /* number of is-enabled offsets */ uint16_t dofpr_pad1; /* reserved for future use */ uint32_t dofpr_pad2; /* reserved for future use */ } dof_probe_t; typedef struct dof_xlator { dof_secidx_t dofxl_members; /* link to DOF_SECT_XLMEMBERS section */ dof_secidx_t dofxl_strtab; /* link to DOF_SECT_STRTAB section */ 
dof_stridx_t dofxl_argv; /* input parameter type strings */ uint32_t dofxl_argc; /* input parameter list length */ dof_stridx_t dofxl_type; /* output type string name */ dof_attr_t dofxl_attr; /* output stability attributes */ } dof_xlator_t; typedef struct dof_xlmember { dof_secidx_t dofxm_difo; /* member link to DOF_SECT_DIFOHDR */ dof_stridx_t dofxm_name; /* member name */ dtrace_diftype_t dofxm_type; /* member type */ } dof_xlmember_t; typedef struct dof_xlref { dof_secidx_t dofxr_xlator; /* link to DOF_SECT_XLATORS section */ uint32_t dofxr_member; /* index of referenced dof_xlmember */ uint32_t dofxr_argn; /* index of argument for DIF_OP_XLARG */ } dof_xlref_t; /* * DTrace Intermediate Format Object (DIFO) * * A DIFO is used to store the compiled DIF for a D expression, its return * type, and its string and variable tables. The string table is a single * buffer of character data into which sets instructions and variable * references can reference strings using a byte offset. The variable table * is an array of dtrace_difv_t structures that describe the name and type of * each variable and the id used in the DIF code. This structure is described * above in the DIF section of this header file. The DIFO is used at both * user-level (in the library) and in the kernel, but the structure is never * passed between the two: the DOF structures form the only interface. As a * result, the definition can change depending on the presence of _KERNEL. */ typedef struct dtrace_difo { dif_instr_t *dtdo_buf; /* instruction buffer */ uint64_t *dtdo_inttab; /* integer table (optional) */ char *dtdo_strtab; /* string table (optional) */ dtrace_difv_t *dtdo_vartab; /* variable table (optional) */ uint_t dtdo_len; /* length of instruction buffer */ uint_t dtdo_intlen; /* length of integer table */ uint_t dtdo_strlen; /* length of string table */ uint_t dtdo_varlen; /* length of variable table */ dtrace_diftype_t dtdo_rtype; /* return type */ uint_t dtdo_refcnt; /* owner reference count */ uint_t dtdo_destructive; /* invokes destructive subroutines */ #ifndef _KERNEL dof_relodesc_t *dtdo_kreltab; /* kernel relocations */ dof_relodesc_t *dtdo_ureltab; /* user relocations */ struct dt_node **dtdo_xlmtab; /* translator references */ uint_t dtdo_krelen; /* length of krelo table */ uint_t dtdo_urelen; /* length of urelo table */ uint_t dtdo_xlmlen; /* length of translator table */ #endif } dtrace_difo_t; /* * DTrace Enabling Description Structures * * When DTrace is tracking the description of a DTrace enabling entity (probe, * predicate, action, ECB, record, etc.), it does so in a description * structure. These structures all end in "desc", and are used at both * user-level and in the kernel -- but (with the exception of * dtrace_probedesc_t) they are never passed between them. Typically, * user-level will use the description structures when assembling an enabling. * It will then distill those description structures into a DOF object (see * above), and send it into the kernel. The kernel will again use the * description structures to create a description of the enabling as it reads * the DOF. When the description is complete, the enabling will be actually * created -- turning it into the structures that represent the enabling * instead of merely describing it. Not surprisingly, the description * structures bear a strong resemblance to the DOF structures that act as their * conduit. 
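As a concrete example of a description structure, the sketch below fills in the simplest of them, the dtrace_probedesc_t defined just below, for a hypothetical syscall::read:entry match; strlcpy from <string.h> is assumed.

static void
probedesc_init(dtrace_probedesc_t *pdp)
{
	pdp->dtpd_id = 0;		/* no specific probe id yet */
	(void) strlcpy(pdp->dtpd_provider, "syscall", DTRACE_PROVNAMELEN);
	pdp->dtpd_mod[0] = '\0';	/* empty field: match any module */
	(void) strlcpy(pdp->dtpd_func, "read", DTRACE_FUNCNAMELEN);
	(void) strlcpy(pdp->dtpd_name, "entry", DTRACE_NAMELEN);
}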
*/ struct dtrace_predicate; typedef struct dtrace_probedesc { dtrace_id_t dtpd_id; /* probe identifier */ char dtpd_provider[DTRACE_PROVNAMELEN]; /* probe provider name */ char dtpd_mod[DTRACE_MODNAMELEN]; /* probe module name */ char dtpd_func[DTRACE_FUNCNAMELEN]; /* probe function name */ char dtpd_name[DTRACE_NAMELEN]; /* probe name */ } dtrace_probedesc_t; typedef struct dtrace_repldesc { dtrace_probedesc_t dtrpd_match; /* probe descr. to match */ dtrace_probedesc_t dtrpd_create; /* probe descr. to create */ } dtrace_repldesc_t; typedef struct dtrace_preddesc { dtrace_difo_t *dtpdd_difo; /* pointer to DIF object */ struct dtrace_predicate *dtpdd_predicate; /* pointer to predicate */ } dtrace_preddesc_t; typedef struct dtrace_actdesc { dtrace_difo_t *dtad_difo; /* pointer to DIF object */ struct dtrace_actdesc *dtad_next; /* next action */ dtrace_actkind_t dtad_kind; /* kind of action */ uint32_t dtad_ntuple; /* number in tuple */ uint64_t dtad_arg; /* action argument */ uint64_t dtad_uarg; /* user argument */ int dtad_refcnt; /* reference count */ } dtrace_actdesc_t; typedef struct dtrace_ecbdesc { dtrace_actdesc_t *dted_action; /* action description(s) */ dtrace_preddesc_t dted_pred; /* predicate description */ dtrace_probedesc_t dted_probe; /* probe description */ uint64_t dted_uarg; /* library argument */ int dted_refcnt; /* reference count */ } dtrace_ecbdesc_t; /* * DTrace Metadata Description Structures * * DTrace separates the trace data stream from the metadata stream. The only * metadata tokens placed in the data stream are the dtrace_rechdr_t (EPID + * timestamp) or (in the case of aggregations) aggregation identifiers. To * determine the structure of the data, DTrace consumers pass the token to the * kernel, and receive in return a corresponding description of the enabled * probe (via the dtrace_eprobedesc structure) or the aggregation (via the * dtrace_aggdesc structure). Both of these structures are expressed in terms * of record descriptions (via the dtrace_recdesc structure) that describe the * exact structure of the data. Some record descriptions may also contain a * format identifier; this additional bit of metadata can be retrieved from the * kernel, for which a format description is returned via the dtrace_fmtdesc * structure. Note that all four of these structures must be bitness-neutral * to allow for a 32-bit DTrace consumer on a 64-bit kernel. 
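As an illustration of how a consumer applies this metadata, here is a minimal sketch that walks the record descriptions of an enabled probe (the dtrace_eprobedesc_t and dtrace_recdesc_t structures are defined just below), computing the furthest payload byte any record touches:

static uint32_t
eprobe_payload_end(const dtrace_eprobedesc_t *epd)
{
	uint32_t end = 0;
	int i;

	for (i = 0; i < epd->dtepd_nrecs; i++) {
		const dtrace_recdesc_t *rec = &epd->dtepd_rec[i];

		/* Each record's payload is dtrd_size bytes at dtrd_offset. */
		if (rec->dtrd_offset + rec->dtrd_size > end)
			end = rec->dtrd_offset + rec->dtrd_size;
	}
	return (end);
}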
*/ typedef struct dtrace_recdesc { dtrace_actkind_t dtrd_action; /* kind of action */ uint32_t dtrd_size; /* size of record */ uint32_t dtrd_offset; /* offset in ECB's data */ uint16_t dtrd_alignment; /* required alignment */ uint16_t dtrd_format; /* format, if any */ uint64_t dtrd_arg; /* action argument */ uint64_t dtrd_uarg; /* user argument */ } dtrace_recdesc_t; typedef struct dtrace_eprobedesc { dtrace_epid_t dtepd_epid; /* enabled probe ID */ dtrace_id_t dtepd_probeid; /* probe ID */ uint64_t dtepd_uarg; /* library argument */ uint32_t dtepd_size; /* total size */ int dtepd_nrecs; /* number of records */ dtrace_recdesc_t dtepd_rec[1]; /* records themselves */ } dtrace_eprobedesc_t; typedef struct dtrace_aggdesc { DTRACE_PTR(char, dtagd_name); /* not filled in by kernel */ dtrace_aggvarid_t dtagd_varid; /* not filled in by kernel */ int dtagd_flags; /* not filled in by kernel */ dtrace_aggid_t dtagd_id; /* aggregation ID */ dtrace_epid_t dtagd_epid; /* enabled probe ID */ uint32_t dtagd_size; /* size in bytes */ int dtagd_nrecs; /* number of records */ uint32_t dtagd_pad; /* explicit padding */ dtrace_recdesc_t dtagd_rec[1]; /* record descriptions */ } dtrace_aggdesc_t; typedef struct dtrace_fmtdesc { DTRACE_PTR(char, dtfd_string); /* format string */ int dtfd_length; /* length of format string */ uint16_t dtfd_format; /* format identifier */ } dtrace_fmtdesc_t; #define DTRACE_SIZEOF_EPROBEDESC(desc) \ (sizeof (dtrace_eprobedesc_t) + ((desc)->dtepd_nrecs ? \ (((desc)->dtepd_nrecs - 1) * sizeof (dtrace_recdesc_t)) : 0)) #define DTRACE_SIZEOF_AGGDESC(desc) \ (sizeof (dtrace_aggdesc_t) + ((desc)->dtagd_nrecs ? \ (((desc)->dtagd_nrecs - 1) * sizeof (dtrace_recdesc_t)) : 0)) /* * DTrace Option Interface * * Run-time DTrace options are set and retrieved via DOF_SECT_OPTDESC sections * in a DOF image. The dof_optdesc structure contains an option identifier and * an option value. The valid option identifiers are found below; the mapping * between option identifiers and option identifying strings is maintained at * user-level. Note that the value of DTRACEOPT_UNSET is such that all of the * following are potentially valid option values: all positive integers, zero * and negative one. Some options (notably "bufpolicy" and "bufresize") take * predefined tokens as their values; these are defined with * DTRACEOPT_{option}_{token}. 
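For example, a consumer requesting a larger principal buffer would encode a dof_optdesc_t (defined earlier in this file) along these lines; this is a sketch, the 4MB value is arbitrary, and non-string options carry DOF_SECIDX_NONE in dofo_strtab:

static dof_optdesc_t bufsize_opt = {
	.dofo_option = DTRACEOPT_BUFSIZE,	/* "bufsize" */
	.dofo_strtab = DOF_SECIDX_NONE,		/* not a string option */
	.dofo_value = 4 * 1024 * 1024		/* 4MB principal buffer */
};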
*/ #define DTRACEOPT_BUFSIZE 0 /* buffer size */ #define DTRACEOPT_BUFPOLICY 1 /* buffer policy */ #define DTRACEOPT_DYNVARSIZE 2 /* dynamic variable size */ #define DTRACEOPT_AGGSIZE 3 /* aggregation size */ #define DTRACEOPT_SPECSIZE 4 /* speculation size */ #define DTRACEOPT_NSPEC 5 /* number of speculations */ #define DTRACEOPT_STRSIZE 6 /* string size */ #define DTRACEOPT_CLEANRATE 7 /* dynvar cleaning rate */ #define DTRACEOPT_CPU 8 /* CPU to trace */ #define DTRACEOPT_BUFRESIZE 9 /* buffer resizing policy */ #define DTRACEOPT_GRABANON 10 /* grab anonymous state, if any */ #define DTRACEOPT_FLOWINDENT 11 /* indent function entry/return */ #define DTRACEOPT_QUIET 12 /* only output explicitly traced data */ #define DTRACEOPT_STACKFRAMES 13 /* number of stack frames */ #define DTRACEOPT_USTACKFRAMES 14 /* number of user stack frames */ #define DTRACEOPT_AGGRATE 15 /* aggregation snapshot rate */ #define DTRACEOPT_SWITCHRATE 16 /* buffer switching rate */ #define DTRACEOPT_STATUSRATE 17 /* status rate */ #define DTRACEOPT_DESTRUCTIVE 18 /* destructive actions allowed */ #define DTRACEOPT_STACKINDENT 19 /* output indent for stack traces */ #define DTRACEOPT_RAWBYTES 20 /* always print bytes in raw form */ #define DTRACEOPT_JSTACKFRAMES 21 /* number of jstack() frames */ #define DTRACEOPT_JSTACKSTRSIZE 22 /* size of jstack() string table */ #define DTRACEOPT_AGGSORTKEY 23 /* sort aggregations by key */ #define DTRACEOPT_AGGSORTREV 24 /* reverse-sort aggregations */ #define DTRACEOPT_AGGSORTPOS 25 /* agg. position to sort on */ #define DTRACEOPT_AGGSORTKEYPOS 26 /* agg. key position to sort on */ #define DTRACEOPT_TEMPORAL 27 /* temporally ordered output */ #define DTRACEOPT_AGGHIST 28 /* histogram aggregation output */ #define DTRACEOPT_AGGPACK 29 /* packed aggregation output */ #define DTRACEOPT_AGGZOOM 30 /* zoomed aggregation scaling */ #define DTRACEOPT_ZONE 31 /* zone in which to enable probes */ #define DTRACEOPT_MAX 32 /* number of options */ #define DTRACEOPT_UNSET (dtrace_optval_t)-2 /* unset option */ #define DTRACEOPT_BUFPOLICY_RING 0 /* ring buffer */ #define DTRACEOPT_BUFPOLICY_FILL 1 /* fill buffer, then stop */ #define DTRACEOPT_BUFPOLICY_SWITCH 2 /* switch buffers */ #define DTRACEOPT_BUFRESIZE_AUTO 0 /* automatic resizing */ #define DTRACEOPT_BUFRESIZE_MANUAL 1 /* manual resizing */ /* * DTrace Buffer Interface * * In order to get a snapshot of the principal or aggregation buffer, * user-level passes a buffer description to the kernel with the dtrace_bufdesc * structure. This describes which CPU user-level is interested in, and * where user-level wishes the kernel to snapshot the buffer to (the * dtbd_data field). The kernel uses the same structure to pass back some * information regarding the buffer: the size of data actually copied out, the * number of drops, the number of errors, the offset of the oldest record, * and the time of the snapshot. * * If the buffer policy is a "switch" policy, taking a snapshot of the * principal buffer has the additional effect of switching the active and * inactive buffers. Taking a snapshot of the aggregation buffer _always_ has * the additional effect of switching the active and inactive buffers. 
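A minimal user-level sketch of this snapshot flow follows, assuming <string.h> and <sys/ioctl.h> and eliding error handling; it uses the dtrace_bufdesc_t defined just below, and note that the FreeBSD ioctl definitions later in this file take a pointer to the descriptor pointer.

static int
buf_snapshot(int dtfd, uint32_t cpu, char *data, uint64_t size)
{
	dtrace_bufdesc_t desc;
	dtrace_bufdesc_t *descp = &desc;

	memset(&desc, 0, sizeof (desc));
	desc.dtbd_size = size;		/* capacity of 'data' */
	desc.dtbd_cpu = cpu;		/* a CPU id, or DTRACE_CPUALL */
	desc.dtbd_data = data;		/* where the kernel copies out */

	/* The kernel passes back the copied size, drops, errors, etc. */
	return (ioctl(dtfd, DTRACEIOC_BUFSNAP, &descp));
}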
*/ typedef struct dtrace_bufdesc { uint64_t dtbd_size; /* size of buffer */ uint32_t dtbd_cpu; /* CPU or DTRACE_CPUALL */ uint32_t dtbd_errors; /* number of errors */ uint64_t dtbd_drops; /* number of drops */ DTRACE_PTR(char, dtbd_data); /* data */ uint64_t dtbd_oldest; /* offset of oldest record */ uint64_t dtbd_timestamp; /* hrtime of snapshot */ } dtrace_bufdesc_t; /* * Each record in the buffer (dtbd_data) begins with a header that includes * the epid and a timestamp. The timestamp is split into two 4-byte parts * so that we do not require 8-byte alignment. */ typedef struct dtrace_rechdr { dtrace_epid_t dtrh_epid; /* enabled probe id */ uint32_t dtrh_timestamp_hi; /* high bits of hrtime_t */ uint32_t dtrh_timestamp_lo; /* low bits of hrtime_t */ } dtrace_rechdr_t; #define DTRACE_RECORD_LOAD_TIMESTAMP(dtrh) \ ((dtrh)->dtrh_timestamp_lo + \ ((uint64_t)(dtrh)->dtrh_timestamp_hi << 32)) #define DTRACE_RECORD_STORE_TIMESTAMP(dtrh, hrtime) { \ (dtrh)->dtrh_timestamp_lo = (uint32_t)(hrtime); \ (dtrh)->dtrh_timestamp_hi = (hrtime) >> 32; \ } /* * DTrace Status * * The status of DTrace is relayed via the dtrace_status structure. This * structure contains members to count drops other than the capacity drops * available via the buffer interface (see above). This consists of dynamic * drops (including capacity dynamic drops, rinsing drops and dirty drops), and * speculative drops (including capacity speculative drops, drops due to busy * speculative buffers and drops due to unavailable speculative buffers). * Additionally, the status structure contains a field to indicate the number * of "fill"-policy buffers that have been filled and a boolean field to indicate * that exit() has been called. If the dtst_exiting field is non-zero, no * further data will be generated until tracing is stopped (at which time any * enablings of the END action will be processed); if user-level sees that * this field is non-zero, tracing should be stopped as soon as possible. */ typedef struct dtrace_status { uint64_t dtst_dyndrops; /* dynamic drops */ uint64_t dtst_dyndrops_rinsing; /* dyn drops due to rinsing */ uint64_t dtst_dyndrops_dirty; /* dyn drops due to dirty */ uint64_t dtst_specdrops; /* speculative drops */ uint64_t dtst_specdrops_busy; /* spec drops due to busy */ uint64_t dtst_specdrops_unavail; /* spec drops due to unavail */ uint64_t dtst_errors; /* total errors */ uint64_t dtst_filled; /* number of filled bufs */ uint64_t dtst_stkstroverflows; /* stack string tab overflows */ uint64_t dtst_dblerrors; /* errors in ERROR probes */ char dtst_killed; /* non-zero if killed */ char dtst_exiting; /* non-zero if exit() called */ char dtst_pad[6]; /* pad out to 64-bit align */ } dtrace_status_t; /* * DTrace Configuration * * User-level may need to understand some elements of the kernel DTrace * configuration in order to generate correct DIF. This information is * conveyed via the dtrace_conf structure. */ typedef struct dtrace_conf { uint_t dtc_difversion; /* supported DIF version */ uint_t dtc_difintregs; /* # of DIF integer registers */ uint_t dtc_diftupregs; /* # of DIF tuple registers */ uint_t dtc_ctfmodel; /* CTF data model */ uint_t dtc_pad[8]; /* reserved for future use */ } dtrace_conf_t; /* * DTrace Faults * * The constants below DTRACEFLT_LIBRARY indicate probe processing faults; * constants at or above DTRACEFLT_LIBRARY indicate faults in probe * postprocessing at user-level.
Probe processing faults induce an ERROR * probe and are replicated in unistd.d to allow users' ERROR probes to decode * the error condition using these symbolic labels. */ #define DTRACEFLT_UNKNOWN 0 /* Unknown fault */ #define DTRACEFLT_BADADDR 1 /* Bad address */ #define DTRACEFLT_BADALIGN 2 /* Bad alignment */ #define DTRACEFLT_ILLOP 3 /* Illegal operation */ #define DTRACEFLT_DIVZERO 4 /* Divide-by-zero */ #define DTRACEFLT_NOSCRATCH 5 /* Out of scratch space */ #define DTRACEFLT_KPRIV 6 /* Illegal kernel access */ #define DTRACEFLT_UPRIV 7 /* Illegal user access */ #define DTRACEFLT_TUPOFLOW 8 /* Tuple stack overflow */ #define DTRACEFLT_BADSTACK 9 /* Bad stack */ #define DTRACEFLT_LIBRARY 1000 /* Library-level fault */ /* * DTrace Argument Types * * Because it would waste both space and time, argument types do not reside * with the probe. In order to determine argument types for args[X] * variables, the D compiler queries for argument types on a probe-by-probe * basis. (This optimizes for the common case that arguments are either not * used or used in an untyped fashion.) Typed arguments are specified with a * string of the type name in the dtargd_native member of the argument * description structure. Typed arguments may be further translated to types * of greater stability; the provider indicates such a translated argument by * filling in the dtargd_xlate member with the string of the translated type. * Finally, the provider may indicate which argument value a given argument * maps to by setting the dtargd_mapping member -- allowing a single argument * to map to multiple args[X] variables. */ typedef struct dtrace_argdesc { dtrace_id_t dtargd_id; /* probe identifier */ int dtargd_ndx; /* arg number (-1 iff none) */ int dtargd_mapping; /* value mapping */ char dtargd_native[DTRACE_ARGTYPELEN]; /* native type name */ char dtargd_xlate[DTRACE_ARGTYPELEN]; /* translated type name */ } dtrace_argdesc_t; /* * DTrace Stability Attributes * * Each DTrace provider advertises the name and data stability of each of its * probe description components, as well as its architectural dependencies. * The D compiler can query the provider attributes (dtrace_pattr_t below) in * order to compute the properties of an input program and report them.
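For instance, a hypothetical provider might advertise the following attributes, using the dtrace_pattr_t and the stability/class constants defined just below; the particular choices here are purely illustrative:

static dtrace_pattr_t example_attr = {
{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },	/* provider */
{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },	/* module */
{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },	/* function */
{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },	/* name */
{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_ISA },	/* args[] */
};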
*/ typedef uint8_t dtrace_stability_t; /* stability code (see attributes(5)) */ typedef uint8_t dtrace_class_t; /* architectural dependency class */ #define DTRACE_STABILITY_INTERNAL 0 /* private to DTrace itself */ #define DTRACE_STABILITY_PRIVATE 1 /* private to Sun (see docs) */ #define DTRACE_STABILITY_OBSOLETE 2 /* scheduled for removal */ #define DTRACE_STABILITY_EXTERNAL 3 /* not controlled by Sun */ #define DTRACE_STABILITY_UNSTABLE 4 /* new or rapidly changing */ #define DTRACE_STABILITY_EVOLVING 5 /* less rapidly changing */ #define DTRACE_STABILITY_STABLE 6 /* mature interface from Sun */ #define DTRACE_STABILITY_STANDARD 7 /* industry standard */ #define DTRACE_STABILITY_MAX 7 /* maximum valid stability */ #define DTRACE_CLASS_UNKNOWN 0 /* unknown architectural dependency */ #define DTRACE_CLASS_CPU 1 /* CPU-module-specific */ #define DTRACE_CLASS_PLATFORM 2 /* platform-specific (uname -i) */ #define DTRACE_CLASS_GROUP 3 /* hardware-group-specific (uname -m) */ #define DTRACE_CLASS_ISA 4 /* ISA-specific (uname -p) */ #define DTRACE_CLASS_COMMON 5 /* common to all systems */ #define DTRACE_CLASS_MAX 5 /* maximum valid class */ #define DTRACE_PRIV_NONE 0x0000 #define DTRACE_PRIV_KERNEL 0x0001 #define DTRACE_PRIV_USER 0x0002 #define DTRACE_PRIV_PROC 0x0004 #define DTRACE_PRIV_OWNER 0x0008 #define DTRACE_PRIV_ZONEOWNER 0x0010 #define DTRACE_PRIV_ALL \ (DTRACE_PRIV_KERNEL | DTRACE_PRIV_USER | \ DTRACE_PRIV_PROC | DTRACE_PRIV_OWNER | DTRACE_PRIV_ZONEOWNER) typedef struct dtrace_ppriv { uint32_t dtpp_flags; /* privilege flags */ uid_t dtpp_uid; /* user ID */ zoneid_t dtpp_zoneid; /* zone ID */ } dtrace_ppriv_t; typedef struct dtrace_attribute { dtrace_stability_t dtat_name; /* entity name stability */ dtrace_stability_t dtat_data; /* entity data stability */ dtrace_class_t dtat_class; /* entity data dependency */ } dtrace_attribute_t; typedef struct dtrace_pattr { dtrace_attribute_t dtpa_provider; /* provider attributes */ dtrace_attribute_t dtpa_mod; /* module attributes */ dtrace_attribute_t dtpa_func; /* function attributes */ dtrace_attribute_t dtpa_name; /* name attributes */ dtrace_attribute_t dtpa_args; /* args[] attributes */ } dtrace_pattr_t; typedef struct dtrace_providerdesc { char dtvd_name[DTRACE_PROVNAMELEN]; /* provider name */ dtrace_pattr_t dtvd_attr; /* stability attributes */ dtrace_ppriv_t dtvd_priv; /* privileges required */ } dtrace_providerdesc_t; /* * DTrace Pseudodevice Interface * * DTrace is controlled through ioctl(2)'s to the in-kernel dtrace:dtrace * pseudodevice driver. These ioctls comprise the user-kernel interface to * DTrace. */ #ifdef illumos #define DTRACEIOC (('d' << 24) | ('t' << 16) | ('r' << 8)) #define DTRACEIOC_PROVIDER (DTRACEIOC | 1) /* provider query */ #define DTRACEIOC_PROBES (DTRACEIOC | 2) /* probe query */ #define DTRACEIOC_BUFSNAP (DTRACEIOC | 4) /* snapshot buffer */ #define DTRACEIOC_PROBEMATCH (DTRACEIOC | 5) /* match probes */ #define DTRACEIOC_ENABLE (DTRACEIOC | 6) /* enable probes */ #define DTRACEIOC_AGGSNAP (DTRACEIOC | 7) /* snapshot agg. */ #define DTRACEIOC_EPROBE (DTRACEIOC | 8) /* get eprobe desc. */ #define DTRACEIOC_PROBEARG (DTRACEIOC | 9) /* get probe arg */ #define DTRACEIOC_CONF (DTRACEIOC | 10) /* get config. */ #define DTRACEIOC_STATUS (DTRACEIOC | 11) /* get status */ #define DTRACEIOC_GO (DTRACEIOC | 12) /* start tracing */ #define DTRACEIOC_STOP (DTRACEIOC | 13) /* stop tracing */ #define DTRACEIOC_AGGDESC (DTRACEIOC | 15) /* get agg. desc. 
*/ #define DTRACEIOC_FORMAT (DTRACEIOC | 16) /* get format str */ #define DTRACEIOC_DOFGET (DTRACEIOC | 17) /* get DOF */ #define DTRACEIOC_REPLICATE (DTRACEIOC | 18) /* replicate enab */ #else #define DTRACEIOC_PROVIDER _IOWR('x',1,dtrace_providerdesc_t) /* provider query */ #define DTRACEIOC_PROBES _IOWR('x',2,dtrace_probedesc_t) /* probe query */ #define DTRACEIOC_BUFSNAP _IOW('x',4,dtrace_bufdesc_t *) /* snapshot buffer */ #define DTRACEIOC_PROBEMATCH _IOWR('x',5,dtrace_probedesc_t) /* match probes */ typedef struct { void *dof; /* DOF userland address written to driver. */ int n_matched; /* # matches returned by driver. */ } dtrace_enable_io_t; #define DTRACEIOC_ENABLE _IOWR('x',6,dtrace_enable_io_t) /* enable probes */ #define DTRACEIOC_AGGSNAP _IOW('x',7,dtrace_bufdesc_t *) /* snapshot agg. */ #define DTRACEIOC_EPROBE _IOW('x',8,dtrace_eprobedesc_t) /* get eprobe desc. */ #define DTRACEIOC_PROBEARG _IOWR('x',9,dtrace_argdesc_t) /* get probe arg */ #define DTRACEIOC_CONF _IOR('x',10,dtrace_conf_t) /* get config. */ #define DTRACEIOC_STATUS _IOR('x',11,dtrace_status_t) /* get status */ #define DTRACEIOC_GO _IOR('x',12,processorid_t) /* start tracing */ #define DTRACEIOC_STOP _IOWR('x',13,processorid_t) /* stop tracing */ #define DTRACEIOC_AGGDESC _IOW('x',15,dtrace_aggdesc_t *) /* get agg. desc. */ #define DTRACEIOC_FORMAT _IOWR('x',16,dtrace_fmtdesc_t) /* get format str */ #define DTRACEIOC_DOFGET _IOW('x',17,dof_hdr_t *) /* get DOF */ #define DTRACEIOC_REPLICATE _IOW('x',18,dtrace_repldesc_t) /* replicate enab */ #endif /* * DTrace Helpers * * In general, DTrace establishes probes in processes and takes actions on * processes without knowing their specific user-level structures. Instead of * existing in the framework, process-specific knowledge is contained by the * enabling D program -- which can apply process-specific knowledge by making * appropriate use of DTrace primitives like copyin() and copyinstr() to * operate on user-level data. However, there may exist some specific probes * of particular semantic relevance that the application developer may wish to * explicitly export. For example, an application may wish to export a probe * at the point that it begins and ends certain well-defined transactions. In * addition to providing probes, programs may wish to offer assistance for * certain actions. For example, in highly dynamic environments (e.g., Java), * it may be difficult to obtain a stack trace in terms of meaningful symbol * names (the translation from instruction addresses to corresponding symbol * names may only be possible in situ); these environments may wish to define * a series of actions to be applied in situ to obtain a meaningful stack * trace. * * These two mechanisms -- user-level statically defined tracing and assisting * DTrace actions -- are provided via DTrace _helpers_. Helpers are specified * via DOF, but unlike enabling DOF, helper DOF may contain definitions of * providers, probes and their arguments. If a helper wishes to provide * action assistance, probe descriptions and corresponding DIF actions may be * specified in the helper DOF. For such helper actions, however, the probe * description describes the specific helper: all DTrace helpers have the * provider name "dtrace" and the module name "helper", and the name of the * helper is contained in the function name (for example, the ustack() helper * is named "ustack"). 
Any helper-specific name may be contained in the name * (for example, if a helper were to have a constructor, it might be named * "dtrace:helper:<helper name>:init"). Helper actions are only called when the * action that they are helping is taken. Helper actions may only return DIF * expressions, and may only call the following subroutines: * * alloca() <= Allocates memory out of the consumer's scratch space * bcopy() <= Copies memory to scratch space * copyin() <= Copies memory from user-level into consumer's scratch * copyinto() <= Copies memory into a specific location in scratch * copyinstr() <= Copies a string into a specific location in scratch * * Helper actions may only access the following built-in variables: * * curthread <= Current kthread_t pointer * tid <= Current thread identifier * pid <= Current process identifier * ppid <= Parent process identifier * uid <= Current user ID * gid <= Current group ID * execname <= Current executable name * zonename <= Current zone name * * Helper actions may not manipulate or allocate dynamic variables, but they * may have clause-local and statically-allocated global variables. The * helper action variable state is specific to the helper action -- variables * used by the helper action may not be accessed outside of the helper * action, and the helper action may not access variables that live outside * of it. Helper actions may not load from kernel memory at-large; they are * restricted to loading current user state (via copyin() and variants) and * scratch space. As with probe enablings, helper actions are executed in * program order. The result of the helper action is the result of the last * executing helper expression. * * Helpers -- composed of either providers/probes or probes/actions (or both) * -- are added by opening the "helper" minor node, and issuing an ioctl(2) * (DTRACEHIOC_ADDDOF) that specifies the dof_helper_t structure. This * encapsulates the name and base address of the user-level library or * executable publishing the helpers and probes as well as the DOF that * contains the definitions of those helpers and probes. * * The DTRACEHIOC_ADD and DTRACEHIOC_REMOVE are left in place for legacy * helpers and should no longer be used. No other ioctls are valid on the * helper minor node. */ #ifdef illumos #define DTRACEHIOC (('d' << 24) | ('t' << 16) | ('h' << 8)) #define DTRACEHIOC_ADD (DTRACEHIOC | 1) /* add helper */ #define DTRACEHIOC_REMOVE (DTRACEHIOC | 2) /* remove helper */ #define DTRACEHIOC_ADDDOF (DTRACEHIOC | 3) /* add helper DOF */ #else #define DTRACEHIOC_ADD _IOWR('z', 1, dof_hdr_t)/* add helper */ #define DTRACEHIOC_REMOVE _IOW('z', 2, int) /* remove helper */ #define DTRACEHIOC_ADDDOF _IOWR('z', 3, dof_helper_t)/* add helper DOF */ #endif typedef struct dof_helper { char dofhp_mod[DTRACE_MODNAMELEN]; /* executable or library name */ uint64_t dofhp_addr; /* base address of object */ uint64_t dofhp_dof; /* address of helper DOF */ #ifdef __FreeBSD__ pid_t dofhp_pid; /* target process ID */ int dofhp_gen; #endif } dof_helper_t; #define DTRACEMNR_DTRACE "dtrace" /* node for DTrace ops */ #define DTRACEMNR_HELPER "helper" /* node for helpers */ #define DTRACEMNRN_DTRACE 0 /* minor for DTrace ops */ #define DTRACEMNRN_HELPER 1 /* minor for helpers */ #define DTRACEMNRN_CLONE 2 /* first clone minor */ #ifdef _KERNEL /* * DTrace Provider API * * The following functions are implemented by the DTrace framework and are * used to implement separate in-kernel DTrace providers. Common functions * are provided in uts/common/os/dtrace.c.
ISA-dependent subroutines are * defined in uts/<isa>/dtrace/dtrace_asm.s or uts/<isa>/dtrace/dtrace_isa.c. * * The provider API has two halves: the API that the providers consume from * DTrace, and the API that providers make available to DTrace. * * 1 Framework-to-Provider API * * 1.1 Overview * * The Framework-to-Provider API is represented by the dtrace_pops structure * that the provider passes to the framework when registering itself. This * structure consists of the following members: * * dtps_provide() <-- Provide all probes, all modules * dtps_provide_module() <-- Provide all probes in specified module * dtps_enable() <-- Enable specified probe * dtps_disable() <-- Disable specified probe * dtps_suspend() <-- Suspend specified probe * dtps_resume() <-- Resume specified probe * dtps_getargdesc() <-- Get the argument description for args[X] * dtps_getargval() <-- Get the value for an argX or args[X] variable * dtps_usermode() <-- Find out if the probe was fired in user mode * dtps_destroy() <-- Destroy all state associated with this probe * * 1.2 void dtps_provide(void *arg, const dtrace_probedesc_t *spec) * * 1.2.1 Overview * * Called to indicate that the provider should provide all probes. If the * specified description is non-NULL, dtps_provide() is being called because * no probe matched a specified probe -- if the provider has the ability to * create custom probes, it may wish to create a probe that matches the * specified description. * * 1.2.2 Arguments and notes * * The first argument is the cookie as passed to dtrace_register(). The * second argument is a pointer to a probe description that the provider may * wish to consider when creating custom probes. The provider is expected to * call back into the DTrace framework via dtrace_probe_create() to create * any necessary probes. dtps_provide() may be called even if the provider * has made available all probes; the provider should check the return value * of dtrace_probe_create() to handle this case. Note that the provider need * not implement both dtps_provide() and dtps_provide_module(); see * "Arguments and Notes" for dtrace_register(), below. * * 1.2.3 Return value * * None. * * 1.2.4 Caller's context * * dtps_provide() is typically called from open() or ioctl() context, but may * be called from other contexts as well. The DTrace framework is locked in * such a way that providers may not register or unregister. This means that * the provider may not call any DTrace API that affects its registration with * the framework, including dtrace_register(), dtrace_unregister(), * dtrace_invalidate(), and dtrace_condense(). However, the context is such * that the provider may (and indeed, is expected to) call probe-related * DTrace routines, including dtrace_probe_create(), dtrace_probe_lookup(), * and dtrace_probe_arg(). * * 1.3 void dtps_provide_module(void *arg, modctl_t *mp) * * 1.3.1 Overview * * Called to indicate that the provider should provide all probes in the * specified module. * * 1.3.2 Arguments and notes * * The first argument is the cookie as passed to dtrace_register(). The * second argument is a pointer to a modctl structure that indicates the * module for which probes should be created. * * 1.3.3 Return value * * None. * * 1.3.4 Caller's context * * dtps_provide_module() may be called from open() or ioctl() context, but * may also be called from a module loading context. mod_lock is held, and * the DTrace framework is locked in such a way that providers may not * register or unregister.
This means that the provider may not call any * DTrace API that affects its registration with the framework, including * dtrace_register(), dtrace_unregister(), dtrace_invalidate(), and * dtrace_condense(). However, the context is such that the provider may (and * indeed, is expected to) call probe-related DTrace routines, including * dtrace_probe_create(), dtrace_probe_lookup(), and dtrace_probe_arg(). Note * that the provider need not implement both dtps_provide() and * dtps_provide_module(); see "Arguments and Notes" for dtrace_register(), * below. * * 1.4 void dtps_enable(void *arg, dtrace_id_t id, void *parg) * * 1.4.1 Overview * * Called to enable the specified probe. * * 1.4.2 Arguments and notes * * The first argument is the cookie as passed to dtrace_register(). The * second argument is the identifier of the probe to be enabled. The third * argument is the probe argument as passed to dtrace_probe_create(). * dtps_enable() will be called when a probe transitions from not being * enabled at all to having one or more ECBs. The number of ECBs associated * with the probe may change without subsequent calls into the provider. * When the number of ECBs drops to zero, the provider will be explicitly * told to disable the probe via dtps_disable(). dtrace_probe() should never * be called for a probe identifier that hasn't been explicitly enabled via * dtps_enable(). * * 1.4.3 Return value * * None. * * 1.4.4 Caller's context * * The DTrace framework is locked in such a way that it may not be called * back into at all. cpu_lock is held. mod_lock is not held and may not * be acquired. * * 1.5 void dtps_disable(void *arg, dtrace_id_t id, void *parg) * * 1.5.1 Overview * * Called to disable the specified probe. * * 1.5.2 Arguments and notes * * The first argument is the cookie as passed to dtrace_register(). The * second argument is the identifier of the probe to be disabled. The third * argument is the probe argument as passed to dtrace_probe_create(). * dtps_disable() will be called when a probe transitions from being enabled * to having zero ECBs. dtrace_probe() should never be called for a probe * identifier that has been disabled via dtps_disable(). * * 1.5.3 Return value * * None. * * 1.5.4 Caller's context * * The DTrace framework is locked in such a way that it may not be called * back into at all. cpu_lock is held. mod_lock is not held and may not * be acquired. * * 1.6 void dtps_suspend(void *arg, dtrace_id_t id, void *parg) * * 1.6.1 Overview * * Called to suspend the specified enabled probe. This entry point is for * providers that may need to suspend some or all of their probes when CPUs * are being powered on or when the boot monitor is being entered for a * prolonged period of time. * * 1.6.2 Arguments and notes * * The first argument is the cookie as passed to dtrace_register(). The * second argument is the identifier of the probe to be suspended. The * third argument is the probe argument as passed to dtrace_probe_create(). * dtps_suspend will only be called on an enabled probe. Providers that * provide a dtps_suspend entry point will want to take roughly the same * action as they take for dtps_disable. * * 1.6.3 Return value * * None. * * 1.6.4 Caller's context * * Interrupts are disabled. The DTrace framework is in a state such that the * specified probe cannot be disabled or destroyed for the duration of * dtps_suspend(). As interrupts are disabled, the provider is afforded * little latitude; the provider is expected to do no more than a store to * memory.
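To illustrate sections 1.4 and 1.5, a trivial provider's enable/disable pair might merely toggle a flag that its probe sites consult before calling dtrace_probe(); the example_probe structure here is hypothetical, a minimal sketch rather than a real provider:

struct example_probe {
	int exp_enabled;		/* consulted at the probe site */
};

/*ARGSUSED*/
static void
example_enable(void *arg, dtrace_id_t id, void *parg)
{
	struct example_probe *p = parg;

	p->exp_enabled = 1;		/* probe now has one or more ECBs */
}

/*ARGSUSED*/
static void
example_disable(void *arg, dtrace_id_t id, void *parg)
{
	struct example_probe *p = parg;

	p->exp_enabled = 0;		/* ECB count dropped to zero */
}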
* * 1.7 void dtps_resume(void *arg, dtrace_id_t id, void *parg) * * 1.7.1 Overview * * Called to resume the specified enabled probe. This entry point is for * providers that may need to resume some or all of their probes after the * completion of an event that induced a call to dtps_suspend(). * * 1.7.2 Arguments and notes * * The first argument is the cookie as passed to dtrace_register(). The * second argument is the identifier of the probe to be resumed. The * third argument is the probe argument as passed to dtrace_probe_create(). * dtps_resume will only be called on an enabled probe. Providers that * provide a dtps_resume entry point will want to take roughly the same * action as they take for dtps_enable. * * 1.7.3 Return value * * None. * * 1.7.4 Caller's context * * Interrupts are disabled. The DTrace framework is in a state such that the * specified probe cannot be disabled or destroyed for the duration of * dtps_resume(). As interrupts are disabled, the provider is afforded * little latitude; the provider is expected to do no more than a store to * memory. * * 1.8 void dtps_getargdesc(void *arg, dtrace_id_t id, void *parg, * dtrace_argdesc_t *desc) * * 1.8.1 Overview * * Called to retrieve the argument description for an args[X] variable. * * 1.8.2 Arguments and notes * * The first argument is the cookie as passed to dtrace_register(). The * second argument is the identifier of the current probe. The third * argument is the probe argument as passed to dtrace_probe_create(). The * fourth argument is a pointer to the argument description. This * description is both an input and output parameter: it contains the * index of the desired argument in the dtargd_ndx field, and expects * the other fields to be filled in upon return. If there is no argument * corresponding to the specified index, the dtargd_ndx field should be set * to DTRACE_ARGNONE. * * 1.8.3 Return value * * None. The dtargd_ndx, dtargd_native, dtargd_xlate and dtargd_mapping * members of the dtrace_argdesc_t structure are all output values. * * 1.8.4 Caller's context * * dtps_getargdesc() is called from ioctl() context. mod_lock is held, and * the DTrace framework is locked in such a way that providers may not * register or unregister. This means that the provider may not call any * DTrace API that affects its registration with the framework, including * dtrace_register(), dtrace_unregister(), dtrace_invalidate(), and * dtrace_condense(). * * 1.9 uint64_t dtps_getargval(void *arg, dtrace_id_t id, void *parg, * int argno, int aframes) * * 1.9.1 Overview * * Called to retrieve a value for an argX or args[X] variable. * * 1.9.2 Arguments and notes * * The first argument is the cookie as passed to dtrace_register(). The * second argument is the identifier of the current probe. The third * argument is the probe argument as passed to dtrace_probe_create(). The * fourth argument is the number of the argument (the X in the example in * 1.9.1). The fifth argument is the number of stack frames that were used * to get from the actual place in the code that fired the probe to * dtrace_probe() itself, the so-called artificial frames. This argument may * be used to descend an appropriate number of frames to find the correct * values. If this entry point is left NULL, the dtrace_getarg() built-in * function is used. * * 1.9.3 Return value * * The value of the argument. * * 1.9.4 Caller's context * * This is called from within dtrace_probe() meaning that interrupts * are disabled. No locks should be taken within this entry point.
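A sketch of the dtps_getargdesc() entry point described in section 1.8, for a hypothetical provider whose probes carry a single argument of native type uint64_t (the type choice and helper name are illustrative, not part of any real provider):

/*ARGSUSED*/
static void
example_getargdesc(void *arg, dtrace_id_t id, void *parg,
    dtrace_argdesc_t *desc)
{
	if (desc->dtargd_ndx == 0) {
		(void) strlcpy(desc->dtargd_native, "uint64_t",
		    DTRACE_ARGTYPELEN);
		/* No translation; dtargd_xlate is left empty. */
		desc->dtargd_mapping = desc->dtargd_ndx;
	} else {
		desc->dtargd_ndx = DTRACE_ARGNONE;	/* no such argument */
	}
}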
* * 1.10 int dtps_usermode(void *arg, dtrace_id_t id, void *parg) * * 1.10.1 Overview * * Called to determine if the probe was fired in a user context. * * 1.10.2 Arguments and notes * * The first argument is the cookie as passed to dtrace_register(). The * second argument is the identifier of the current probe. The third * argument is the probe argument as passed to dtrace_probe_create(). This * entry point must not be left NULL for providers whose probes allow for * mixed mode tracing, that is to say those probes that can fire during * kernel- _or_ user-mode execution. * * 1.10.3 Return value * * A bitwise OR that encapsulates both the mode (either DTRACE_MODE_KERNEL * or DTRACE_MODE_USER) and the policy when the privilege of the enabling * is insufficient for that mode (a combination of DTRACE_MODE_NOPRIV_DROP, * DTRACE_MODE_NOPRIV_RESTRICT, and DTRACE_MODE_LIMITEDPRIV_RESTRICT). If * the DTRACE_MODE_NOPRIV_DROP bit is set, insufficient privilege will result * in the probe firing being silently ignored for the enabling; if the * DTRACE_MODE_NOPRIV_RESTRICT bit is set, insufficient privilege will not * prevent probe processing for the enabling, but restrictions will be in * place that induce a UPRIV fault upon attempt to examine probe arguments * or current process state. If the DTRACE_MODE_LIMITEDPRIV_RESTRICT bit * is set, similar restrictions will be placed upon operation if the * privilege is sufficient to process the enabling, but does not otherwise * entitle the enabling to all zones. The DTRACE_MODE_NOPRIV_DROP and * DTRACE_MODE_NOPRIV_RESTRICT are mutually exclusive (and one of these * two policies must be specified), but either may be combined (or not) * with DTRACE_MODE_LIMITEDPRIV_RESTRICT. * * 1.10.4 Caller's context * * This is called from within dtrace_probe() meaning that interrupts * are disabled. No locks should be taken within this entry point. * * 1.11 void dtps_destroy(void *arg, dtrace_id_t id, void *parg) * * 1.11.1 Overview * * Called to destroy the specified probe. * * 1.11.2 Arguments and notes * * The first argument is the cookie as passed to dtrace_register(). The * second argument is the identifier of the probe to be destroyed. The third * argument is the probe argument as passed to dtrace_probe_create(). The * provider should free all state associated with the probe. The framework * guarantees that dtps_destroy() is only called for probes that have either * been disabled via dtps_disable() or were never enabled via dtps_enable(). * Once dtps_destroy() has been called for a probe, no further call will be * made specifying the probe. * * 1.11.3 Return value * * None. * * 1.11.4 Caller's context * * The DTrace framework is locked in such a way that it may not be called * back into at all. mod_lock is held. cpu_lock is not held, and may not be * acquired. * * * 2 Provider-to-Framework API * * 2.1 Overview * * The Provider-to-Framework API provides the mechanism for the provider to * register itself with the DTrace framework, to create probes, to lookup * probes and (most importantly) to fire probes. The Provider-to-Framework API
The Provider-to-Framework * consists of: * * dtrace_register() <-- Register a provider with the DTrace framework * dtrace_unregister() <-- Remove a provider's DTrace registration * dtrace_invalidate() <-- Invalidate the specified provider * dtrace_condense() <-- Remove a provider's unenabled probes * dtrace_attached() <-- Indicates whether or not DTrace has attached * dtrace_probe_create() <-- Create a DTrace probe * dtrace_probe_lookup() <-- Lookup a DTrace probe based on its name * dtrace_probe_arg() <-- Return the probe argument for a specific probe * dtrace_probe() <-- Fire the specified probe * * 2.2 int dtrace_register(const char *name, const dtrace_pattr_t *pap, * uint32_t priv, cred_t *cr, const dtrace_pops_t *pops, void *arg, * dtrace_provider_id_t *idp) * * 2.2.1 Overview * * dtrace_register() registers the calling provider with the DTrace * framework. It should generally be called by DTrace providers in their * attach(9E) entry point. * * 2.2.2 Arguments and Notes * * The first argument is the name of the provider. The second argument is a * pointer to the stability attributes for the provider. The third argument * is the privilege flags for the provider, and must be some combination of: * * DTRACE_PRIV_NONE <= All users may enable probes from this provider * * DTRACE_PRIV_PROC <= Any user with privilege of PRIV_DTRACE_PROC may * enable probes from this provider * * DTRACE_PRIV_USER <= Any user with privilege of PRIV_DTRACE_USER may * enable probes from this provider * * DTRACE_PRIV_KERNEL <= Any user with privilege of PRIV_DTRACE_KERNEL * may enable probes from this provider * * DTRACE_PRIV_OWNER <= This flag places an additional constraint on * the privilege requirements above. These probes * require either (a) a user ID matching the user * ID of the cred passed in the fourth argument * or (b) the PRIV_PROC_OWNER privilege. * * DTRACE_PRIV_ZONEOWNER<= This flag places an additional constraint on * the privilege requirements above. These probes * require either (a) a zone ID matching the zone * ID of the cred passed in the fourth argument * or (b) the PRIV_PROC_ZONE privilege. * * Note that these flags designate the _visibility_ of the probes, not * the conditions under which they may or may not fire. * * The fourth argument is the credential that is associated with the * provider. This argument should be NULL if the privilege flags don't * include DTRACE_PRIV_OWNER or DTRACE_PRIV_ZONEOWNER. If non-NULL, the * framework stashes the uid and zoneid represented by this credential * for use at probe-time, in implicit predicates. These limit visibility * of the probes to users and/or zones which have sufficient privilege to * access them. * * The fifth argument is a DTrace provider operations vector, which provides * the implementation for the Framework-to-Provider API. (See Section 1, * above.) This must be non-NULL, and each member must be non-NULL. The * exceptions to this are (1) the dtps_provide() and dtps_provide_module() * members (if the provider so desires, _one_ of these members may be left * NULL -- denoting that the provider only implements the other) and (2) * the dtps_suspend() and dtps_resume() members, which must either both be * NULL or both be non-NULL. * * The sixth argument is a cookie to be specified as the first argument for * each function in the Framework-to-Provider API. This argument may have * any value. * * The final argument is a pointer to dtrace_provider_id_t. 
If * dtrace_register() successfully completes, the provider identifier will be * stored in the memory pointed to by this argument. This argument must be * non-NULL. * * 2.2.3 Return value * * On success, dtrace_register() returns 0 and stores the new provider's * identifier into the memory pointed to by the idp argument. On failure, * dtrace_register() returns an errno: * * EINVAL The arguments passed to dtrace_register() were somehow invalid. * This may be because a parameter that must be non-NULL was NULL, * because the name was invalid (either empty or an illegal * provider name) or because the attributes were invalid. * * No other failure code is returned. * * 2.2.4 Caller's context * * dtrace_register() may induce calls to dtps_provide(); the provider must * hold no locks across dtrace_register() that may also be acquired by * dtps_provide(). cpu_lock and mod_lock must not be held. * * 2.3 int dtrace_unregister(dtrace_provider_t id) * * 2.3.1 Overview * * Unregisters the specified provider from the DTrace framework. It should * generally be called by DTrace providers in their detach(9E) entry point. * * 2.3.2 Arguments and Notes * * The only argument is the provider identifier, as returned from a * successful call to dtrace_register(). As a result of calling * dtrace_unregister(), the DTrace framework will call back into the provider * via the dtps_destroy() entry point. Once dtrace_unregister() successfully * completes, however, the DTrace framework will no longer make calls through * the Framework-to-Provider API. * * 2.3.3 Return value * * On success, dtrace_unregister() returns 0. On failure, dtrace_unregister() * returns an errno: * * EBUSY There are currently processes that have the DTrace pseudodevice * open, or there exists an anonymous enabling that hasn't yet * been claimed. * * No other failure code is returned. * * 2.3.4 Caller's context * * Because a call to dtrace_unregister() may induce calls through the * Framework-to-Provider API, the caller may not hold any lock across * dtrace_unregister() that is also acquired in any of the Framework-to- * Provider API functions. Additionally, mod_lock may not be held. * * 2.4 void dtrace_invalidate(dtrace_provider_id_t id) * * 2.4.1 Overview * * Invalidates the specified provider. All subsequent probe lookups for the * specified provider will fail, but its probes will not be removed. * * 2.4.2 Arguments and notes * * The only argument is the provider identifier, as returned from a * successful call to dtrace_register(). In general, a provider's probes * always remain valid; dtrace_invalidate() is a mechanism for invalidating * an entire provider, regardless of whether or not its probes are enabled. * Note that dtrace_invalidate() will _not_ prevent already enabled * probes from firing -- it will merely prevent any new enablings of the * provider's probes. * * 2.5 int dtrace_condense(dtrace_provider_id_t id) * * 2.5.1 Overview * * Removes all the unenabled probes for the given provider. This function is * not unlike dtrace_unregister(), except that it doesn't remove the * provider, just as many of its associated probes as it can. * * 2.5.2 Arguments and Notes * * As with dtrace_unregister(), the sole argument is the provider identifier * as returned from a successful call to dtrace_register(). As a result of * calling dtrace_condense(), the DTrace framework will call back into the * given provider's dtps_destroy() entry point for each of the provider's * unenabled probes.
* * 2.5.3 Return value * * Currently, dtrace_condense() always returns 0. However, consumers of this * function should check the return value as appropriate; its behavior may * change in the future. * * 2.5.4 Caller's context * * As with dtrace_unregister(), the caller may not hold any lock across * dtrace_condense() that is also acquired in the provider's entry points. * Also, mod_lock may not be held. * * 2.6 int dtrace_attached() * * 2.6.1 Overview * * Indicates whether or not DTrace has attached. * * 2.6.2 Arguments and Notes * * For most providers, DTrace makes initial contact beyond registration. * That is, once a provider has registered with DTrace, it waits to hear * from DTrace to create probes. However, some providers may wish to * proactively create probes without first being told by DTrace to do so. * If providers wish to do this, they must first call dtrace_attached() to * determine if DTrace itself has attached. If dtrace_attached() returns 0, * the provider must not make any other Provider-to-Framework API call. * * 2.6.3 Return value * * dtrace_attached() returns 1 if DTrace has attached, 0 otherwise. * * 2.7 int dtrace_probe_create(dtrace_provider_t id, const char *mod, * const char *func, const char *name, int aframes, void *arg) * * 2.7.1 Overview * * Creates a probe with the specified module name, function name, and probe * name. * * 2.7.2 Arguments and Notes * * The first argument is the provider identifier, as returned from a * successful call to dtrace_register(). The second, third, and fourth * arguments are the module name, function name, and probe name, * respectively. Of these, module name and function name may both be NULL * (in which case the probe is considered to be unanchored), or they may both * be non-NULL. The name must be non-NULL, and must point to a non-empty * string. * * The fifth argument is the number of artificial stack frames that will be * found on the stack when dtrace_probe() is called for the new probe. These * artificial frames will automatically be pruned should the stack() or * stackdepth() functions be called as part of one of the probe's ECBs. If * the probe doesn't add an artificial frame, this parameter should be * zero. * * The final argument is a probe argument that will be passed back to the * provider when a probe-specific operation is called. (e.g., via * dtps_enable(), dtps_disable(), etc.) * * Note that it is up to the provider to be sure that the probe that it * creates does not already exist -- if the provider is unsure of the probe's * existence, it should assure its absence with dtrace_probe_lookup() before * calling dtrace_probe_create(). * * 2.7.3 Return value * * dtrace_probe_create() always succeeds, and always returns the identifier * of the newly-created probe. * * 2.7.4 Caller's context * * While dtrace_probe_create() is generally expected to be called from * dtps_provide() and/or dtps_provide_module(), it may be called from other * non-DTrace contexts. Neither cpu_lock nor mod_lock may be held. * * 2.8 dtrace_id_t dtrace_probe_lookup(dtrace_provider_t id, const char *mod, * const char *func, const char *name) * * 2.8.1 Overview * * Looks up a probe based on provider and one or more of module name, * function name and probe name. * * 2.8.2 Arguments and Notes * * The first argument is the provider identifier, as returned from a * successful call to dtrace_register(). The second, third, and fourth * arguments are the module name, function name, and probe name, * respectively.
Any of these may be NULL; dtrace_probe_lookup() will return * the identifier of the first probe that is provided by the specified * provider and matches all of the non-NULL matching criteria. * dtrace_probe_lookup() is generally used by a provider to check for the * existence of a probe before creating it with dtrace_probe_create(). * * 2.8.3 Return value * * If the probe exists, returns its identifier. If the probe does not exist, * DTRACE_IDNONE is returned. * * 2.8.4 Caller's context * * While dtrace_probe_lookup() is generally expected to be called from * dtps_provide() and/or dtps_provide_module(), it may also be called from * other non-DTrace contexts. Neither cpu_lock nor mod_lock may be held. * * 2.9 void *dtrace_probe_arg(dtrace_provider_t id, dtrace_id_t probe) * * 2.9.1 Overview * * Returns the probe argument associated with the specified probe. * * 2.9.2 Arguments and Notes * * The first argument is the provider identifier, as returned from a * successful call to dtrace_register(). The second argument is a probe * identifier, as returned from dtrace_probe_lookup() or * dtrace_probe_create(). This is useful if a probe has multiple * provider-specific components to it: the provider can create the probe * once with provider-specific state, and then add to the state by looking * up the probe based on probe identifier. * * 2.9.3 Return value * * Returns the argument associated with the specified probe. If the * specified probe does not exist, or if the specified probe is not provided * by the specified provider, NULL is returned. * * 2.9.4 Caller's context * * While dtrace_probe_arg() is generally expected to be called from * dtps_provide() and/or dtps_provide_module(), it may also be called from * other non-DTrace contexts. Neither cpu_lock nor mod_lock may be held. * * 2.10 void dtrace_probe(dtrace_id_t probe, uintptr_t arg0, uintptr_t arg1, * uintptr_t arg2, uintptr_t arg3, uintptr_t arg4) * * 2.10.1 Overview * * The epicenter of DTrace: fires the specified probe with the specified * arguments. * * 2.10.2 Arguments and Notes * * The first argument is a probe identifier as returned by * dtrace_probe_create() or dtrace_probe_lookup(). The second through sixth * arguments are the values to which the D variables "arg0" through "arg4" * will be mapped. * * dtrace_probe() should be called whenever the specified probe has fired -- * however the provider defines it. * * 2.10.3 Return value * * None. * * 2.10.4 Caller's context * * dtrace_probe() may be called in virtually any context: kernel, user, * interrupt, high-level interrupt, with arbitrary adaptive locks held, with * dispatcher locks held, with interrupts disabled, etc. The only latitude * that must be afforded to DTrace is the ability to make calls within * itself (and to its in-kernel subroutines) and the ability to access * arbitrary (but mapped) memory. On some platforms, this constrains * context. For example, on UltraSPARC, dtrace_probe() cannot be called * from any context in which TL is greater than zero. dtrace_probe() may * also not be called from any routine which may be called by dtrace_probe() * -- which includes functions in the DTrace framework and some in-kernel * DTrace subroutines. All such functions begin with "dtrace_"; providers * that instrument the kernel arbitrarily should be sure to not instrument * these routines.
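 *
 * 2.11 Example
 *
 *   The following sketch is illustrative only and is not part of the
 *   interface contract; the "example" provider name and the exm_* symbols
 *   are hypothetical, as are exm_pops and exm_attr, which are assumed to
 *   be defined elsewhere. It shows the typical call sequence for a
 *   minimal provider:
 *
 *	static dtrace_provider_id_t exm_id;
 *	static dtrace_id_t exm_tick;
 *
 *	static void
 *	exm_provide(void *arg, dtrace_probedesc_t *desc)
 *	{
 *		if (dtrace_probe_lookup(exm_id, NULL, NULL,
 *		    "tick") == DTRACE_IDNONE)
 *			exm_tick = dtrace_probe_create(exm_id, NULL,
 *			    NULL, "tick", 0, NULL);
 *	}
 *
 *   At attach time, the provider registers itself:
 *
 *	error = dtrace_register("example", &exm_attr, DTRACE_PRIV_KERNEL,
 *	    NULL, &exm_pops, NULL, &exm_id);
 *
 *   When the traced event occurs, the provider fires the probe:
 *
 *	dtrace_probe(exm_tick, 0, 0, 0, 0, 0);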
*/ typedef struct dtrace_pops { void (*dtps_provide)(void *arg, dtrace_probedesc_t *spec); void (*dtps_provide_module)(void *arg, modctl_t *mp); void (*dtps_enable)(void *arg, dtrace_id_t id, void *parg); void (*dtps_disable)(void *arg, dtrace_id_t id, void *parg); void (*dtps_suspend)(void *arg, dtrace_id_t id, void *parg); void (*dtps_resume)(void *arg, dtrace_id_t id, void *parg); void (*dtps_getargdesc)(void *arg, dtrace_id_t id, void *parg, dtrace_argdesc_t *desc); uint64_t (*dtps_getargval)(void *arg, dtrace_id_t id, void *parg, int argno, int aframes); int (*dtps_usermode)(void *arg, dtrace_id_t id, void *parg); void (*dtps_destroy)(void *arg, dtrace_id_t id, void *parg); } dtrace_pops_t; #define DTRACE_MODE_KERNEL 0x01 #define DTRACE_MODE_USER 0x02 #define DTRACE_MODE_NOPRIV_DROP 0x10 #define DTRACE_MODE_NOPRIV_RESTRICT 0x20 #define DTRACE_MODE_LIMITEDPRIV_RESTRICT 0x40 typedef uintptr_t dtrace_provider_id_t; extern int dtrace_register(const char *, const dtrace_pattr_t *, uint32_t, cred_t *, const dtrace_pops_t *, void *, dtrace_provider_id_t *); extern int dtrace_unregister(dtrace_provider_id_t); extern int dtrace_condense(dtrace_provider_id_t); extern void dtrace_invalidate(dtrace_provider_id_t); extern dtrace_id_t dtrace_probe_lookup(dtrace_provider_id_t, char *, char *, char *); extern dtrace_id_t dtrace_probe_create(dtrace_provider_id_t, const char *, const char *, const char *, int, void *); extern void *dtrace_probe_arg(dtrace_provider_id_t, dtrace_id_t); extern void dtrace_probe(dtrace_id_t, uintptr_t arg0, uintptr_t arg1, uintptr_t arg2, uintptr_t arg3, uintptr_t arg4); /* * DTrace Meta Provider API * * The following functions are implemented by the DTrace framework and are * used to implement meta providers. Meta providers plug into the DTrace * framework and are used to instantiate new providers on the fly. At * present, there is only one type of meta provider and only one meta * provider may be registered with the DTrace framework at a time. The * sole meta provider type provides user-land static tracing facilities * by taking meta probe descriptions and adding a corresponding provider * into the DTrace framework. * * 1 Framework-to-Provider * * 1.1 Overview * * The Framework-to-Provider API is represented by the dtrace_mops structure * that the meta provider passes to the framework when registering itself as * a meta provider. This structure consists of the following members: * * dtms_create_probe() <-- Add a new probe to a created provider * dtms_provide_pid() <-- Create a new provider for a given process * dtms_remove_pid() <-- Remove a previously created provider * * 1.2 void dtms_create_probe(void *arg, void *parg, * dtrace_helper_probedesc_t *probedesc); * * 1.2.1 Overview * * Called by the DTrace framework to create a new probe in a provider * created by this meta provider. * * 1.2.2 Arguments and notes * * The first argument is the cookie as passed to dtrace_meta_register(). * The second argument is the provider cookie for the associated provider; * this is obtained from the return value of dtms_provide_pid(). The third * argument is the helper probe description. * * 1.2.3 Return value * * None * * 1.2.4 Caller's context * * dtms_create_probe() is called from either ioctl() or module load context * in the context of a newly-created provider (that is, a provider that * is a result of a call to dtms_provide_pid()). 
The DTrace framework is * locked in such a way that meta providers may not register or unregister, * that no other thread can call into a meta provider operation, and that * atomicity is assured with respect to meta provider operations across * dtms_provide_pid() and subsequent calls to dtms_create_probe(). * The context is thus effectively single-threaded with respect to the meta * provider, and the meta provider cannot call dtrace_meta_register() * or dtrace_meta_unregister(). However, the context is such that the * provider may (and is expected to) call provider-related DTrace provider * APIs including dtrace_probe_create(). * * 1.3 void *dtms_provide_pid(void *arg, dtrace_meta_provider_t *mprov, * pid_t pid) * * 1.3.1 Overview * * Called by the DTrace framework to instantiate a new provider given the * description of the provider and probes in the mprov argument. The * meta provider should call dtrace_register() to insert the new provider * into the DTrace framework. * * 1.3.2 Arguments and notes * * The first argument is the cookie as passed to dtrace_meta_register(). * The second argument is a pointer to a structure describing the new * helper provider. The third argument is the process identifier for the * process associated with this new provider. Note that the name of the * provider as passed to dtrace_register() should be the concatenation of * the dthpv_provname member of the mprov argument and the process * identifier as a string. * * 1.3.3 Return value * * The cookie for the provider that the meta provider creates. This is * the same value that it passed to dtrace_register(). * * 1.3.4 Caller's context * * dtms_provide_pid() is called from either ioctl() or module load context. * The DTrace framework is locked in such a way that meta providers may not * register or unregister. This means that the meta provider cannot call * dtrace_meta_register() or dtrace_meta_unregister(). However, the context * is such that the provider may -- and is expected to -- call * provider-related DTrace provider APIs including dtrace_register(). * * 1.4 void dtms_remove_pid(void *arg, dtrace_meta_provider_t *mprov, * pid_t pid) * * 1.4.1 Overview * * Called by the DTrace framework to remove a provider that had previously * been instantiated via the dtms_provide_pid() entry point. The meta * provider need not remove the provider immediately, but this entry * point indicates that the provider should be removed as soon as possible * using the dtrace_unregister() API. * * 1.4.2 Arguments and notes * * The first argument is the cookie as passed to dtrace_meta_register(). * The second argument is a pointer to a structure describing the helper * provider. The third argument is the process identifier for the process * associated with this provider. * * 1.4.3 Return value * * None * * 1.4.4 Caller's context * * dtms_remove_pid() is called from either ioctl() or exit() context. * The DTrace framework is locked in such a way that meta providers may not * register or unregister. This means that the meta provider cannot call * dtrace_meta_register() or dtrace_meta_unregister(). However, the context * is such that the provider may -- and is expected to -- call * provider-related DTrace provider APIs including dtrace_unregister().
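 *
 * As an illustrative sketch only (the dtue_* symbols are hypothetical and
 * not part of this interface), a meta provider supplies its operations
 * vector and registers it once:
 *
 *	static dtrace_mops_t dtue_mops = {
 *		dtue_create_probe,
 *		dtue_provide_pid,
 *		dtue_remove_pid
 *	};
 *
 *	static dtrace_meta_provider_id_t dtue_meta_id;
 *
 *	error = dtrace_meta_register("dtue", &dtue_mops, NULL,
 *	    &dtue_meta_id);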
*/ typedef struct dtrace_helper_probedesc { char *dthpb_mod; /* probe module */ char *dthpb_func; /* probe function */ char *dthpb_name; /* probe name */ uint64_t dthpb_base; /* base address */ uint32_t *dthpb_offs; /* offsets array */ uint32_t *dthpb_enoffs; /* is-enabled offsets array */ uint32_t dthpb_noffs; /* offsets count */ uint32_t dthpb_nenoffs; /* is-enabled offsets count */ uint8_t *dthpb_args; /* argument mapping array */ uint8_t dthpb_xargc; /* translated argument count */ uint8_t dthpb_nargc; /* native argument count */ char *dthpb_xtypes; /* translated types strings */ char *dthpb_ntypes; /* native types strings */ } dtrace_helper_probedesc_t; typedef struct dtrace_helper_provdesc { char *dthpv_provname; /* provider name */ dtrace_pattr_t dthpv_pattr; /* stability attributes */ } dtrace_helper_provdesc_t; typedef struct dtrace_mops { void (*dtms_create_probe)(void *, void *, dtrace_helper_probedesc_t *); void *(*dtms_provide_pid)(void *, dtrace_helper_provdesc_t *, pid_t); void (*dtms_remove_pid)(void *, dtrace_helper_provdesc_t *, pid_t); } dtrace_mops_t; typedef uintptr_t dtrace_meta_provider_id_t; extern int dtrace_meta_register(const char *, const dtrace_mops_t *, void *, dtrace_meta_provider_id_t *); extern int dtrace_meta_unregister(dtrace_meta_provider_id_t); /* * DTrace Kernel Hooks * * The following functions are implemented by the base kernel and form a set of * hooks used by the DTrace framework. DTrace hooks are implemented in either * uts/common/os/dtrace_subr.c, an ISA-specific assembly file, or in a * uts/<platform>/os/dtrace_subr.c corresponding to each hardware platform. */ typedef enum dtrace_vtime_state { DTRACE_VTIME_INACTIVE = 0, /* No DTrace, no TNF */ DTRACE_VTIME_ACTIVE, /* DTrace virtual time, no TNF */ DTRACE_VTIME_INACTIVE_TNF, /* No DTrace, TNF active */ DTRACE_VTIME_ACTIVE_TNF /* DTrace virtual time _and_ TNF */ } dtrace_vtime_state_t; #ifdef illumos extern dtrace_vtime_state_t dtrace_vtime_active; #endif extern void dtrace_vtime_switch(kthread_t *next); extern void dtrace_vtime_enable_tnf(void); extern void dtrace_vtime_disable_tnf(void); extern void dtrace_vtime_enable(void); extern void dtrace_vtime_disable(void); struct regs; struct reg; #ifdef illumos extern int (*dtrace_pid_probe_ptr)(struct reg *); extern int (*dtrace_return_probe_ptr)(struct reg *); extern void (*dtrace_fasttrap_fork_ptr)(proc_t *, proc_t *); extern void (*dtrace_fasttrap_exec_ptr)(proc_t *); extern void (*dtrace_fasttrap_exit_ptr)(proc_t *); extern void dtrace_fasttrap_fork(proc_t *, proc_t *); #endif typedef uintptr_t dtrace_icookie_t; typedef void (*dtrace_xcall_t)(void *); extern dtrace_icookie_t dtrace_interrupt_disable(void); extern void dtrace_interrupt_enable(dtrace_icookie_t); extern void dtrace_membar_producer(void); extern void dtrace_membar_consumer(void); extern void (*dtrace_cpu_init)(processorid_t); #ifdef illumos extern void (*dtrace_modload)(modctl_t *); extern void (*dtrace_modunload)(modctl_t *); #endif extern void (*dtrace_helpers_cleanup)(void); extern void (*dtrace_helpers_fork)(proc_t *parent, proc_t *child); extern void (*dtrace_cpustart_init)(void); extern void (*dtrace_cpustart_fini)(void); extern void (*dtrace_closef)(void); extern void (*dtrace_debugger_init)(void); extern void (*dtrace_debugger_fini)(void); extern dtrace_cacheid_t dtrace_predcache_id; #ifdef illumos extern hrtime_t dtrace_gethrtime(void); #else void dtrace_debug_printf(const char *, ...)
__printflike(1, 2); #endif extern void dtrace_sync(void); extern void dtrace_toxic_ranges(void (*)(uintptr_t, uintptr_t)); extern void dtrace_xcall(processorid_t, dtrace_xcall_t, void *); extern void dtrace_vpanic(const char *, __va_list); extern void dtrace_panic(const char *, ...); extern int dtrace_safe_defer_signal(void); extern void dtrace_safe_synchronous_signal(void); extern int dtrace_mach_aframes(void); #if defined(__i386) || defined(__amd64) extern int dtrace_instr_size(uchar_t *instr); extern int dtrace_instr_size_isa(uchar_t *, model_t, int *); extern void dtrace_invop_callsite(void); #endif extern void dtrace_invop_add(int (*)(uintptr_t, struct trapframe *, uintptr_t)); extern void dtrace_invop_remove(int (*)(uintptr_t, struct trapframe *, uintptr_t)); #ifdef __sparc extern int dtrace_blksuword32(uintptr_t, uint32_t *, int); extern void dtrace_getfsr(uint64_t *); #endif #ifndef illumos extern void dtrace_helpers_duplicate(proc_t *, proc_t *); extern void dtrace_helpers_destroy(proc_t *); #endif #define DTRACE_CPUFLAG_ISSET(flag) \ (cpu_core[curcpu].cpuc_dtrace_flags & (flag)) #define DTRACE_CPUFLAG_SET(flag) \ (cpu_core[curcpu].cpuc_dtrace_flags |= (flag)) #define DTRACE_CPUFLAG_CLEAR(flag) \ (cpu_core[curcpu].cpuc_dtrace_flags &= ~(flag)) #endif /* _KERNEL */ #endif /* _ASM */ #if defined(__i386) || defined(__amd64) #define DTRACE_INVOP_PUSHL_EBP 1 #define DTRACE_INVOP_PUSHQ_RBP DTRACE_INVOP_PUSHL_EBP #define DTRACE_INVOP_POPL_EBP 2 #define DTRACE_INVOP_POPQ_RBP DTRACE_INVOP_POPL_EBP #define DTRACE_INVOP_LEAVE 3 #define DTRACE_INVOP_NOP 4 #define DTRACE_INVOP_RET 5 #elif defined(__powerpc__) #define DTRACE_INVOP_RET 1 #define DTRACE_INVOP_BCTR 2 #define DTRACE_INVOP_BLR 3 #define DTRACE_INVOP_JUMP 4 #define DTRACE_INVOP_MFLR_R0 5 #define DTRACE_INVOP_NOP 6 #elif defined(__arm__) #define DTRACE_INVOP_SHIFT 4 #define DTRACE_INVOP_MASK ((1 << DTRACE_INVOP_SHIFT) - 1) #define DTRACE_INVOP_DATA(x) ((x) >> DTRACE_INVOP_SHIFT) #define DTRACE_INVOP_PUSHM 1 #define DTRACE_INVOP_POPM 2 #define DTRACE_INVOP_B 3 #elif defined(__aarch64__) #define INSN_SIZE 4 #define B_MASK 0xff000000 #define B_DATA_MASK 0x00ffffff #define B_INSTR 0x14000000 #define RET_INSTR 0xd65f03c0 #define LDP_STP_MASK 0xffc00000 #define STP_32 0x29800000 #define STP_64 0xa9800000 #define LDP_32 0x28c00000 #define LDP_64 0xa8c00000 #define LDP_STP_PREIND (1 << 24) #define LDP_STP_DIR (1 << 22) /* Load instruction */ #define ARG1_SHIFT 0 #define ARG1_MASK 0x1f #define ARG2_SHIFT 10 #define ARG2_MASK 0x1f #define OFFSET_SHIFT 15 #define OFFSET_SIZE 7 #define OFFSET_MASK ((1 << OFFSET_SIZE) - 1) #define DTRACE_INVOP_PUSHM 1 #define DTRACE_INVOP_RET 2 #define DTRACE_INVOP_B 3 #elif defined(__mips__) #define INSN_SIZE 4 /* Load/Store double RA to/from SP */ #define LDSD_RA_SP_MASK 0xffff0000 #define LDSD_DATA_MASK 0x0000ffff #define SD_RA_SP 0xffbf0000 #define LD_RA_SP 0xdfbf0000 #define DTRACE_INVOP_SD 1 #define DTRACE_INVOP_LD 2 #elif defined(__riscv__) -#define SD_RA_SP_MASK 0x1fff07f -#define SD_RA_SP 0x0113023 +#define SD_RA_SP_MASK 0x01fff07f +#define SD_RA_SP 0x00113023 #define DTRACE_INVOP_SD 1 #define DTRACE_INVOP_RET 2 #define DTRACE_INVOP_NOP 3 #endif #ifdef __cplusplus } #endif #endif /* _SYS_DTRACE_H */ Index: user/alc/PQ_LAUNDRY/sys/cddl/contrib/opensolaris =================================================================== --- user/alc/PQ_LAUNDRY/sys/cddl/contrib/opensolaris (revision 303666) +++ user/alc/PQ_LAUNDRY/sys/cddl/contrib/opensolaris (revision 303667) Property changes on: 
user/alc/PQ_LAUNDRY/sys/cddl/contrib/opensolaris ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head/sys/cddl/contrib/opensolaris:r303642-303666 Index: user/alc/PQ_LAUNDRY/sys/cddl/dev/dtrace/riscv/dtrace_asm.S =================================================================== --- user/alc/PQ_LAUNDRY/sys/cddl/dev/dtrace/riscv/dtrace_asm.S (revision 303666) +++ user/alc/PQ_LAUNDRY/sys/cddl/dev/dtrace/riscv/dtrace_asm.S (revision 303667) @@ -1,177 +1,177 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License, Version 1.0 only * (the "License"). You may not use this file except in compliance * with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END * * Portions Copyright 2016 Ruslan Bukin * * $FreeBSD$ */ /* * Copyright 2004 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ #define _ASM #define _LOCORE #include #include #include #include #include "assym.s" /* void dtrace_membar_producer(void) */ ENTRY(dtrace_membar_producer) RET END(dtrace_membar_producer) /* void dtrace_membar_consumer(void) */ ENTRY(dtrace_membar_consumer) RET END(dtrace_membar_consumer) /* dtrace_icookie_t dtrace_interrupt_disable(void) */ ENTRY(dtrace_interrupt_disable) - csrrci a0, sstatus, SSTATUS_IE - andi a0, a0, SSTATUS_IE + csrrci a0, sstatus, (SSTATUS_SIE) + andi a0, a0, (SSTATUS_SIE) RET END(dtrace_interrupt_disable) /* void dtrace_interrupt_enable(dtrace_icookie_t cookie) */ ENTRY(dtrace_interrupt_enable) csrs sstatus, a0 RET END(dtrace_interrupt_enable) /* uint8_t dtrace_fuword8_nocheck(void *addr) */ ENTRY(dtrace_fuword8_nocheck) lb a0, 0(a0) RET END(dtrace_fuword8_nocheck) /* uint16_t dtrace_fuword16_nocheck(void *addr) */ ENTRY(dtrace_fuword16_nocheck) lh a0, 0(a0) RET END(dtrace_fuword16_nocheck) /* uint32_t dtrace_fuword32_nocheck(void *addr) */ ENTRY(dtrace_fuword32_nocheck) lw a0, 0(a0) RET END(dtrace_fuword32_nocheck) /* uint64_t dtrace_fuword64_nocheck(void *addr) */ ENTRY(dtrace_fuword64_nocheck) ld a0, 0(a0) RET END(dtrace_fuword64_nocheck) /* void dtrace_copy(uintptr_t uaddr, uintptr_t kaddr, size_t size) */ ENTRY(dtrace_copy) beqz a2, 2f /* If len == 0 then skip loop */ 1: lb a4, 0(a0) /* Load from uaddr */ addi a0, a0, 1 sb a4, 0(a1) /* Store in kaddr */ addi a1, a1, 1 addi a2, a2, -1 /* len-- */ bnez a2, 1b 2: RET END(dtrace_copy) /* void dtrace_copystr(uintptr_t uaddr, uintptr_t kaddr, size_t size, volatile uint16_t *flags) XXX: Check for flags? 
*/ ENTRY(dtrace_copystr) beqz a2, 2f /* If len == 0 then skip loop */ 1: lb a4, 0(a0) /* Load from uaddr */ addi a0, a0, 1 sb a4, 0(a1) /* Store in kaddr */ addi a1, a1, 1 beqz a4, 2f /* If == 0 then break */ addi a2, a2, -1 /* len-- */ bnez a2, 1b 2: RET END(dtrace_copystr) /* uintptr_t dtrace_caller(int aframes) */ ENTRY(dtrace_caller) li a0, -1 RET END(dtrace_caller) /* uint32_t dtrace_cas32(uint32_t *target, uint32_t cmp, uint32_t new) */ ENTRY(dtrace_cas32) 1: lr.w a3, 0(a0) /* Load target */ bne a3, a1, 2f /* *target != cmp ? return */ sc.w a4, a2, 0(a0) /* Store new to target */ bnez a4, 1b /* Try again if the store did not succeed */ 2: mv a0, a3 /* Return the value loaded from target */ RET END(dtrace_cas32) /* void * dtrace_casptr(volatile void *target, volatile void *cmp, volatile void *new) */ ENTRY(dtrace_casptr) 1: lr.d a3, 0(a0) /* Load target */ bne a3, a1, 2f /* *target != cmp ? return */ sc.d a4, a2, 0(a0) /* Store new to target */ bnez a4, 1b /* Try again if the store did not succeed */ 2: mv a0, a3 /* Return the value loaded from target */ RET END(dtrace_casptr) Index: user/alc/PQ_LAUNDRY/sys/cddl/dev/dtrace/riscv/dtrace_subr.c =================================================================== --- user/alc/PQ_LAUNDRY/sys/cddl/dev/dtrace/riscv/dtrace_subr.c (revision 303666) +++ user/alc/PQ_LAUNDRY/sys/cddl/dev/dtrace/riscv/dtrace_subr.c (revision 303667) @@ -1,282 +1,282 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License, Version 1.0 only * (the "License"). You may not use this file except in compliance * with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END * * Portions Copyright 2016 Ruslan Bukin * * $FreeBSD$ * */ /* * Copyright 2005 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms.
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include extern dtrace_id_t dtrace_probeid_error; extern int (*dtrace_invop_jump_addr)(struct trapframe *); extern void dtrace_getnanotime(struct timespec *tsp); int dtrace_invop(uintptr_t, struct trapframe *, uintptr_t); void dtrace_invop_init(void); void dtrace_invop_uninit(void); typedef struct dtrace_invop_hdlr { int (*dtih_func)(uintptr_t, struct trapframe *, uintptr_t); struct dtrace_invop_hdlr *dtih_next; } dtrace_invop_hdlr_t; dtrace_invop_hdlr_t *dtrace_invop_hdlr; int dtrace_invop(uintptr_t addr, struct trapframe *frame, uintptr_t eax) { dtrace_invop_hdlr_t *hdlr; int rval; for (hdlr = dtrace_invop_hdlr; hdlr != NULL; hdlr = hdlr->dtih_next) if ((rval = hdlr->dtih_func(addr, frame, eax)) != 0) return (rval); return (0); } void dtrace_invop_add(int (*func)(uintptr_t, struct trapframe *, uintptr_t)) { dtrace_invop_hdlr_t *hdlr; hdlr = kmem_alloc(sizeof (dtrace_invop_hdlr_t), KM_SLEEP); hdlr->dtih_func = func; hdlr->dtih_next = dtrace_invop_hdlr; dtrace_invop_hdlr = hdlr; } void dtrace_invop_remove(int (*func)(uintptr_t, struct trapframe *, uintptr_t)) { dtrace_invop_hdlr_t *hdlr, *prev; hdlr = dtrace_invop_hdlr; prev = NULL; for (;;) { if (hdlr == NULL) panic("attempt to remove non-existent invop handler"); if (hdlr->dtih_func == func) break; prev = hdlr; hdlr = hdlr->dtih_next; } if (prev == NULL) { ASSERT(dtrace_invop_hdlr == hdlr); dtrace_invop_hdlr = hdlr->dtih_next; } else { ASSERT(dtrace_invop_hdlr != hdlr); prev->dtih_next = hdlr->dtih_next; } kmem_free(hdlr, 0); } /*ARGSUSED*/ void dtrace_toxic_ranges(void (*func)(uintptr_t base, uintptr_t limit)) { (*func)(0, (uintptr_t)VM_MIN_KERNEL_ADDRESS); } void dtrace_xcall(processorid_t cpu, dtrace_xcall_t func, void *arg) { cpuset_t cpus; if (cpu == DTRACE_CPUALL) cpus = all_cpus; else CPU_SETOF(cpu, &cpus); smp_rendezvous_cpus(cpus, smp_no_rendevous_barrier, func, smp_no_rendevous_barrier, arg); } static void dtrace_sync_func(void) { } void dtrace_sync(void) { dtrace_xcall(DTRACE_CPUALL, (dtrace_xcall_t)dtrace_sync_func, NULL); } /* * DTrace needs a high resolution time function which can * be called from a probe context and is guaranteed not to have been * instrumented with probes itself. * * Returns nanoseconds since boot. */ uint64_t dtrace_gethrtime() { struct timespec curtime; nanouptime(&curtime); return (curtime.tv_sec * 1000000000UL + curtime.tv_nsec); } uint64_t dtrace_gethrestime(void) { struct timespec current_time; dtrace_getnanotime(&current_time); return (current_time.tv_sec * 1000000000UL + current_time.tv_nsec); } /* Function to handle DTrace traps during probes. See riscv/riscv/trap.c */ int dtrace_trap(struct trapframe *frame, u_int type) { /* * A trap can occur while DTrace executes a probe. Before * executing the probe, DTrace blocks re-scheduling and sets * a flag in its per-cpu flags to indicate that it doesn't * want to fault. On returning from the probe, the no-fault * flag is cleared and finally re-scheduling is enabled. * * Check if DTrace has enabled 'no-fault' mode: * */ if ((cpu_core[curcpu].cpuc_dtrace_flags & CPU_DTRACE_NOFAULT) != 0) { /* * There are only a couple of trap types that are expected. * All the rest will be handled in the usual way.
*/ switch (type) { - case EXCP_LOAD_ACCESS_FAULT: - case EXCP_STORE_ACCESS_FAULT: - case EXCP_INSTR_ACCESS_FAULT: + case EXCP_FAULT_LOAD: + case EXCP_FAULT_STORE: + case EXCP_FAULT_FETCH: /* Flag a bad address. */ cpu_core[curcpu].cpuc_dtrace_flags |= CPU_DTRACE_BADADDR; cpu_core[curcpu].cpuc_dtrace_illval = 0; /* * Offset the instruction pointer to the instruction * following the one causing the fault. */ frame->tf_sepc += 4; return (1); default: /* Handle all other traps in the usual way. */ break; } } /* Handle the trap in the usual way. */ return (0); } void dtrace_probe_error(dtrace_state_t *state, dtrace_epid_t epid, int which, int fault, int fltoffs, uintptr_t illval) { dtrace_probe(dtrace_probeid_error, (uint64_t)(uintptr_t)state, (uintptr_t)epid, (uintptr_t)which, (uintptr_t)fault, (uintptr_t)fltoffs); } static int dtrace_invop_start(struct trapframe *frame) { int data, invop, reg, update_sp; register_t arg1, arg2; register_t *sp; uint32_t imm; InstFmt i; int offs; int tmp; invop = dtrace_invop(frame->tf_sepc, frame, frame->tf_sepc); if (invop == RISCV_INSN_RET) { frame->tf_sepc = frame->tf_ra; return (0); } if ((invop & SD_RA_SP_MASK) == SD_RA_SP) { i.word = invop; imm = i.SType.imm0_4 | (i.SType.imm5_11 << 5); sp = (register_t *)((uint8_t *)frame->tf_sp + imm); *sp = frame->tf_ra; frame->tf_sepc += INSN_SIZE; return (0); } return (-1); } void dtrace_invop_init(void) { dtrace_invop_jump_addr = dtrace_invop_start; } void dtrace_invop_uninit(void) { dtrace_invop_jump_addr = 0; } Index: user/alc/PQ_LAUNDRY/sys/conf/ldscript.riscv =================================================================== --- user/alc/PQ_LAUNDRY/sys/conf/ldscript.riscv (revision 303666) +++ user/alc/PQ_LAUNDRY/sys/conf/ldscript.riscv (revision 303667) @@ -1,135 +1,135 @@ /* $FreeBSD$ */ OUTPUT_ARCH(riscv) ENTRY(_start) SEARCH_DIR(/usr/lib); SECTIONS { /* Read-only sections, merged into text segment: */ - . = kernbase + 0x100; + . = kernbase + 0x80000000 /* KERNENTRY */; .text : AT(ADDR(.text) - kernbase) { *(.text) *(.stub) /* .gnu.warning sections are handled specially by elf32.em. */ *(.gnu.warning) *(.gnu.linkonce.t*) } =0x9090 _etext = .; PROVIDE (etext = .); .fini : { *(.fini) } =0x9090 .rodata : { *(.rodata) *(.gnu.linkonce.r*) } .rodata1 : { *(.rodata1) } .interp : { *(.interp) } .hash : { *(.hash) } .dynsym : { *(.dynsym) } .dynstr : { *(.dynstr) } .gnu.version : { *(.gnu.version) } .gnu.version_d : { *(.gnu.version_d) } .gnu.version_r : { *(.gnu.version_r) } .rel.text : { *(.rel.text) *(.rel.gnu.linkonce.t*) } .rela.text : { *(.rela.text) *(.rela.gnu.linkonce.t*) } .rel.data : { *(.rel.data) *(.rel.gnu.linkonce.d*) } .rela.data : { *(.rela.data) *(.rela.gnu.linkonce.d*) } .rel.rodata : { *(.rel.rodata) *(.rel.gnu.linkonce.r*) } .rela.rodata : { *(.rela.rodata) *(.rela.gnu.linkonce.r*) } .rel.got : { *(.rel.got) } .rela.got : { *(.rela.got) } .rel.ctors : { *(.rel.ctors) } .rela.ctors : { *(.rela.ctors) } .rel.dtors : { *(.rel.dtors) } .rela.dtors : { *(.rela.dtors) } .rel.init : { *(.rel.init) } .rela.init : { *(.rela.init) } .rel.fini : { *(.rel.fini) } .rela.fini : { *(.rela.fini) } .rel.bss : { *(.rel.bss) } .rela.bss : { *(.rela.bss) } .rel.plt : { *(.rel.plt) } .rela.plt : { *(.rela.plt) } .init : { *(.init) } =0x9090 .plt : { *(.plt) } /* Adjust the address for the data segment. We want to adjust up to the same address within the page on the next page up. */ . = ALIGN(0x1000) + (. & (0x1000 - 1)) ; .data : { *(.data) *(.gnu.linkonce.d*) } .data1 : { *(.data1) } . 
= ALIGN(32 / 8); _start_ctors = .; PROVIDE (start_ctors = .); .ctors : { *(.ctors) } _stop_ctors = .; PROVIDE (stop_ctors = .); .dtors : { *(.dtors) } .got : { *(.got.plt) *(.got) } .dynamic : { *(.dynamic) } /* We want the small data sections together, so single-instruction offsets can access them all, and initialized data all before uninitialized, so we can shorten the on-disk segment size. */ . = ALIGN(8); .sdata : { *(.sdata) } _edata = .; PROVIDE (edata = .); __bss_start = .; .sbss : { *(.sbss) *(.scommon) } .bss : { *(.dynbss) *(.bss) *(COMMON) } . = ALIGN(8); _end = . ; PROVIDE (end = .); /* Stabs debugging sections. */ .stab 0 : { *(.stab) } .stabstr 0 : { *(.stabstr) } .stab.excl 0 : { *(.stab.excl) } .stab.exclstr 0 : { *(.stab.exclstr) } .stab.index 0 : { *(.stab.index) } .stab.indexstr 0 : { *(.stab.indexstr) } .comment 0 : { *(.comment) } /* DWARF debug sections. Symbols in the DWARF debugging sections are relative to the beginning of the section so we begin them at 0. */ /* DWARF 1 */ .debug 0 : { *(.debug) } .line 0 : { *(.line) } /* GNU DWARF 1 extensions */ .debug_srcinfo 0 : { *(.debug_srcinfo) } .debug_sfnames 0 : { *(.debug_sfnames) } /* DWARF 1.1 and DWARF 2 */ .debug_aranges 0 : { *(.debug_aranges) } .debug_pubnames 0 : { *(.debug_pubnames) } /* DWARF 2 */ .debug_info 0 : { *(.debug_info) } .debug_abbrev 0 : { *(.debug_abbrev) } .debug_line 0 : { *(.debug_line) } .debug_frame 0 : { *(.debug_frame) } .debug_str 0 : { *(.debug_str) } .debug_loc 0 : { *(.debug_loc) } .debug_macinfo 0 : { *(.debug_macinfo) } /* SGI/MIPS DWARF 2 extensions */ .debug_weaknames 0 : { *(.debug_weaknames) } .debug_funcnames 0 : { *(.debug_funcnames) } .debug_typenames 0 : { *(.debug_typenames) } .debug_varnames 0 : { *(.debug_varnames) } /* These must appear regardless of . */ } Index: user/alc/PQ_LAUNDRY/sys/kern/kern_mutex.c =================================================================== --- user/alc/PQ_LAUNDRY/sys/kern/kern_mutex.c (revision 303666) +++ user/alc/PQ_LAUNDRY/sys/kern/kern_mutex.c (revision 303667) @@ -1,1080 +1,1082 @@ /*- * Copyright (c) 1998 Berkeley Software Design, Inc. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Berkeley Software Design Inc's name may not be used to endorse or * promote products derived from this software without specific prior * written permission. * * THIS SOFTWARE IS PROVIDED BY BERKELEY SOFTWARE DESIGN INC ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL BERKELEY SOFTWARE DESIGN INC BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from BSDI $Id: mutex_witness.c,v 1.1.2.20 2000/04/27 03:10:27 cp Exp $ * and BSDI $Id: synch_machdep.c,v 2.3.2.39 2000/04/27 03:10:25 cp Exp $ */ /* * Machine independent bits of mutex implementation. */ #include __FBSDID("$FreeBSD$"); #include "opt_adaptive_mutexes.h" #include "opt_ddb.h" #include "opt_hwpmc_hooks.h" #include "opt_sched.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #if defined(SMP) && !defined(NO_ADAPTIVE_MUTEXES) #define ADAPTIVE_MUTEXES #endif #ifdef HWPMC_HOOKS #include PMC_SOFT_DEFINE( , , lock, failed); #endif /* * Return the mutex address when the lock cookie address is provided. * This functionality assumes that struct mtx* has a member named mtx_lock. */ #define mtxlock2mtx(c) (__containerof(c, struct mtx, mtx_lock)) /* * Internal utility macros. */ #define mtx_unowned(m) ((m)->mtx_lock == MTX_UNOWNED) #define mtx_destroyed(m) ((m)->mtx_lock == MTX_DESTROYED) #define mtx_owner(m) ((struct thread *)((m)->mtx_lock & ~MTX_FLAGMASK)) static void assert_mtx(const struct lock_object *lock, int what); #ifdef DDB static void db_show_mtx(const struct lock_object *lock); #endif static void lock_mtx(struct lock_object *lock, uintptr_t how); static void lock_spin(struct lock_object *lock, uintptr_t how); #ifdef KDTRACE_HOOKS static int owner_mtx(const struct lock_object *lock, struct thread **owner); #endif static uintptr_t unlock_mtx(struct lock_object *lock); static uintptr_t unlock_spin(struct lock_object *lock); /* * Lock classes for sleep and spin mutexes.
*/ struct lock_class lock_class_mtx_sleep = { .lc_name = "sleep mutex", .lc_flags = LC_SLEEPLOCK | LC_RECURSABLE, .lc_assert = assert_mtx, #ifdef DDB .lc_ddb_show = db_show_mtx, #endif .lc_lock = lock_mtx, .lc_unlock = unlock_mtx, #ifdef KDTRACE_HOOKS .lc_owner = owner_mtx, #endif }; struct lock_class lock_class_mtx_spin = { .lc_name = "spin mutex", .lc_flags = LC_SPINLOCK | LC_RECURSABLE, .lc_assert = assert_mtx, #ifdef DDB .lc_ddb_show = db_show_mtx, #endif .lc_lock = lock_spin, .lc_unlock = unlock_spin, #ifdef KDTRACE_HOOKS .lc_owner = owner_mtx, #endif }; #ifdef ADAPTIVE_MUTEXES static SYSCTL_NODE(_debug, OID_AUTO, mtx, CTLFLAG_RD, NULL, "mtx debugging"); static struct lock_delay_config mtx_delay = { .initial = 1000, .step = 500, .min = 100, .max = 5000, }; SYSCTL_INT(_debug_mtx, OID_AUTO, delay_initial, CTLFLAG_RW, &mtx_delay.initial, 0, ""); SYSCTL_INT(_debug_mtx, OID_AUTO, delay_step, CTLFLAG_RW, &mtx_delay.step, 0, ""); SYSCTL_INT(_debug_mtx, OID_AUTO, delay_min, CTLFLAG_RW, &mtx_delay.min, 0, ""); SYSCTL_INT(_debug_mtx, OID_AUTO, delay_max, CTLFLAG_RW, &mtx_delay.max, 0, ""); static void mtx_delay_sysinit(void *dummy) { mtx_delay.initial = mp_ncpus * 25; mtx_delay.step = (mp_ncpus * 25) / 2; mtx_delay.min = mp_ncpus * 5; mtx_delay.max = mp_ncpus * 25 * 10; } LOCK_DELAY_SYSINIT(mtx_delay_sysinit); #endif /* * System-wide mutexes */ struct mtx blocked_lock; struct mtx Giant; void assert_mtx(const struct lock_object *lock, int what) { mtx_assert((const struct mtx *)lock, what); } void lock_mtx(struct lock_object *lock, uintptr_t how) { mtx_lock((struct mtx *)lock); } void lock_spin(struct lock_object *lock, uintptr_t how) { panic("spin locks can only use msleep_spin"); } uintptr_t unlock_mtx(struct lock_object *lock) { struct mtx *m; m = (struct mtx *)lock; mtx_assert(m, MA_OWNED | MA_NOTRECURSED); mtx_unlock(m); return (0); } uintptr_t unlock_spin(struct lock_object *lock) { panic("spin locks can only use msleep_spin"); } #ifdef KDTRACE_HOOKS int owner_mtx(const struct lock_object *lock, struct thread **owner) { const struct mtx *m = (const struct mtx *)lock; *owner = mtx_owner(m); return (mtx_unowned(m) == 0); } #endif /* * Function versions of the inlined __mtx_* macros. These are used by * modules and can also be called from assembly language if needed. 
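 *
 * As an illustrative sketch only (the "exm" names are hypothetical and not
 * part of this file), a typical consumer initializes, acquires, releases
 * and destroys a sleep mutex as follows:
 *
 *	static struct mtx exm_mtx;
 *
 *	mtx_init(&exm_mtx, "exm", NULL, MTX_DEF);
 *	mtx_lock(&exm_mtx);
 *	(critical section)
 *	mtx_unlock(&exm_mtx);
 *	mtx_destroy(&exm_mtx);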
*/ void __mtx_lock_flags(volatile uintptr_t *c, int opts, const char *file, int line) { struct mtx *m; if (SCHEDULER_STOPPED()) return; m = mtxlock2mtx(c); KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread), ("mtx_lock() by idle thread %p on sleep mutex %s @ %s:%d", curthread, m->lock_object.lo_name, file, line)); KASSERT(m->mtx_lock != MTX_DESTROYED, ("mtx_lock() of destroyed mutex @ %s:%d", file, line)); KASSERT(LOCK_CLASS(&m->lock_object) == &lock_class_mtx_sleep, ("mtx_lock() of spin mutex %s @ %s:%d", m->lock_object.lo_name, file, line)); WITNESS_CHECKORDER(&m->lock_object, (opts & ~MTX_RECURSE) | LOP_NEWORDER | LOP_EXCLUSIVE, file, line, NULL); __mtx_lock(m, curthread, opts, file, line); LOCK_LOG_LOCK("LOCK", &m->lock_object, opts, m->mtx_recurse, file, line); WITNESS_LOCK(&m->lock_object, (opts & ~MTX_RECURSE) | LOP_EXCLUSIVE, file, line); TD_LOCKS_INC(curthread); } void __mtx_unlock_flags(volatile uintptr_t *c, int opts, const char *file, int line) { struct mtx *m; if (SCHEDULER_STOPPED()) return; m = mtxlock2mtx(c); KASSERT(m->mtx_lock != MTX_DESTROYED, ("mtx_unlock() of destroyed mutex @ %s:%d", file, line)); KASSERT(LOCK_CLASS(&m->lock_object) == &lock_class_mtx_sleep, ("mtx_unlock() of spin mutex %s @ %s:%d", m->lock_object.lo_name, file, line)); WITNESS_UNLOCK(&m->lock_object, opts | LOP_EXCLUSIVE, file, line); LOCK_LOG_LOCK("UNLOCK", &m->lock_object, opts, m->mtx_recurse, file, line); mtx_assert(m, MA_OWNED); __mtx_unlock(m, curthread, opts, file, line); TD_LOCKS_DEC(curthread); } void __mtx_lock_spin_flags(volatile uintptr_t *c, int opts, const char *file, int line) { struct mtx *m; if (SCHEDULER_STOPPED()) return; m = mtxlock2mtx(c); KASSERT(m->mtx_lock != MTX_DESTROYED, ("mtx_lock_spin() of destroyed mutex @ %s:%d", file, line)); KASSERT(LOCK_CLASS(&m->lock_object) == &lock_class_mtx_spin, ("mtx_lock_spin() of sleep mutex %s @ %s:%d", m->lock_object.lo_name, file, line)); if (mtx_owned(m)) KASSERT((m->lock_object.lo_flags & LO_RECURSABLE) != 0 || (opts & MTX_RECURSE) != 0, ("mtx_lock_spin: recursed on non-recursive mutex %s @ %s:%d\n", m->lock_object.lo_name, file, line)); opts &= ~MTX_RECURSE; WITNESS_CHECKORDER(&m->lock_object, opts | LOP_NEWORDER | LOP_EXCLUSIVE, file, line, NULL); __mtx_lock_spin(m, curthread, opts, file, line); LOCK_LOG_LOCK("LOCK", &m->lock_object, opts, m->mtx_recurse, file, line); WITNESS_LOCK(&m->lock_object, opts | LOP_EXCLUSIVE, file, line); } int __mtx_trylock_spin_flags(volatile uintptr_t *c, int opts, const char *file, int line) { struct mtx *m; if (SCHEDULER_STOPPED()) return (1); m = mtxlock2mtx(c); KASSERT(m->mtx_lock != MTX_DESTROYED, ("mtx_trylock_spin() of destroyed mutex @ %s:%d", file, line)); KASSERT(LOCK_CLASS(&m->lock_object) == &lock_class_mtx_spin, ("mtx_trylock_spin() of sleep mutex %s @ %s:%d", m->lock_object.lo_name, file, line)); KASSERT((opts & MTX_RECURSE) == 0, ("mtx_trylock_spin: unsupp. 
opt MTX_RECURSE on mutex %s @ %s:%d\n", m->lock_object.lo_name, file, line)); if (__mtx_trylock_spin(m, curthread, opts, file, line)) { LOCK_LOG_TRY("LOCK", &m->lock_object, opts, 1, file, line); WITNESS_LOCK(&m->lock_object, opts | LOP_EXCLUSIVE, file, line); return (1); } LOCK_LOG_TRY("LOCK", &m->lock_object, opts, 0, file, line); return (0); } void __mtx_unlock_spin_flags(volatile uintptr_t *c, int opts, const char *file, int line) { struct mtx *m; if (SCHEDULER_STOPPED()) return; m = mtxlock2mtx(c); KASSERT(m->mtx_lock != MTX_DESTROYED, ("mtx_unlock_spin() of destroyed mutex @ %s:%d", file, line)); KASSERT(LOCK_CLASS(&m->lock_object) == &lock_class_mtx_spin, ("mtx_unlock_spin() of sleep mutex %s @ %s:%d", m->lock_object.lo_name, file, line)); WITNESS_UNLOCK(&m->lock_object, opts | LOP_EXCLUSIVE, file, line); LOCK_LOG_LOCK("UNLOCK", &m->lock_object, opts, m->mtx_recurse, file, line); mtx_assert(m, MA_OWNED); __mtx_unlock_spin(m); } /* * The important part of mtx_trylock{,_flags}() * Tries to acquire lock `m.' If this function is called on a mutex that * is already owned, it will recursively acquire the lock. */ int _mtx_trylock_flags_(volatile uintptr_t *c, int opts, const char *file, int line) { struct mtx *m; #ifdef LOCK_PROFILING uint64_t waittime = 0; int contested = 0; #endif int rval; if (SCHEDULER_STOPPED()) return (1); m = mtxlock2mtx(c); KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread), ("mtx_trylock() by idle thread %p on sleep mutex %s @ %s:%d", curthread, m->lock_object.lo_name, file, line)); KASSERT(m->mtx_lock != MTX_DESTROYED, ("mtx_trylock() of destroyed mutex @ %s:%d", file, line)); KASSERT(LOCK_CLASS(&m->lock_object) == &lock_class_mtx_sleep, ("mtx_trylock() of spin mutex %s @ %s:%d", m->lock_object.lo_name, file, line)); if (mtx_owned(m) && ((m->lock_object.lo_flags & LO_RECURSABLE) != 0 || (opts & MTX_RECURSE) != 0)) { m->mtx_recurse++; atomic_set_ptr(&m->mtx_lock, MTX_RECURSED); rval = 1; } else rval = _mtx_obtain_lock(m, (uintptr_t)curthread); opts &= ~MTX_RECURSE; LOCK_LOG_TRY("LOCK", &m->lock_object, opts, rval, file, line); if (rval) { WITNESS_LOCK(&m->lock_object, opts | LOP_EXCLUSIVE | LOP_TRYLOCK, file, line); TD_LOCKS_INC(curthread); if (m->mtx_recurse == 0) LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(adaptive__acquire, m, contested, waittime, file, line); } return (rval); } /* * __mtx_lock_sleep: the tougher part of acquiring an MTX_DEF lock. * * We call this if the lock is either contested (i.e. we need to go to * sleep waiting for it), or if we need to recurse on it. 
*/ void __mtx_lock_sleep(volatile uintptr_t *c, uintptr_t tid, int opts, const char *file, int line) { struct mtx *m; struct turnstile *ts; uintptr_t v; #ifdef ADAPTIVE_MUTEXES volatile struct thread *owner; #endif #ifdef KTR int cont_logged = 0; #endif #ifdef LOCK_PROFILING int contested = 0; uint64_t waittime = 0; #endif #if defined(ADAPTIVE_MUTEXES) || defined(KDTRACE_HOOKS) struct lock_delay_arg lda; #endif #ifdef KDTRACE_HOOKS u_int sleep_cnt = 0; int64_t sleep_time = 0; int64_t all_time = 0; #endif if (SCHEDULER_STOPPED()) return; -#if defined(ADAPTIVE_MUTEXES) || defined(KDTRACE_HOOKS) +#if defined(ADAPTIVE_MUTEXES) lock_delay_arg_init(&lda, &mtx_delay); +#elif defined(KDTRACE_HOOKS) + lock_delay_arg_init(&lda, NULL); #endif m = mtxlock2mtx(c); if (mtx_owned(m)) { KASSERT((m->lock_object.lo_flags & LO_RECURSABLE) != 0 || (opts & MTX_RECURSE) != 0, ("_mtx_lock_sleep: recursed on non-recursive mutex %s @ %s:%d\n", m->lock_object.lo_name, file, line)); opts &= ~MTX_RECURSE; m->mtx_recurse++; atomic_set_ptr(&m->mtx_lock, MTX_RECURSED); if (LOCK_LOG_TEST(&m->lock_object, opts)) CTR1(KTR_LOCK, "_mtx_lock_sleep: %p recursing", m); return; } opts &= ~MTX_RECURSE; #ifdef HWPMC_HOOKS PMC_SOFT_CALL( , , lock, failed); #endif lock_profile_obtain_lock_failed(&m->lock_object, &contested, &waittime); if (LOCK_LOG_TEST(&m->lock_object, opts)) CTR4(KTR_LOCK, "_mtx_lock_sleep: %s contested (lock=%p) at %s:%d", m->lock_object.lo_name, (void *)m->mtx_lock, file, line); #ifdef KDTRACE_HOOKS all_time -= lockstat_nsecs(&m->lock_object); #endif for (;;) { if (m->mtx_lock == MTX_UNOWNED && _mtx_obtain_lock(m, tid)) break; #ifdef KDTRACE_HOOKS lda.spin_cnt++; #endif #ifdef ADAPTIVE_MUTEXES /* * If the owner is running on another CPU, spin until the * owner stops running or the state of the lock changes. */ v = m->mtx_lock; if (v != MTX_UNOWNED) { owner = (struct thread *)(v & ~MTX_FLAGMASK); if (TD_IS_RUNNING(owner)) { if (LOCK_LOG_TEST(&m->lock_object, 0)) CTR3(KTR_LOCK, "%s: spinning on %p held by %p", __func__, m, owner); KTR_STATE1(KTR_SCHED, "thread", sched_tdname((struct thread *)tid), "spinning", "lockname:\"%s\"", m->lock_object.lo_name); while (mtx_owner(m) == owner && TD_IS_RUNNING(owner)) lock_delay(&lda); KTR_STATE0(KTR_SCHED, "thread", sched_tdname((struct thread *)tid), "running"); continue; } } #endif ts = turnstile_trywait(&m->lock_object); v = m->mtx_lock; /* * Check if the lock has been released while spinning for * the turnstile chain lock. */ if (v == MTX_UNOWNED) { turnstile_cancel(ts); continue; } #ifdef ADAPTIVE_MUTEXES /* * The current lock owner might have started executing * on another CPU (or the lock could have changed * owners) while we were waiting on the turnstile * chain lock. If so, drop the turnstile lock and try * again. */ owner = (struct thread *)(v & ~MTX_FLAGMASK); if (TD_IS_RUNNING(owner)) { turnstile_cancel(ts); continue; } #endif /* * If the mutex isn't already contested and a failure occurs * setting the contested bit, the mutex was either released * or the state of the MTX_RECURSED bit changed. */ if ((v & MTX_CONTESTED) == 0 && !atomic_cmpset_ptr(&m->mtx_lock, v, v | MTX_CONTESTED)) { turnstile_cancel(ts); continue; } /* * We definitely must sleep for this lock. */ mtx_assert(m, MA_NOTOWNED); #ifdef KTR if (!cont_logged) { CTR6(KTR_CONTENTION, "contention: %p at %s:%d wants %s, taken by %s:%d", (void *)tid, file, line, m->lock_object.lo_name, WITNESS_FILE(&m->lock_object), WITNESS_LINE(&m->lock_object)); cont_logged = 1; } #endif /* * Block on the turnstile. 
*/ #ifdef KDTRACE_HOOKS sleep_time -= lockstat_nsecs(&m->lock_object); #endif turnstile_wait(ts, mtx_owner(m), TS_EXCLUSIVE_QUEUE); #ifdef KDTRACE_HOOKS sleep_time += lockstat_nsecs(&m->lock_object); sleep_cnt++; #endif } #ifdef KDTRACE_HOOKS all_time += lockstat_nsecs(&m->lock_object); #endif #ifdef KTR if (cont_logged) { CTR4(KTR_CONTENTION, "contention end: %s acquired by %p at %s:%d", m->lock_object.lo_name, (void *)tid, file, line); } #endif LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(adaptive__acquire, m, contested, waittime, file, line); #ifdef KDTRACE_HOOKS if (sleep_time) LOCKSTAT_RECORD1(adaptive__block, m, sleep_time); /* * Only record the loops spinning and not sleeping. */ if (lda.spin_cnt > sleep_cnt) LOCKSTAT_RECORD1(adaptive__spin, m, all_time - sleep_time); #endif } static void _mtx_lock_spin_failed(struct mtx *m) { struct thread *td; td = mtx_owner(m); /* If the mutex is unlocked, try again. */ if (td == NULL) return; printf( "spin lock %p (%s) held by %p (tid %d) too long\n", m, m->lock_object.lo_name, td, td->td_tid); #ifdef WITNESS witness_display_spinlock(&m->lock_object, td, printf); #endif panic("spin lock held too long"); } #ifdef SMP /* * _mtx_lock_spin_cookie: the tougher part of acquiring an MTX_SPIN lock. * * This is only called if we need to actually spin for the lock. Recursion * is handled inline. */ void _mtx_lock_spin_cookie(volatile uintptr_t *c, uintptr_t tid, int opts, const char *file, int line) { struct mtx *m; int i = 0; #ifdef LOCK_PROFILING int contested = 0; uint64_t waittime = 0; #endif #ifdef KDTRACE_HOOKS int64_t spin_time = 0; #endif if (SCHEDULER_STOPPED()) return; m = mtxlock2mtx(c); if (LOCK_LOG_TEST(&m->lock_object, opts)) CTR1(KTR_LOCK, "_mtx_lock_spin: %p spinning", m); KTR_STATE1(KTR_SCHED, "thread", sched_tdname((struct thread *)tid), "spinning", "lockname:\"%s\"", m->lock_object.lo_name); #ifdef HWPMC_HOOKS PMC_SOFT_CALL( , , lock, failed); #endif lock_profile_obtain_lock_failed(&m->lock_object, &contested, &waittime); #ifdef KDTRACE_HOOKS spin_time -= lockstat_nsecs(&m->lock_object); #endif for (;;) { if (m->mtx_lock == MTX_UNOWNED && _mtx_obtain_lock(m, tid)) break; /* Give interrupts a chance while we spin. */ spinlock_exit(); while (m->mtx_lock != MTX_UNOWNED) { if (i++ < 10000000) { cpu_spinwait(); continue; } if (i < 60000000 || kdb_active || panicstr != NULL) DELAY(1); else _mtx_lock_spin_failed(m); cpu_spinwait(); } spinlock_enter(); } #ifdef KDTRACE_HOOKS spin_time += lockstat_nsecs(&m->lock_object); #endif if (LOCK_LOG_TEST(&m->lock_object, opts)) CTR1(KTR_LOCK, "_mtx_lock_spin: %p spin done", m); KTR_STATE0(KTR_SCHED, "thread", sched_tdname((struct thread *)tid), "running"); #ifdef KDTRACE_HOOKS LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(spin__acquire, m, contested, waittime, file, line); if (spin_time != 0) LOCKSTAT_RECORD1(spin__spin, m, spin_time); #endif } #endif /* SMP */ void thread_lock_flags_(struct thread *td, int opts, const char *file, int line) { struct mtx *m; uintptr_t tid; int i; #ifdef LOCK_PROFILING int contested = 0; uint64_t waittime = 0; #endif #ifdef KDTRACE_HOOKS int64_t spin_time = 0; #endif i = 0; tid = (uintptr_t)curthread; if (SCHEDULER_STOPPED()) { /* * Ensure that spinlock sections are balanced even when the * scheduler is stopped, since we may otherwise inadvertently * re-enable interrupts while dumping core. 
*/ spinlock_enter(); return; } #ifdef KDTRACE_HOOKS spin_time -= lockstat_nsecs(&td->td_lock->lock_object); #endif for (;;) { retry: spinlock_enter(); m = td->td_lock; KASSERT(m->mtx_lock != MTX_DESTROYED, ("thread_lock() of destroyed mutex @ %s:%d", file, line)); KASSERT(LOCK_CLASS(&m->lock_object) == &lock_class_mtx_spin, ("thread_lock() of sleep mutex %s @ %s:%d", m->lock_object.lo_name, file, line)); if (mtx_owned(m)) KASSERT((m->lock_object.lo_flags & LO_RECURSABLE) != 0, ("thread_lock: recursed on non-recursive mutex %s @ %s:%d\n", m->lock_object.lo_name, file, line)); WITNESS_CHECKORDER(&m->lock_object, opts | LOP_NEWORDER | LOP_EXCLUSIVE, file, line, NULL); for (;;) { if (m->mtx_lock == MTX_UNOWNED && _mtx_obtain_lock(m, tid)) break; if (m->mtx_lock == tid) { m->mtx_recurse++; break; } #ifdef HWPMC_HOOKS PMC_SOFT_CALL( , , lock, failed); #endif lock_profile_obtain_lock_failed(&m->lock_object, &contested, &waittime); /* Give interrupts a chance while we spin. */ spinlock_exit(); while (m->mtx_lock != MTX_UNOWNED) { if (i++ < 10000000) cpu_spinwait(); else if (i < 60000000 || kdb_active || panicstr != NULL) DELAY(1); else _mtx_lock_spin_failed(m); cpu_spinwait(); if (m != td->td_lock) goto retry; } spinlock_enter(); } if (m == td->td_lock) break; __mtx_unlock_spin(m); /* does spinlock_exit() */ } #ifdef KDTRACE_HOOKS spin_time += lockstat_nsecs(&m->lock_object); #endif if (m->mtx_recurse == 0) LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(spin__acquire, m, contested, waittime, file, line); LOCK_LOG_LOCK("LOCK", &m->lock_object, opts, m->mtx_recurse, file, line); WITNESS_LOCK(&m->lock_object, opts | LOP_EXCLUSIVE, file, line); #ifdef KDTRACE_HOOKS if (spin_time != 0) LOCKSTAT_RECORD1(thread__spin, m, spin_time); #endif } struct mtx * thread_lock_block(struct thread *td) { struct mtx *lock; THREAD_LOCK_ASSERT(td, MA_OWNED); lock = td->td_lock; td->td_lock = &blocked_lock; mtx_unlock_spin(lock); return (lock); } void thread_lock_unblock(struct thread *td, struct mtx *new) { mtx_assert(new, MA_OWNED); MPASS(td->td_lock == &blocked_lock); atomic_store_rel_ptr((volatile void *)&td->td_lock, (uintptr_t)new); } void thread_lock_set(struct thread *td, struct mtx *new) { struct mtx *lock; mtx_assert(new, MA_OWNED); THREAD_LOCK_ASSERT(td, MA_OWNED); lock = td->td_lock; td->td_lock = new; mtx_unlock_spin(lock); } /* * __mtx_unlock_sleep: the tougher part of releasing an MTX_DEF lock. * * We are only called here if the lock is recursed or contested (i.e. we * need to wake up a blocked thread). */ void __mtx_unlock_sleep(volatile uintptr_t *c, int opts, const char *file, int line) { struct mtx *m; struct turnstile *ts; if (SCHEDULER_STOPPED()) return; m = mtxlock2mtx(c); if (mtx_recursed(m)) { if (--(m->mtx_recurse) == 0) atomic_clear_ptr(&m->mtx_lock, MTX_RECURSED); if (LOCK_LOG_TEST(&m->lock_object, opts)) CTR1(KTR_LOCK, "_mtx_unlock_sleep: %p unrecurse", m); return; } /* * We have to lock the chain before the turnstile so this turnstile * can be removed from the hash list if it is empty. */ turnstile_chain_lock(&m->lock_object); ts = turnstile_lookup(&m->lock_object); if (LOCK_LOG_TEST(&m->lock_object, opts)) CTR1(KTR_LOCK, "_mtx_unlock_sleep: %p contested", m); MPASS(ts != NULL); turnstile_broadcast(ts, TS_EXCLUSIVE_QUEUE); _mtx_release_lock_quick(m); /* * This turnstile is no longer associated with the mutex. We can * unlock the chain lock so a new turnstile may take its place.
*/ turnstile_unpend(ts, TS_EXCLUSIVE_LOCK); turnstile_chain_unlock(&m->lock_object); } /* * All the unlocking of MTX_SPIN locks is done inline. * See the __mtx_unlock_spin() macro for the details. */ /* * The backing function for the INVARIANTS-enabled mtx_assert() */ #ifdef INVARIANT_SUPPORT void __mtx_assert(const volatile uintptr_t *c, int what, const char *file, int line) { const struct mtx *m; if (panicstr != NULL || dumping) return; m = mtxlock2mtx(c); switch (what) { case MA_OWNED: case MA_OWNED | MA_RECURSED: case MA_OWNED | MA_NOTRECURSED: if (!mtx_owned(m)) panic("mutex %s not owned at %s:%d", m->lock_object.lo_name, file, line); if (mtx_recursed(m)) { if ((what & MA_NOTRECURSED) != 0) panic("mutex %s recursed at %s:%d", m->lock_object.lo_name, file, line); } else if ((what & MA_RECURSED) != 0) { panic("mutex %s unrecursed at %s:%d", m->lock_object.lo_name, file, line); } break; case MA_NOTOWNED: if (mtx_owned(m)) panic("mutex %s owned at %s:%d", m->lock_object.lo_name, file, line); break; default: panic("unknown mtx_assert at %s:%d", file, line); } } #endif /* * General init routine used by the MTX_SYSINIT() macro. */ void mtx_sysinit(void *arg) { struct mtx_args *margs = arg; mtx_init((struct mtx *)margs->ma_mtx, margs->ma_desc, NULL, margs->ma_opts); } /* * Mutex initialization routine; initialize lock `m' of type contained in * `opts' with options contained in `opts' and name `name.' The optional * lock type `type' is used as a general lock category name for use with * witness. */ void _mtx_init(volatile uintptr_t *c, const char *name, const char *type, int opts) { struct mtx *m; struct lock_class *class; int flags; m = mtxlock2mtx(c); MPASS((opts & ~(MTX_SPIN | MTX_QUIET | MTX_RECURSE | MTX_NOWITNESS | MTX_DUPOK | MTX_NOPROFILE | MTX_NEW)) == 0); ASSERT_ATOMIC_LOAD_PTR(m->mtx_lock, ("%s: mtx_lock not aligned for %s: %p", __func__, name, &m->mtx_lock)); /* Determine lock class and lock flags. */ if (opts & MTX_SPIN) class = &lock_class_mtx_spin; else class = &lock_class_mtx_sleep; flags = 0; if (opts & MTX_QUIET) flags |= LO_QUIET; if (opts & MTX_RECURSE) flags |= LO_RECURSABLE; if ((opts & MTX_NOWITNESS) == 0) flags |= LO_WITNESS; if (opts & MTX_DUPOK) flags |= LO_DUPOK; if (opts & MTX_NOPROFILE) flags |= LO_NOPROFILE; if (opts & MTX_NEW) flags |= LO_NEW; /* Initialize mutex. */ lock_init(&m->lock_object, class, name, type, flags); m->mtx_lock = MTX_UNOWNED; m->mtx_recurse = 0; } /* * Remove lock `m' from all_mtx queue. We don't allow MTX_QUIET to be * passed in as a flag here because if the corresponding mtx_init() was * called with MTX_QUIET set, then it will already be set in the mutex's * flags. */ void _mtx_destroy(volatile uintptr_t *c) { struct mtx *m; m = mtxlock2mtx(c); if (!mtx_owned(m)) MPASS(mtx_unowned(m)); else { MPASS((m->mtx_lock & (MTX_RECURSED|MTX_CONTESTED)) == 0); /* Perform the non-mtx related part of mtx_unlock_spin(). */ if (LOCK_CLASS(&m->lock_object) == &lock_class_mtx_spin) spinlock_exit(); else TD_LOCKS_DEC(curthread); lock_profile_release_lock(&m->lock_object); /* Tell witness this isn't locked to make it happy. */ WITNESS_UNLOCK(&m->lock_object, LOP_EXCLUSIVE, __FILE__, __LINE__); } m->mtx_lock = MTX_DESTROYED; lock_destroy(&m->lock_object); } /* * Initialize the mutex code and system mutexes. This is called from the MD * startup code prior to mi_startup(). The per-CPU data space needs to be * set up before this is called. */ void mutex_init(void) { /* Set up turnstiles so that sleep mutexes work. */ init_turnstiles(); /* * Initialize mutexes.
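mtx_sysinit() above is the target of the MTX_SYSINIT() convenience macro, which registers the initialization to run during boot; a minimal sketch (lock and name hypothetical):

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mutex.h>

static struct mtx example_mtx;
MTX_SYSINIT(example_mtx, &example_mtx, "example lock", MTX_DEF);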
*/ mtx_init(&Giant, "Giant", NULL, MTX_DEF | MTX_RECURSE); mtx_init(&blocked_lock, "blocked lock", NULL, MTX_SPIN); blocked_lock.mtx_lock = 0xdeadc0de; /* Always blocked. */ mtx_init(&proc0.p_mtx, "process lock", NULL, MTX_DEF | MTX_DUPOK); mtx_init(&proc0.p_slock, "process slock", NULL, MTX_SPIN); mtx_init(&proc0.p_statmtx, "pstatl", NULL, MTX_SPIN); mtx_init(&proc0.p_itimmtx, "pitiml", NULL, MTX_SPIN); mtx_init(&proc0.p_profmtx, "pprofl", NULL, MTX_SPIN); mtx_init(&devmtx, "cdev", NULL, MTX_DEF); mtx_lock(&Giant); } #ifdef DDB void db_show_mtx(const struct lock_object *lock) { struct thread *td; const struct mtx *m; m = (const struct mtx *)lock; db_printf(" flags: {"); if (LOCK_CLASS(lock) == &lock_class_mtx_spin) db_printf("SPIN"); else db_printf("DEF"); if (m->lock_object.lo_flags & LO_RECURSABLE) db_printf(", RECURSE"); if (m->lock_object.lo_flags & LO_DUPOK) db_printf(", DUPOK"); db_printf("}\n"); db_printf(" state: {"); if (mtx_unowned(m)) db_printf("UNOWNED"); else if (mtx_destroyed(m)) db_printf("DESTROYED"); else { db_printf("OWNED"); if (m->mtx_lock & MTX_CONTESTED) db_printf(", CONTESTED"); if (m->mtx_lock & MTX_RECURSED) db_printf(", RECURSED"); } db_printf("}\n"); if (!mtx_unowned(m) && !mtx_destroyed(m)) { td = mtx_owner(m); db_printf(" owner: %p (tid %d, pid %d, \"%s\")\n", td, td->td_tid, td->td_proc->p_pid, td->td_name); if (mtx_recursed(m)) db_printf(" recursed: %d\n", m->mtx_recurse); } } #endif Index: user/alc/PQ_LAUNDRY/sys/kern/kern_rwlock.c =================================================================== --- user/alc/PQ_LAUNDRY/sys/kern/kern_rwlock.c (revision 303666) +++ user/alc/PQ_LAUNDRY/sys/kern/kern_rwlock.c (revision 303667) @@ -1,1307 +1,1311 @@ /*- * Copyright (c) 2006 John Baldwin * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * Machine independent bits of reader/writer lock implementation. 
*/ #include __FBSDID("$FreeBSD$"); #include "opt_ddb.h" #include "opt_hwpmc_hooks.h" #include "opt_no_adaptive_rwlocks.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #if defined(SMP) && !defined(NO_ADAPTIVE_RWLOCKS) #define ADAPTIVE_RWLOCKS #endif #ifdef HWPMC_HOOKS #include PMC_SOFT_DECLARE( , , lock, failed); #endif /* * Return the rwlock address when the lock cookie address is provided. * This functionality assumes that struct rwlock* has a member named rw_lock. */ #define rwlock2rw(c) (__containerof(c, struct rwlock, rw_lock)) #ifdef DDB #include static void db_show_rwlock(const struct lock_object *lock); #endif static void assert_rw(const struct lock_object *lock, int what); static void lock_rw(struct lock_object *lock, uintptr_t how); #ifdef KDTRACE_HOOKS static int owner_rw(const struct lock_object *lock, struct thread **owner); #endif static uintptr_t unlock_rw(struct lock_object *lock); struct lock_class lock_class_rw = { .lc_name = "rw", .lc_flags = LC_SLEEPLOCK | LC_RECURSABLE | LC_UPGRADABLE, .lc_assert = assert_rw, #ifdef DDB .lc_ddb_show = db_show_rwlock, #endif .lc_lock = lock_rw, .lc_unlock = unlock_rw, #ifdef KDTRACE_HOOKS .lc_owner = owner_rw, #endif }; #ifdef ADAPTIVE_RWLOCKS static int rowner_retries = 10; static int rowner_loops = 10000; static SYSCTL_NODE(_debug, OID_AUTO, rwlock, CTLFLAG_RD, NULL, "rwlock debugging"); SYSCTL_INT(_debug_rwlock, OID_AUTO, retry, CTLFLAG_RW, &rowner_retries, 0, ""); SYSCTL_INT(_debug_rwlock, OID_AUTO, loops, CTLFLAG_RW, &rowner_loops, 0, ""); static struct lock_delay_config rw_delay = { .initial = 1000, .step = 500, .min = 100, .max = 5000, }; SYSCTL_INT(_debug_rwlock, OID_AUTO, delay_initial, CTLFLAG_RW, &rw_delay.initial, 0, ""); SYSCTL_INT(_debug_rwlock, OID_AUTO, delay_step, CTLFLAG_RW, &rw_delay.step, 0, ""); SYSCTL_INT(_debug_rwlock, OID_AUTO, delay_min, CTLFLAG_RW, &rw_delay.min, 0, ""); SYSCTL_INT(_debug_rwlock, OID_AUTO, delay_max, CTLFLAG_RW, &rw_delay.max, 0, ""); static void rw_delay_sysinit(void *dummy) { rw_delay.initial = mp_ncpus * 25; rw_delay.step = (mp_ncpus * 25) / 2; rw_delay.min = mp_ncpus * 5; rw_delay.max = mp_ncpus * 25 * 10; } LOCK_DELAY_SYSINIT(rw_delay_sysinit); #endif /* * Return a pointer to the owning thread if the lock is write-locked or * NULL if the lock is unlocked or read-locked. */ #define rw_wowner(rw) \ ((rw)->rw_lock & RW_LOCK_READ ? NULL : \ (struct thread *)RW_OWNER((rw)->rw_lock)) /* * Returns whether the write owner is recursed. Write ownership is not assured * here and should be previously checked. */ #define rw_recursed(rw) ((rw)->rw_recurse != 0) /* * Return true if curthread holds the lock. */ #define rw_wlocked(rw) (rw_wowner((rw)) == curthread) /* * Return a pointer to the owning thread for this lock who should receive * any priority lent by threads that block on this lock. Currently this * is identical to rw_wowner().
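A minimal consumer sketch (hypothetical names, not part of this diff) of the rw API this file implements, for orientation: many concurrent readers, or one exclusive writer.

#include <sys/param.h>
#include <sys/lock.h>
#include <sys/rwlock.h>

static struct rwlock example_rw;
static int example_state;

static void
example_setup(void)
{

	rw_init(&example_rw, "example rw");
}

static int
example_read(void)
{
	int v;

	rw_rlock(&example_rw);		/* shared: readers run concurrently */
	v = example_state;
	rw_runlock(&example_rw);
	return (v);
}

static void
example_write(int v)
{

	rw_wlock(&example_rw);		/* exclusive: a single writer */
	example_state = v;
	rw_wunlock(&example_rw);
}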
*/ #define rw_owner(rw) rw_wowner(rw) #ifndef INVARIANTS #define __rw_assert(c, what, file, line) #endif void assert_rw(const struct lock_object *lock, int what) { rw_assert((const struct rwlock *)lock, what); } void lock_rw(struct lock_object *lock, uintptr_t how) { struct rwlock *rw; rw = (struct rwlock *)lock; if (how) rw_rlock(rw); else rw_wlock(rw); } uintptr_t unlock_rw(struct lock_object *lock) { struct rwlock *rw; rw = (struct rwlock *)lock; rw_assert(rw, RA_LOCKED | LA_NOTRECURSED); if (rw->rw_lock & RW_LOCK_READ) { rw_runlock(rw); return (1); } else { rw_wunlock(rw); return (0); } } #ifdef KDTRACE_HOOKS int owner_rw(const struct lock_object *lock, struct thread **owner) { const struct rwlock *rw = (const struct rwlock *)lock; uintptr_t x = rw->rw_lock; *owner = rw_wowner(rw); return ((x & RW_LOCK_READ) != 0 ? (RW_READERS(x) != 0) : (*owner != NULL)); } #endif void _rw_init_flags(volatile uintptr_t *c, const char *name, int opts) { struct rwlock *rw; int flags; rw = rwlock2rw(c); MPASS((opts & ~(RW_DUPOK | RW_NOPROFILE | RW_NOWITNESS | RW_QUIET | RW_RECURSE | RW_NEW)) == 0); ASSERT_ATOMIC_LOAD_PTR(rw->rw_lock, ("%s: rw_lock not aligned for %s: %p", __func__, name, &rw->rw_lock)); flags = LO_UPGRADABLE; if (opts & RW_DUPOK) flags |= LO_DUPOK; if (opts & RW_NOPROFILE) flags |= LO_NOPROFILE; if (!(opts & RW_NOWITNESS)) flags |= LO_WITNESS; if (opts & RW_RECURSE) flags |= LO_RECURSABLE; if (opts & RW_QUIET) flags |= LO_QUIET; if (opts & RW_NEW) flags |= LO_NEW; lock_init(&rw->lock_object, &lock_class_rw, name, NULL, flags); rw->rw_lock = RW_UNLOCKED; rw->rw_recurse = 0; } void _rw_destroy(volatile uintptr_t *c) { struct rwlock *rw; rw = rwlock2rw(c); KASSERT(rw->rw_lock == RW_UNLOCKED, ("rw lock %p not unlocked", rw)); KASSERT(rw->rw_recurse == 0, ("rw lock %p still recursed", rw)); rw->rw_lock = RW_DESTROYED; lock_destroy(&rw->lock_object); } void rw_sysinit(void *arg) { struct rw_args *args = arg; rw_init((struct rwlock *)args->ra_rw, args->ra_desc); } void rw_sysinit_flags(void *arg) { struct rw_args_flags *args = arg; rw_init_flags((struct rwlock *)args->ra_rw, args->ra_desc, args->ra_flags); } int _rw_wowned(const volatile uintptr_t *c) { return (rw_wowner(rwlock2rw(c)) == curthread); } void _rw_wlock_cookie(volatile uintptr_t *c, const char *file, int line) { struct rwlock *rw; if (SCHEDULER_STOPPED()) return; rw = rwlock2rw(c); KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread), ("rw_wlock() by idle thread %p on rwlock %s @ %s:%d", curthread, rw->lock_object.lo_name, file, line)); KASSERT(rw->rw_lock != RW_DESTROYED, ("rw_wlock() of destroyed rwlock @ %s:%d", file, line)); WITNESS_CHECKORDER(&rw->lock_object, LOP_NEWORDER | LOP_EXCLUSIVE, file, line, NULL); __rw_wlock(rw, curthread, file, line); LOCK_LOG_LOCK("WLOCK", &rw->lock_object, 0, rw->rw_recurse, file, line); WITNESS_LOCK(&rw->lock_object, LOP_EXCLUSIVE, file, line); TD_LOCKS_INC(curthread); } int __rw_try_wlock(volatile uintptr_t *c, const char *file, int line) { struct rwlock *rw; int rval; if (SCHEDULER_STOPPED()) return (1); rw = rwlock2rw(c); KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread), ("rw_try_wlock() by idle thread %p on rwlock %s @ %s:%d", curthread, rw->lock_object.lo_name, file, line)); KASSERT(rw->rw_lock != RW_DESTROYED, ("rw_try_wlock() of destroyed rwlock @ %s:%d", file, line)); if (rw_wlocked(rw) && (rw->lock_object.lo_flags & LO_RECURSABLE) != 0) { rw->rw_recurse++; rval = 1; } else rval = atomic_cmpset_acq_ptr(&rw->rw_lock, RW_UNLOCKED, (uintptr_t)curthread); LOCK_LOG_TRY("WLOCK", 
&rw->lock_object, 0, rval, file, line); if (rval) { WITNESS_LOCK(&rw->lock_object, LOP_EXCLUSIVE | LOP_TRYLOCK, file, line); if (!rw_recursed(rw)) LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(rw__acquire, rw, 0, 0, file, line, LOCKSTAT_WRITER); TD_LOCKS_INC(curthread); } return (rval); } void _rw_wunlock_cookie(volatile uintptr_t *c, const char *file, int line) { struct rwlock *rw; if (SCHEDULER_STOPPED()) return; rw = rwlock2rw(c); KASSERT(rw->rw_lock != RW_DESTROYED, ("rw_wunlock() of destroyed rwlock @ %s:%d", file, line)); __rw_assert(c, RA_WLOCKED, file, line); WITNESS_UNLOCK(&rw->lock_object, LOP_EXCLUSIVE, file, line); LOCK_LOG_LOCK("WUNLOCK", &rw->lock_object, 0, rw->rw_recurse, file, line); __rw_wunlock(rw, curthread, file, line); TD_LOCKS_DEC(curthread); } /* * Determines whether a new reader can acquire a lock. Succeeds if the * reader already owns a read lock and the lock is locked for read to * prevent deadlock from reader recursion. Also succeeds if the lock * is unlocked and has no writer waiters or spinners. Failing otherwise * prioritizes writers before readers. */ #define RW_CAN_READ(_rw) \ ((curthread->td_rw_rlocks && (_rw) & RW_LOCK_READ) || ((_rw) & \ (RW_LOCK_READ | RW_LOCK_WRITE_WAITERS | RW_LOCK_WRITE_SPINNER)) == \ RW_LOCK_READ) void __rw_rlock(volatile uintptr_t *c, const char *file, int line) { struct rwlock *rw; struct turnstile *ts; #ifdef ADAPTIVE_RWLOCKS volatile struct thread *owner; int spintries = 0; int i; #endif #ifdef LOCK_PROFILING uint64_t waittime = 0; int contested = 0; #endif uintptr_t v; #if defined(ADAPTIVE_RWLOCKS) || defined(KDTRACE_HOOKS) struct lock_delay_arg lda; #endif #ifdef KDTRACE_HOOKS uintptr_t state; u_int sleep_cnt = 0; int64_t sleep_time = 0; int64_t all_time = 0; #endif if (SCHEDULER_STOPPED()) return; -#if defined(ADAPTIVE_RWLOCKS) || defined(KDTRACE_HOOKS) +#if defined(ADAPTIVE_RWLOCKS) lock_delay_arg_init(&lda, &rw_delay); +#elif defined(KDTRACE_HOOKS) + lock_delay_arg_init(&lda, NULL); #endif rw = rwlock2rw(c); KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread), ("rw_rlock() by idle thread %p on rwlock %s @ %s:%d", curthread, rw->lock_object.lo_name, file, line)); KASSERT(rw->rw_lock != RW_DESTROYED, ("rw_rlock() of destroyed rwlock @ %s:%d", file, line)); KASSERT(rw_wowner(rw) != curthread, ("rw_rlock: wlock already held for %s @ %s:%d", rw->lock_object.lo_name, file, line)); WITNESS_CHECKORDER(&rw->lock_object, LOP_NEWORDER, file, line, NULL); #ifdef KDTRACE_HOOKS all_time -= lockstat_nsecs(&rw->lock_object); state = rw->rw_lock; #endif for (;;) { /* * Handle the easy case. If no other thread has a write * lock, then try to bump up the count of read locks. Note * that we have to preserve the current state of the * RW_LOCK_WRITE_WAITERS flag. If we fail to acquire a * read lock, then rw_lock must have changed, so restart * the loop. Note that this handles the case of a * completely unlocked rwlock since such a lock is encoded * as a read lock with no waiters. */ v = rw->rw_lock; if (RW_CAN_READ(v)) { /* * The RW_LOCK_READ_WAITERS flag should only be set * if the lock has been unlocked and write waiters * were present. 
*/ if (atomic_cmpset_acq_ptr(&rw->rw_lock, v, v + RW_ONE_READER)) { if (LOCK_LOG_TEST(&rw->lock_object, 0)) CTR4(KTR_LOCK, "%s: %p succeed %p -> %p", __func__, rw, (void *)v, (void *)(v + RW_ONE_READER)); break; } continue; } #ifdef KDTRACE_HOOKS lda.spin_cnt++; #endif #ifdef HWPMC_HOOKS PMC_SOFT_CALL( , , lock, failed); #endif lock_profile_obtain_lock_failed(&rw->lock_object, &contested, &waittime); #ifdef ADAPTIVE_RWLOCKS /* * If the owner is running on another CPU, spin until * the owner stops running or the state of the lock * changes. */ if ((v & RW_LOCK_READ) == 0) { owner = (struct thread *)RW_OWNER(v); if (TD_IS_RUNNING(owner)) { if (LOCK_LOG_TEST(&rw->lock_object, 0)) CTR3(KTR_LOCK, "%s: spinning on %p held by %p", __func__, rw, owner); KTR_STATE1(KTR_SCHED, "thread", sched_tdname(curthread), "spinning", "lockname:\"%s\"", rw->lock_object.lo_name); while ((struct thread*)RW_OWNER(rw->rw_lock) == owner && TD_IS_RUNNING(owner)) lock_delay(&lda); KTR_STATE0(KTR_SCHED, "thread", sched_tdname(curthread), "running"); continue; } } else if (spintries < rowner_retries) { spintries++; KTR_STATE1(KTR_SCHED, "thread", sched_tdname(curthread), "spinning", "lockname:\"%s\"", rw->lock_object.lo_name); for (i = 0; i < rowner_loops; i++) { v = rw->rw_lock; if ((v & RW_LOCK_READ) == 0 || RW_CAN_READ(v)) break; cpu_spinwait(); } #ifdef KDTRACE_HOOKS lda.spin_cnt += rowner_loops - i; #endif KTR_STATE0(KTR_SCHED, "thread", sched_tdname(curthread), "running"); if (i != rowner_loops) continue; } #endif /* * Okay, now it's the hard case. Some other thread already * has a write lock or there are write waiters present, * acquire the turnstile lock so we can begin the process * of blocking. */ ts = turnstile_trywait(&rw->lock_object); /* * The lock might have been released while we spun, so * recheck its state and restart the loop if needed. */ v = rw->rw_lock; if (RW_CAN_READ(v)) { turnstile_cancel(ts); continue; } #ifdef ADAPTIVE_RWLOCKS /* * The current lock owner might have started executing * on another CPU (or the lock could have changed * owners) while we were waiting on the turnstile * chain lock. If so, drop the turnstile lock and try * again. */ if ((v & RW_LOCK_READ) == 0) { owner = (struct thread *)RW_OWNER(v); if (TD_IS_RUNNING(owner)) { turnstile_cancel(ts); continue; } } #endif /* * The lock is held in write mode or it already has waiters. */ MPASS(!RW_CAN_READ(v)); /* * If the RW_LOCK_READ_WAITERS flag is already set, then * we can go ahead and block. If it is not set then try * to set it. If we fail to set it drop the turnstile * lock and restart the loop. */ if (!(v & RW_LOCK_READ_WAITERS)) { if (!atomic_cmpset_ptr(&rw->rw_lock, v, v | RW_LOCK_READ_WAITERS)) { turnstile_cancel(ts); continue; } if (LOCK_LOG_TEST(&rw->lock_object, 0)) CTR2(KTR_LOCK, "%s: %p set read waiters flag", __func__, rw); } /* * We were unable to acquire the lock and the read waiters * flag is set, so we must block on the turnstile. 
*/ if (LOCK_LOG_TEST(&rw->lock_object, 0)) CTR2(KTR_LOCK, "%s: %p blocking on turnstile", __func__, rw); #ifdef KDTRACE_HOOKS sleep_time -= lockstat_nsecs(&rw->lock_object); #endif turnstile_wait(ts, rw_owner(rw), TS_SHARED_QUEUE); #ifdef KDTRACE_HOOKS sleep_time += lockstat_nsecs(&rw->lock_object); sleep_cnt++; #endif if (LOCK_LOG_TEST(&rw->lock_object, 0)) CTR2(KTR_LOCK, "%s: %p resuming from turnstile", __func__, rw); } #ifdef KDTRACE_HOOKS all_time += lockstat_nsecs(&rw->lock_object); if (sleep_time) LOCKSTAT_RECORD4(rw__block, rw, sleep_time, LOCKSTAT_READER, (state & RW_LOCK_READ) == 0, (state & RW_LOCK_READ) == 0 ? 0 : RW_READERS(state)); /* Record only the loops spinning and not sleeping. */ if (lda.spin_cnt > sleep_cnt) LOCKSTAT_RECORD4(rw__spin, rw, all_time - sleep_time, LOCKSTAT_READER, (state & RW_LOCK_READ) == 0, (state & RW_LOCK_READ) == 0 ? 0 : RW_READERS(state)); #endif /* * TODO: acquire "owner of record" here. Here be turnstile dragons * however. turnstiles don't like owners changing between calls to * turnstile_wait() currently. */ LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(rw__acquire, rw, contested, waittime, file, line, LOCKSTAT_READER); LOCK_LOG_LOCK("RLOCK", &rw->lock_object, 0, 0, file, line); WITNESS_LOCK(&rw->lock_object, 0, file, line); TD_LOCKS_INC(curthread); curthread->td_rw_rlocks++; } int __rw_try_rlock(volatile uintptr_t *c, const char *file, int line) { struct rwlock *rw; uintptr_t x; if (SCHEDULER_STOPPED()) return (1); rw = rwlock2rw(c); KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread), ("rw_try_rlock() by idle thread %p on rwlock %s @ %s:%d", curthread, rw->lock_object.lo_name, file, line)); for (;;) { x = rw->rw_lock; KASSERT(rw->rw_lock != RW_DESTROYED, ("rw_try_rlock() of destroyed rwlock @ %s:%d", file, line)); if (!(x & RW_LOCK_READ)) break; if (atomic_cmpset_acq_ptr(&rw->rw_lock, x, x + RW_ONE_READER)) { LOCK_LOG_TRY("RLOCK", &rw->lock_object, 0, 1, file, line); WITNESS_LOCK(&rw->lock_object, LOP_TRYLOCK, file, line); LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(rw__acquire, rw, 0, 0, file, line, LOCKSTAT_READER); TD_LOCKS_INC(curthread); curthread->td_rw_rlocks++; return (1); } } LOCK_LOG_TRY("RLOCK", &rw->lock_object, 0, 0, file, line); return (0); } void _rw_runlock_cookie(volatile uintptr_t *c, const char *file, int line) { struct rwlock *rw; struct turnstile *ts; uintptr_t x, v, queue; if (SCHEDULER_STOPPED()) return; rw = rwlock2rw(c); KASSERT(rw->rw_lock != RW_DESTROYED, ("rw_runlock() of destroyed rwlock @ %s:%d", file, line)); __rw_assert(c, RA_RLOCKED, file, line); WITNESS_UNLOCK(&rw->lock_object, 0, file, line); LOCK_LOG_LOCK("RUNLOCK", &rw->lock_object, 0, 0, file, line); /* TODO: drop "owner of record" here. */ for (;;) { /* * See if there is more than one read lock held. If so, * just drop one and return. */ x = rw->rw_lock; if (RW_READERS(x) > 1) { if (atomic_cmpset_rel_ptr(&rw->rw_lock, x, x - RW_ONE_READER)) { if (LOCK_LOG_TEST(&rw->lock_object, 0)) CTR4(KTR_LOCK, "%s: %p succeeded %p -> %p", __func__, rw, (void *)x, (void *)(x - RW_ONE_READER)); break; } continue; } /* * If there aren't any waiters for a write lock, then try * to drop it quickly. */ if (!(x & RW_LOCK_WAITERS)) { MPASS((x & ~RW_LOCK_WRITE_SPINNER) == RW_READERS_LOCK(1)); if (atomic_cmpset_rel_ptr(&rw->rw_lock, x, RW_UNLOCKED)) { if (LOCK_LOG_TEST(&rw->lock_object, 0)) CTR2(KTR_LOCK, "%s: %p last succeeded", __func__, rw); break; } continue; } /* * Ok, we know we have waiters and we think we are the * last reader, so grab the turnstile lock. 
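One common use of __rw_try_rlock() above, sketched with hypothetical names: opportunistically take the read lock where blocking could cause a lock order reversal, and let the caller retry otherwise.

#include <sys/param.h>
#include <sys/errno.h>
#include <sys/lock.h>
#include <sys/rwlock.h>

static int
example_peek(struct rwlock *rwp, int *statep, int *vp)
{

	if (!rw_try_rlock(rwp))
		return (EWOULDBLOCK);	/* caller drops its locks and retries */
	*vp = *statep;
	rw_runlock(rwp);
	return (0);
}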
*/ turnstile_chain_lock(&rw->lock_object); v = rw->rw_lock & (RW_LOCK_WAITERS | RW_LOCK_WRITE_SPINNER); MPASS(v & RW_LOCK_WAITERS); /* * Try to drop our lock leaving the lock in an unlocked * state. * * If you wanted to do explicit lock handoff you'd have to * do it here. You'd also want to use turnstile_signal() * and you'd have to handle the race where a higher * priority thread blocks on the write lock before the * thread you wake up actually runs and have the new thread * "steal" the lock. For now it's a lot simpler to just * wake up all of the waiters. * * As above, if we fail, then another thread might have * acquired a read lock, so drop the turnstile lock and * restart. */ x = RW_UNLOCKED; if (v & RW_LOCK_WRITE_WAITERS) { queue = TS_EXCLUSIVE_QUEUE; x |= (v & RW_LOCK_READ_WAITERS); } else queue = TS_SHARED_QUEUE; if (!atomic_cmpset_rel_ptr(&rw->rw_lock, RW_READERS_LOCK(1) | v, x)) { turnstile_chain_unlock(&rw->lock_object); continue; } if (LOCK_LOG_TEST(&rw->lock_object, 0)) CTR2(KTR_LOCK, "%s: %p last succeeded with waiters", __func__, rw); /* * Ok. The lock is released and all that's left is to * wake up the waiters. Note that the lock might not be * free anymore, but in that case the writers will just * block again if they run before the new lock holder(s) * release the lock. */ ts = turnstile_lookup(&rw->lock_object); MPASS(ts != NULL); turnstile_broadcast(ts, queue); turnstile_unpend(ts, TS_SHARED_LOCK); turnstile_chain_unlock(&rw->lock_object); break; } LOCKSTAT_PROFILE_RELEASE_RWLOCK(rw__release, rw, LOCKSTAT_READER); TD_LOCKS_DEC(curthread); curthread->td_rw_rlocks--; } /* * This function is called when we are unable to obtain a write lock on the * first try. This means that at least one other thread holds either a * read or write lock. */ void __rw_wlock_hard(volatile uintptr_t *c, uintptr_t tid, const char *file, int line) { struct rwlock *rw; struct turnstile *ts; #ifdef ADAPTIVE_RWLOCKS volatile struct thread *owner; int spintries = 0; int i; #endif uintptr_t v, x; #ifdef LOCK_PROFILING uint64_t waittime = 0; int contested = 0; #endif #if defined(ADAPTIVE_RWLOCKS) || defined(KDTRACE_HOOKS) struct lock_delay_arg lda; #endif #ifdef KDTRACE_HOOKS uintptr_t state; u_int sleep_cnt = 0; int64_t sleep_time = 0; int64_t all_time = 0; #endif if (SCHEDULER_STOPPED()) return; -#if defined(ADAPTIVE_RWLOCKS) || defined(KDTRACE_HOOKS) +#if defined(ADAPTIVE_RWLOCKS) lock_delay_arg_init(&lda, &rw_delay); +#elif defined(KDTRACE_HOOKS) + lock_delay_arg_init(&lda, NULL); #endif rw = rwlock2rw(c); if (rw_wlocked(rw)) { KASSERT(rw->lock_object.lo_flags & LO_RECURSABLE, ("%s: recursing but non-recursive rw %s @ %s:%d\n", __func__, rw->lock_object.lo_name, file, line)); rw->rw_recurse++; if (LOCK_LOG_TEST(&rw->lock_object, 0)) CTR2(KTR_LOCK, "%s: %p recursing", __func__, rw); return; } if (LOCK_LOG_TEST(&rw->lock_object, 0)) CTR5(KTR_LOCK, "%s: %s contested (lock=%p) at %s:%d", __func__, rw->lock_object.lo_name, (void *)rw->rw_lock, file, line); #ifdef KDTRACE_HOOKS all_time -= lockstat_nsecs(&rw->lock_object); state = rw->rw_lock; #endif for (;;) { if (rw->rw_lock == RW_UNLOCKED && _rw_write_lock(rw, tid)) break; #ifdef KDTRACE_HOOKS lda.spin_cnt++; #endif #ifdef HWPMC_HOOKS PMC_SOFT_CALL( , , lock, failed); #endif lock_profile_obtain_lock_failed(&rw->lock_object, &contested, &waittime); #ifdef ADAPTIVE_RWLOCKS /* * If the lock is write locked and the owner is * running on another CPU, spin until the owner stops * running or the state of the lock changes.
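* The spin is tempered by lock_delay(), which applies the rw_delay backoff configured earlier in this file; once the owner is seen descheduled, further spinning would be wasted work, so the loop exits and control falls through to the turnstile path below.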
*/ v = rw->rw_lock; owner = (struct thread *)RW_OWNER(v); if (!(v & RW_LOCK_READ) && TD_IS_RUNNING(owner)) { if (LOCK_LOG_TEST(&rw->lock_object, 0)) CTR3(KTR_LOCK, "%s: spinning on %p held by %p", __func__, rw, owner); KTR_STATE1(KTR_SCHED, "thread", sched_tdname(curthread), "spinning", "lockname:\"%s\"", rw->lock_object.lo_name); while ((struct thread*)RW_OWNER(rw->rw_lock) == owner && TD_IS_RUNNING(owner)) lock_delay(&lda); KTR_STATE0(KTR_SCHED, "thread", sched_tdname(curthread), "running"); continue; } if ((v & RW_LOCK_READ) && RW_READERS(v) && spintries < rowner_retries) { if (!(v & RW_LOCK_WRITE_SPINNER)) { if (!atomic_cmpset_ptr(&rw->rw_lock, v, v | RW_LOCK_WRITE_SPINNER)) { continue; } } spintries++; KTR_STATE1(KTR_SCHED, "thread", sched_tdname(curthread), "spinning", "lockname:\"%s\"", rw->lock_object.lo_name); for (i = 0; i < rowner_loops; i++) { if ((rw->rw_lock & RW_LOCK_WRITE_SPINNER) == 0) break; cpu_spinwait(); } KTR_STATE0(KTR_SCHED, "thread", sched_tdname(curthread), "running"); #ifdef KDTRACE_HOOKS lda.spin_cnt += rowner_loops - i; #endif if (i != rowner_loops) continue; } #endif ts = turnstile_trywait(&rw->lock_object); v = rw->rw_lock; #ifdef ADAPTIVE_RWLOCKS /* * The current lock owner might have started executing * on another CPU (or the lock could have changed * owners) while we were waiting on the turnstile * chain lock. If so, drop the turnstile lock and try * again. */ if (!(v & RW_LOCK_READ)) { owner = (struct thread *)RW_OWNER(v); if (TD_IS_RUNNING(owner)) { turnstile_cancel(ts); continue; } } #endif /* * Check the waiters flags for this rwlock. * If the lock was released without leaving any pending * waiters queue, simply try to acquire it. * If a pending waiters queue is present, claim the lock * ownership and preserve the pending queue. */ x = v & (RW_LOCK_WAITERS | RW_LOCK_WRITE_SPINNER); if ((v & ~x) == RW_UNLOCKED) { x &= ~RW_LOCK_WRITE_SPINNER; if (atomic_cmpset_acq_ptr(&rw->rw_lock, v, tid | x)) { if (x) turnstile_claim(ts); else turnstile_cancel(ts); break; } turnstile_cancel(ts); continue; } /* * If the RW_LOCK_WRITE_WAITERS flag isn't set, then try to * set it. If we fail to set it, then loop back and try * again. */ if (!(v & RW_LOCK_WRITE_WAITERS)) { if (!atomic_cmpset_ptr(&rw->rw_lock, v, v | RW_LOCK_WRITE_WAITERS)) { turnstile_cancel(ts); continue; } if (LOCK_LOG_TEST(&rw->lock_object, 0)) CTR2(KTR_LOCK, "%s: %p set write waiters flag", __func__, rw); } /* * We were unable to acquire the lock and the write waiters * flag is set, so we must block on the turnstile. */ if (LOCK_LOG_TEST(&rw->lock_object, 0)) CTR2(KTR_LOCK, "%s: %p blocking on turnstile", __func__, rw); #ifdef KDTRACE_HOOKS sleep_time -= lockstat_nsecs(&rw->lock_object); #endif turnstile_wait(ts, rw_owner(rw), TS_EXCLUSIVE_QUEUE); #ifdef KDTRACE_HOOKS sleep_time += lockstat_nsecs(&rw->lock_object); sleep_cnt++; #endif if (LOCK_LOG_TEST(&rw->lock_object, 0)) CTR2(KTR_LOCK, "%s: %p resuming from turnstile", __func__, rw); #ifdef ADAPTIVE_RWLOCKS spintries = 0; #endif } #ifdef KDTRACE_HOOKS all_time += lockstat_nsecs(&rw->lock_object); if (sleep_time) LOCKSTAT_RECORD4(rw__block, rw, sleep_time, LOCKSTAT_WRITER, (state & RW_LOCK_READ) == 0, (state & RW_LOCK_READ) == 0 ? 0 : RW_READERS(state)); /* Record only the loops spinning and not sleeping. */ if (lda.spin_cnt > sleep_cnt) LOCKSTAT_RECORD4(rw__spin, rw, all_time - sleep_time, LOCKSTAT_WRITER, (state & RW_LOCK_READ) == 0, (state & RW_LOCK_READ) == 0 ?
0 : RW_READERS(state)); #endif LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(rw__acquire, rw, contested, waittime, file, line, LOCKSTAT_WRITER); } /* * This function is called if the first try at releasing a write lock failed. * This means that one of the 2 waiter bits must be set indicating that at * least one thread is waiting on this lock. */ void __rw_wunlock_hard(volatile uintptr_t *c, uintptr_t tid, const char *file, int line) { struct rwlock *rw; struct turnstile *ts; uintptr_t v; int queue; if (SCHEDULER_STOPPED()) return; rw = rwlock2rw(c); if (rw_wlocked(rw) && rw_recursed(rw)) { rw->rw_recurse--; if (LOCK_LOG_TEST(&rw->lock_object, 0)) CTR2(KTR_LOCK, "%s: %p unrecursing", __func__, rw); return; } KASSERT(rw->rw_lock & (RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS), ("%s: neither of the waiter flags are set", __func__)); if (LOCK_LOG_TEST(&rw->lock_object, 0)) CTR2(KTR_LOCK, "%s: %p contested", __func__, rw); turnstile_chain_lock(&rw->lock_object); ts = turnstile_lookup(&rw->lock_object); MPASS(ts != NULL); /* * Use the same algo as sx locks for now. Prefer waking up shared * waiters if we have any over writers. This is probably not ideal. * * 'v' is the value we are going to write back to rw_lock. If we * have waiters on both queues, we need to preserve the state of * the waiter flag for the queue we don't wake up. For now this is * hardcoded for the algorithm mentioned above. * * In the case of both readers and writers waiting we wakeup the * readers but leave the RW_LOCK_WRITE_WAITERS flag set. If a * new writer comes in before a reader it will claim the lock up * above. There is probably a potential priority inversion in * there that could be worked around either by waking both queues * of waiters or doing some complicated lock handoff gymnastics. */ v = RW_UNLOCKED; if (rw->rw_lock & RW_LOCK_WRITE_WAITERS) { queue = TS_EXCLUSIVE_QUEUE; v |= (rw->rw_lock & RW_LOCK_READ_WAITERS); } else queue = TS_SHARED_QUEUE; /* Wake up all waiters for the specific queue. */ if (LOCK_LOG_TEST(&rw->lock_object, 0)) CTR3(KTR_LOCK, "%s: %p waking up %s waiters", __func__, rw, queue == TS_SHARED_QUEUE ? "read" : "write"); turnstile_broadcast(ts, queue); atomic_store_rel_ptr(&rw->rw_lock, v); turnstile_unpend(ts, TS_EXCLUSIVE_LOCK); turnstile_chain_unlock(&rw->lock_object); } /* * Attempt to do a non-blocking upgrade from a read lock to a write * lock. This will only succeed if this thread holds a single read * lock. Returns true if the upgrade succeeded and false otherwise. */ int __rw_try_upgrade(volatile uintptr_t *c, const char *file, int line) { struct rwlock *rw; uintptr_t v, x, tid; struct turnstile *ts; int success; if (SCHEDULER_STOPPED()) return (1); rw = rwlock2rw(c); KASSERT(rw->rw_lock != RW_DESTROYED, ("rw_try_upgrade() of destroyed rwlock @ %s:%d", file, line)); __rw_assert(c, RA_RLOCKED, file, line); /* * Attempt to switch from one reader to a writer. If there * are any write waiters, then we will have to lock the * turnstile first to prevent races with another writer * calling turnstile_wait() before we have claimed this * turnstile. So, do the simple case of no waiters first. */ tid = (uintptr_t)curthread; success = 0; for (;;) { v = rw->rw_lock; if (RW_READERS(v) > 1) break; if (!(v & RW_LOCK_WAITERS)) { success = atomic_cmpset_ptr(&rw->rw_lock, v, tid); if (!success) continue; break; } /* * Ok, we think we have waiters, so lock the turnstile. 
*/ ts = turnstile_trywait(&rw->lock_object); v = rw->rw_lock; if (RW_READERS(v) > 1) { turnstile_cancel(ts); break; } /* * Try to switch from one reader to a writer again. This time * we honor the current state of the waiters flags. * If we obtain the lock with the flags set, then claim * ownership of the turnstile. */ x = rw->rw_lock & RW_LOCK_WAITERS; success = atomic_cmpset_ptr(&rw->rw_lock, v, tid | x); if (success) { if (x) turnstile_claim(ts); else turnstile_cancel(ts); break; } turnstile_cancel(ts); } LOCK_LOG_TRY("WUPGRADE", &rw->lock_object, 0, success, file, line); if (success) { curthread->td_rw_rlocks--; WITNESS_UPGRADE(&rw->lock_object, LOP_EXCLUSIVE | LOP_TRYLOCK, file, line); LOCKSTAT_RECORD0(rw__upgrade, rw); } return (success); } /* * Downgrade a write lock into a single read lock. */ void __rw_downgrade(volatile uintptr_t *c, const char *file, int line) { struct rwlock *rw; struct turnstile *ts; uintptr_t tid, v; int rwait, wwait; if (SCHEDULER_STOPPED()) return; rw = rwlock2rw(c); KASSERT(rw->rw_lock != RW_DESTROYED, ("rw_downgrade() of destroyed rwlock @ %s:%d", file, line)); __rw_assert(c, RA_WLOCKED | RA_NOTRECURSED, file, line); #ifndef INVARIANTS if (rw_recursed(rw)) panic("downgrade of a recursed lock"); #endif WITNESS_DOWNGRADE(&rw->lock_object, 0, file, line); /* * Convert from a writer to a single reader. First we handle * the easy case with no waiters. If there are any waiters, we * lock the turnstile and "disown" the lock. */ tid = (uintptr_t)curthread; if (atomic_cmpset_rel_ptr(&rw->rw_lock, tid, RW_READERS_LOCK(1))) goto out; /* * Ok, we think we have waiters, so lock the turnstile so we can * read the waiter flags without any races. */ turnstile_chain_lock(&rw->lock_object); v = rw->rw_lock & RW_LOCK_WAITERS; rwait = v & RW_LOCK_READ_WAITERS; wwait = v & RW_LOCK_WRITE_WAITERS; MPASS(rwait | wwait); /* * Downgrade from a write lock while preserving waiters flag * and give up ownership of the turnstile. */ ts = turnstile_lookup(&rw->lock_object); MPASS(ts != NULL); if (!wwait) v &= ~RW_LOCK_READ_WAITERS; atomic_store_rel_ptr(&rw->rw_lock, RW_READERS_LOCK(1) | v); /* * Wake other readers if there are no writers pending. Otherwise they * won't be able to acquire the lock anyway. */ if (rwait && !wwait) { turnstile_broadcast(ts, TS_SHARED_QUEUE); turnstile_unpend(ts, TS_EXCLUSIVE_LOCK); } else turnstile_disown(ts); turnstile_chain_unlock(&rw->lock_object); out: curthread->td_rw_rlocks++; LOCK_LOG_LOCK("WDOWNGRADE", &rw->lock_object, 0, 0, file, line); LOCKSTAT_RECORD0(rw__downgrade, rw); } #ifdef INVARIANT_SUPPORT #ifndef INVARIANTS #undef __rw_assert #endif /* * In the non-WITNESS case, rw_assert() can only detect that at least * *some* thread owns an rlock, but it cannot guarantee that *this* * thread owns an rlock. */ void __rw_assert(const volatile uintptr_t *c, int what, const char *file, int line) { const struct rwlock *rw; if (panicstr != NULL) return; rw = rwlock2rw(c); switch (what) { case RA_LOCKED: case RA_LOCKED | RA_RECURSED: case RA_LOCKED | RA_NOTRECURSED: case RA_RLOCKED: case RA_RLOCKED | RA_RECURSED: case RA_RLOCKED | RA_NOTRECURSED: #ifdef WITNESS witness_assert(&rw->lock_object, what, file, line); #else /* * If some other thread has a write lock or we have one * and are asserting a read lock, fail. Also, if no one * has a lock at all, fail. 
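Sketch (hypothetical names) of the upgrade/downgrade dance the two functions above enable: look up under the shared lock, upgrade only when a modification turns out to be needed, and fall back to a full exclusive acquire if the upgrade loses a race.

#include <sys/param.h>
#include <sys/lock.h>
#include <sys/rwlock.h>

static void
example_update(struct rwlock *rwp, int *statep, int v)
{

	rw_rlock(rwp);
	if (*statep != v) {
		if (!rw_try_upgrade(rwp)) {
			/* Lost the race: drop and retake exclusively. */
			rw_runlock(rwp);
			rw_wlock(rwp);
		}
		*statep = v;
		rw_downgrade(rwp);	/* back to shared for the final read */
	}
	rw_runlock(rwp);
}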
*/ if (rw->rw_lock == RW_UNLOCKED || (!(rw->rw_lock & RW_LOCK_READ) && (what & RA_RLOCKED || rw_wowner(rw) != curthread))) panic("Lock %s not %slocked @ %s:%d\n", rw->lock_object.lo_name, (what & RA_RLOCKED) ? "read " : "", file, line); if (!(rw->rw_lock & RW_LOCK_READ) && !(what & RA_RLOCKED)) { if (rw_recursed(rw)) { if (what & RA_NOTRECURSED) panic("Lock %s recursed @ %s:%d\n", rw->lock_object.lo_name, file, line); } else if (what & RA_RECURSED) panic("Lock %s not recursed @ %s:%d\n", rw->lock_object.lo_name, file, line); } #endif break; case RA_WLOCKED: case RA_WLOCKED | RA_RECURSED: case RA_WLOCKED | RA_NOTRECURSED: if (rw_wowner(rw) != curthread) panic("Lock %s not exclusively locked @ %s:%d\n", rw->lock_object.lo_name, file, line); if (rw_recursed(rw)) { if (what & RA_NOTRECURSED) panic("Lock %s recursed @ %s:%d\n", rw->lock_object.lo_name, file, line); } else if (what & RA_RECURSED) panic("Lock %s not recursed @ %s:%d\n", rw->lock_object.lo_name, file, line); break; case RA_UNLOCKED: #ifdef WITNESS witness_assert(&rw->lock_object, what, file, line); #else /* * If we hold a write lock fail. We can't reliably check * to see if we hold a read lock or not. */ if (rw_wowner(rw) == curthread) panic("Lock %s exclusively locked @ %s:%d\n", rw->lock_object.lo_name, file, line); #endif break; default: panic("Unknown rw lock assertion: %d @ %s:%d", what, file, line); } } #endif /* INVARIANT_SUPPORT */ #ifdef DDB void db_show_rwlock(const struct lock_object *lock) { const struct rwlock *rw; struct thread *td; rw = (const struct rwlock *)lock; db_printf(" state: "); if (rw->rw_lock == RW_UNLOCKED) db_printf("UNLOCKED\n"); else if (rw->rw_lock == RW_DESTROYED) { db_printf("DESTROYED\n"); return; } else if (rw->rw_lock & RW_LOCK_READ) db_printf("RLOCK: %ju locks\n", (uintmax_t)(RW_READERS(rw->rw_lock))); else { td = rw_wowner(rw); db_printf("WLOCK: %p (tid %d, pid %d, \"%s\")\n", td, td->td_tid, td->td_proc->p_pid, td->td_name); if (rw_recursed(rw)) db_printf(" recursed: %u\n", rw->rw_recurse); } db_printf(" waiters: "); switch (rw->rw_lock & (RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS)) { case RW_LOCK_READ_WAITERS: db_printf("readers\n"); break; case RW_LOCK_WRITE_WAITERS: db_printf("writers\n"); break; case RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS: db_printf("readers and writers\n"); break; default: db_printf("none\n"); break; } } #endif Index: user/alc/PQ_LAUNDRY/sys/kern/kern_sx.c =================================================================== --- user/alc/PQ_LAUNDRY/sys/kern/kern_sx.c (revision 303666) +++ user/alc/PQ_LAUNDRY/sys/kern/kern_sx.c (revision 303667) @@ -1,1289 +1,1293 @@ /*- * Copyright (c) 2007 Attilio Rao * Copyright (c) 2001 Jason Evans * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice(s), this list of conditions and the following disclaimer as * the first lines of this file unmodified other than the possible * addition of one or more copyright notices. * 2. Redistributions in binary form must reproduce the above copyright * notice(s), this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
* * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH * DAMAGE. */ /* * Shared/exclusive locks. This implementation attempts to ensure * deterministic lock granting behavior, so that slocks and xlocks are * interleaved. * * Priority propagation will not generally raise the priority of lock holders, * so should not be relied upon in combination with sx locks. */ #include "opt_ddb.h" #include "opt_hwpmc_hooks.h" #include "opt_no_adaptive_sx.h" #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #if defined(SMP) && !defined(NO_ADAPTIVE_SX) #include #endif #ifdef DDB #include #endif #if defined(SMP) && !defined(NO_ADAPTIVE_SX) #define ADAPTIVE_SX #endif CTASSERT((SX_NOADAPTIVE & LO_CLASSFLAGS) == SX_NOADAPTIVE); #ifdef HWPMC_HOOKS #include PMC_SOFT_DECLARE( , , lock, failed); #endif /* Handy macros for sleep queues. */ #define SQ_EXCLUSIVE_QUEUE 0 #define SQ_SHARED_QUEUE 1 /* * Variations on DROP_GIANT()/PICKUP_GIANT() for use in this file. We * drop Giant anytime we have to sleep or if we adaptively spin. */ #define GIANT_DECLARE \ int _giantcnt = 0; \ WITNESS_SAVE_DECL(Giant) \ #define GIANT_SAVE() do { \ if (mtx_owned(&Giant)) { \ WITNESS_SAVE(&Giant.lock_object, Giant); \ while (mtx_owned(&Giant)) { \ _giantcnt++; \ mtx_unlock(&Giant); \ } \ } \ } while (0) #define GIANT_RESTORE() do { \ if (_giantcnt > 0) { \ mtx_assert(&Giant, MA_NOTOWNED); \ while (_giantcnt--) \ mtx_lock(&Giant); \ WITNESS_RESTORE(&Giant.lock_object, Giant); \ } \ } while (0) /* * Returns true if an exclusive lock is recursed. It assumes * curthread currently has an exclusive lock. 
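A minimal consumer sketch (hypothetical names, not part of this diff) for the sx primitives below; the point of an sx lock is that, unlike a mutex, it may be held across a sleep such as an M_WAITOK allocation.

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/sx.h>

static MALLOC_DEFINE(M_EXAMPLE, "example", "example allocations");

static struct sx example_sx;
SX_SYSINIT(example_sx, &example_sx, "example sx");

static void *
example_alloc(size_t len)
{
	void *p;

	sx_xlock(&example_sx);
	p = malloc(len, M_EXAMPLE, M_WAITOK);	/* sleeping here is legal */
	sx_xunlock(&example_sx);
	return (p);
}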
*/ #define sx_recursed(sx) ((sx)->sx_recurse != 0) static void assert_sx(const struct lock_object *lock, int what); #ifdef DDB static void db_show_sx(const struct lock_object *lock); #endif static void lock_sx(struct lock_object *lock, uintptr_t how); #ifdef KDTRACE_HOOKS static int owner_sx(const struct lock_object *lock, struct thread **owner); #endif static uintptr_t unlock_sx(struct lock_object *lock); struct lock_class lock_class_sx = { .lc_name = "sx", .lc_flags = LC_SLEEPLOCK | LC_SLEEPABLE | LC_RECURSABLE | LC_UPGRADABLE, .lc_assert = assert_sx, #ifdef DDB .lc_ddb_show = db_show_sx, #endif .lc_lock = lock_sx, .lc_unlock = unlock_sx, #ifdef KDTRACE_HOOKS .lc_owner = owner_sx, #endif }; #ifndef INVARIANTS #define _sx_assert(sx, what, file, line) #endif #ifdef ADAPTIVE_SX static u_int asx_retries = 10; static u_int asx_loops = 10000; static SYSCTL_NODE(_debug, OID_AUTO, sx, CTLFLAG_RD, NULL, "sxlock debugging"); SYSCTL_UINT(_debug_sx, OID_AUTO, retries, CTLFLAG_RW, &asx_retries, 0, ""); SYSCTL_UINT(_debug_sx, OID_AUTO, loops, CTLFLAG_RW, &asx_loops, 0, ""); static struct lock_delay_config sx_delay = { .initial = 1000, .step = 500, .min = 100, .max = 5000, }; SYSCTL_INT(_debug_sx, OID_AUTO, delay_initial, CTLFLAG_RW, &sx_delay.initial, 0, ""); SYSCTL_INT(_debug_sx, OID_AUTO, delay_step, CTLFLAG_RW, &sx_delay.step, 0, ""); SYSCTL_INT(_debug_sx, OID_AUTO, delay_min, CTLFLAG_RW, &sx_delay.min, 0, ""); SYSCTL_INT(_debug_sx, OID_AUTO, delay_max, CTLFLAG_RW, &sx_delay.max, 0, ""); static void sx_delay_sysinit(void *dummy) { sx_delay.initial = mp_ncpus * 25; sx_delay.step = (mp_ncpus * 25) / 2; sx_delay.min = mp_ncpus * 5; sx_delay.max = mp_ncpus * 25 * 10; } LOCK_DELAY_SYSINIT(sx_delay_sysinit); #endif void assert_sx(const struct lock_object *lock, int what) { sx_assert((const struct sx *)lock, what); } void lock_sx(struct lock_object *lock, uintptr_t how) { struct sx *sx; sx = (struct sx *)lock; if (how) sx_slock(sx); else sx_xlock(sx); } uintptr_t unlock_sx(struct lock_object *lock) { struct sx *sx; sx = (struct sx *)lock; sx_assert(sx, SA_LOCKED | SA_NOTRECURSED); if (sx_xlocked(sx)) { sx_xunlock(sx); return (0); } else { sx_sunlock(sx); return (1); } } #ifdef KDTRACE_HOOKS int owner_sx(const struct lock_object *lock, struct thread **owner) { const struct sx *sx = (const struct sx *)lock; uintptr_t x = sx->sx_lock; *owner = (struct thread *)SX_OWNER(x); return ((x & SX_LOCK_SHARED) != 0 ? 
(SX_SHARERS(x) != 0) : (*owner != NULL)); } #endif void sx_sysinit(void *arg) { struct sx_args *sargs = arg; sx_init_flags(sargs->sa_sx, sargs->sa_desc, sargs->sa_flags); } void sx_init_flags(struct sx *sx, const char *description, int opts) { int flags; MPASS((opts & ~(SX_QUIET | SX_RECURSE | SX_NOWITNESS | SX_DUPOK | SX_NOPROFILE | SX_NOADAPTIVE | SX_NEW)) == 0); ASSERT_ATOMIC_LOAD_PTR(sx->sx_lock, ("%s: sx_lock not aligned for %s: %p", __func__, description, &sx->sx_lock)); flags = LO_SLEEPABLE | LO_UPGRADABLE; if (opts & SX_DUPOK) flags |= LO_DUPOK; if (opts & SX_NOPROFILE) flags |= LO_NOPROFILE; if (!(opts & SX_NOWITNESS)) flags |= LO_WITNESS; if (opts & SX_RECURSE) flags |= LO_RECURSABLE; if (opts & SX_QUIET) flags |= LO_QUIET; if (opts & SX_NEW) flags |= LO_NEW; flags |= opts & SX_NOADAPTIVE; lock_init(&sx->lock_object, &lock_class_sx, description, NULL, flags); sx->sx_lock = SX_LOCK_UNLOCKED; sx->sx_recurse = 0; } void sx_destroy(struct sx *sx) { KASSERT(sx->sx_lock == SX_LOCK_UNLOCKED, ("sx lock still held")); KASSERT(sx->sx_recurse == 0, ("sx lock still recursed")); sx->sx_lock = SX_LOCK_DESTROYED; lock_destroy(&sx->lock_object); } int _sx_slock(struct sx *sx, int opts, const char *file, int line) { int error = 0; if (SCHEDULER_STOPPED()) return (0); KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread), ("sx_slock() by idle thread %p on sx %s @ %s:%d", curthread, sx->lock_object.lo_name, file, line)); KASSERT(sx->sx_lock != SX_LOCK_DESTROYED, ("sx_slock() of destroyed sx @ %s:%d", file, line)); WITNESS_CHECKORDER(&sx->lock_object, LOP_NEWORDER, file, line, NULL); error = __sx_slock(sx, opts, file, line); if (!error) { LOCK_LOG_LOCK("SLOCK", &sx->lock_object, 0, 0, file, line); WITNESS_LOCK(&sx->lock_object, 0, file, line); TD_LOCKS_INC(curthread); } return (error); } int sx_try_slock_(struct sx *sx, const char *file, int line) { uintptr_t x; if (SCHEDULER_STOPPED()) return (1); KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread), ("sx_try_slock() by idle thread %p on sx %s @ %s:%d", curthread, sx->lock_object.lo_name, file, line)); for (;;) { x = sx->sx_lock; KASSERT(x != SX_LOCK_DESTROYED, ("sx_try_slock() of destroyed sx @ %s:%d", file, line)); if (!(x & SX_LOCK_SHARED)) break; if (atomic_cmpset_acq_ptr(&sx->sx_lock, x, x + SX_ONE_SHARER)) { LOCK_LOG_TRY("SLOCK", &sx->lock_object, 0, 1, file, line); WITNESS_LOCK(&sx->lock_object, LOP_TRYLOCK, file, line); LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(sx__acquire, sx, 0, 0, file, line, LOCKSTAT_READER); TD_LOCKS_INC(curthread); return (1); } } LOCK_LOG_TRY("SLOCK", &sx->lock_object, 0, 0, file, line); return (0); } int _sx_xlock(struct sx *sx, int opts, const char *file, int line) { int error = 0; if (SCHEDULER_STOPPED()) return (0); KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread), ("sx_xlock() by idle thread %p on sx %s @ %s:%d", curthread, sx->lock_object.lo_name, file, line)); KASSERT(sx->sx_lock != SX_LOCK_DESTROYED, ("sx_xlock() of destroyed sx @ %s:%d", file, line)); WITNESS_CHECKORDER(&sx->lock_object, LOP_NEWORDER | LOP_EXCLUSIVE, file, line, NULL); error = __sx_xlock(sx, curthread, opts, file, line); if (!error) { LOCK_LOG_LOCK("XLOCK", &sx->lock_object, 0, sx->sx_recurse, file, line); WITNESS_LOCK(&sx->lock_object, LOP_EXCLUSIVE, file, line); TD_LOCKS_INC(curthread); } return (error); } int sx_try_xlock_(struct sx *sx, const char *file, int line) { int rval; if (SCHEDULER_STOPPED()) return (1); KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread), ("sx_try_xlock() by idle thread %p on sx %s @ %s:%d", 
curthread, sx->lock_object.lo_name, file, line)); KASSERT(sx->sx_lock != SX_LOCK_DESTROYED, ("sx_try_xlock() of destroyed sx @ %s:%d", file, line)); if (sx_xlocked(sx) && (sx->lock_object.lo_flags & LO_RECURSABLE) != 0) { sx->sx_recurse++; atomic_set_ptr(&sx->sx_lock, SX_LOCK_RECURSED); rval = 1; } else rval = atomic_cmpset_acq_ptr(&sx->sx_lock, SX_LOCK_UNLOCKED, (uintptr_t)curthread); LOCK_LOG_TRY("XLOCK", &sx->lock_object, 0, rval, file, line); if (rval) { WITNESS_LOCK(&sx->lock_object, LOP_EXCLUSIVE | LOP_TRYLOCK, file, line); if (!sx_recursed(sx)) LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(sx__acquire, sx, 0, 0, file, line, LOCKSTAT_WRITER); TD_LOCKS_INC(curthread); } return (rval); } void _sx_sunlock(struct sx *sx, const char *file, int line) { if (SCHEDULER_STOPPED()) return; KASSERT(sx->sx_lock != SX_LOCK_DESTROYED, ("sx_sunlock() of destroyed sx @ %s:%d", file, line)); _sx_assert(sx, SA_SLOCKED, file, line); WITNESS_UNLOCK(&sx->lock_object, 0, file, line); LOCK_LOG_LOCK("SUNLOCK", &sx->lock_object, 0, 0, file, line); __sx_sunlock(sx, file, line); TD_LOCKS_DEC(curthread); } void _sx_xunlock(struct sx *sx, const char *file, int line) { if (SCHEDULER_STOPPED()) return; KASSERT(sx->sx_lock != SX_LOCK_DESTROYED, ("sx_xunlock() of destroyed sx @ %s:%d", file, line)); _sx_assert(sx, SA_XLOCKED, file, line); WITNESS_UNLOCK(&sx->lock_object, LOP_EXCLUSIVE, file, line); LOCK_LOG_LOCK("XUNLOCK", &sx->lock_object, 0, sx->sx_recurse, file, line); __sx_xunlock(sx, curthread, file, line); TD_LOCKS_DEC(curthread); } /* * Try to do a non-blocking upgrade from a shared lock to an exclusive lock. * This will only succeed if this thread holds a single shared lock. * Return 1 if the upgrade succeeded, 0 otherwise. */ int sx_try_upgrade_(struct sx *sx, const char *file, int line) { uintptr_t x; int success; if (SCHEDULER_STOPPED()) return (1); KASSERT(sx->sx_lock != SX_LOCK_DESTROYED, ("sx_try_upgrade() of destroyed sx @ %s:%d", file, line)); _sx_assert(sx, SA_SLOCKED, file, line); /* * Try to switch from one shared lock to an exclusive lock. We need * to maintain the SX_LOCK_EXCLUSIVE_WAITERS flag if set so that * we will wake up the exclusive waiters when we drop the lock. */ x = sx->sx_lock & SX_LOCK_EXCLUSIVE_WAITERS; success = atomic_cmpset_ptr(&sx->sx_lock, SX_SHARERS_LOCK(1) | x, (uintptr_t)curthread | x); LOCK_LOG_TRY("XUPGRADE", &sx->lock_object, 0, success, file, line); if (success) { WITNESS_UPGRADE(&sx->lock_object, LOP_EXCLUSIVE | LOP_TRYLOCK, file, line); LOCKSTAT_RECORD0(sx__upgrade, sx); } return (success); } /* * Downgrade an unrecursed exclusive lock into a single shared lock. */ void sx_downgrade_(struct sx *sx, const char *file, int line) { uintptr_t x; int wakeup_swapper; if (SCHEDULER_STOPPED()) return; KASSERT(sx->sx_lock != SX_LOCK_DESTROYED, ("sx_downgrade() of destroyed sx @ %s:%d", file, line)); _sx_assert(sx, SA_XLOCKED | SA_NOTRECURSED, file, line); #ifndef INVARIANTS if (sx_recursed(sx)) panic("downgrade of a recursed lock"); #endif WITNESS_DOWNGRADE(&sx->lock_object, 0, file, line); /* * Try to switch from an exclusive lock with no shared waiters * to one sharer with no shared waiters. If there are * exclusive waiters, we don't need to lock the sleep queue so * long as we preserve the flag. We do one quick try and if * that fails we grab the sleepq lock to keep the flags from * changing and do it the slow way. * * We have to lock the sleep queue if there are shared waiters * so we can wake them up.
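sx_downgrade_() here supports the common publish-then-read pattern, sketched with hypothetical names: mutate under the exclusive lock, then atomically become a reader so late arrivals are not serialized behind a full release and re-acquire.

#include <sys/param.h>
#include <sys/lock.h>
#include <sys/sx.h>

static void
example_publish(struct sx *sxp, int *statep, int v)
{

	sx_xlock(sxp);
	*statep = v;		/* exclusive: mutate */
	sx_downgrade(sxp);	/* atomically become one of the readers */
	/* ... read-only work under the shared lock ... */
	sx_sunlock(sxp);
}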
*/ x = sx->sx_lock; if (!(x & SX_LOCK_SHARED_WAITERS) && atomic_cmpset_rel_ptr(&sx->sx_lock, x, SX_SHARERS_LOCK(1) | (x & SX_LOCK_EXCLUSIVE_WAITERS))) { LOCK_LOG_LOCK("XDOWNGRADE", &sx->lock_object, 0, 0, file, line); return; } /* * Lock the sleep queue so we can read the waiters bits * without any races and wakeup any shared waiters. */ sleepq_lock(&sx->lock_object); /* * Preserve SX_LOCK_EXCLUSIVE_WAITERS while downgraded to a single * shared lock. If there are any shared waiters, wake them up. */ wakeup_swapper = 0; x = sx->sx_lock; atomic_store_rel_ptr(&sx->sx_lock, SX_SHARERS_LOCK(1) | (x & SX_LOCK_EXCLUSIVE_WAITERS)); if (x & SX_LOCK_SHARED_WAITERS) wakeup_swapper = sleepq_broadcast(&sx->lock_object, SLEEPQ_SX, 0, SQ_SHARED_QUEUE); sleepq_release(&sx->lock_object); LOCK_LOG_LOCK("XDOWNGRADE", &sx->lock_object, 0, 0, file, line); LOCKSTAT_RECORD0(sx__downgrade, sx); if (wakeup_swapper) kick_proc0(); } /* * This function represents the so-called 'hard case' for sx_xlock * operation. All 'easy case' failures are redirected to this. Note * that ideally this would be a static function, but it needs to be * accessible from at least sx.h. */ int _sx_xlock_hard(struct sx *sx, uintptr_t tid, int opts, const char *file, int line) { GIANT_DECLARE; #ifdef ADAPTIVE_SX volatile struct thread *owner; u_int i, spintries = 0; #endif uintptr_t x; #ifdef LOCK_PROFILING uint64_t waittime = 0; int contested = 0; #endif int error = 0; #if defined(ADAPTIVE_SX) || defined(KDTRACE_HOOKS) struct lock_delay_arg lda; #endif #ifdef KDTRACE_HOOKS uintptr_t state; u_int sleep_cnt = 0; int64_t sleep_time = 0; int64_t all_time = 0; #endif if (SCHEDULER_STOPPED()) return (0); -#if defined(ADAPTIVE_SX) || defined(KDTRACE_HOOKS) +#if defined(ADAPTIVE_SX) lock_delay_arg_init(&lda, &sx_delay); +#elif defined(KDTRACE_HOOKS) + lock_delay_arg_init(&lda, NULL); #endif /* If we already hold an exclusive lock, then recurse. */ if (sx_xlocked(sx)) { KASSERT((sx->lock_object.lo_flags & LO_RECURSABLE) != 0, ("_sx_xlock_hard: recursed on non-recursive sx %s @ %s:%d\n", sx->lock_object.lo_name, file, line)); sx->sx_recurse++; atomic_set_ptr(&sx->sx_lock, SX_LOCK_RECURSED); if (LOCK_LOG_TEST(&sx->lock_object, 0)) CTR2(KTR_LOCK, "%s: %p recursing", __func__, sx); return (0); } if (LOCK_LOG_TEST(&sx->lock_object, 0)) CTR5(KTR_LOCK, "%s: %s contested (lock=%p) at %s:%d", __func__, sx->lock_object.lo_name, (void *)sx->sx_lock, file, line); #ifdef KDTRACE_HOOKS all_time -= lockstat_nsecs(&sx->lock_object); state = sx->sx_lock; #endif for (;;) { if (sx->sx_lock == SX_LOCK_UNLOCKED && atomic_cmpset_acq_ptr(&sx->sx_lock, SX_LOCK_UNLOCKED, tid)) break; #ifdef KDTRACE_HOOKS lda.spin_cnt++; #endif #ifdef HWPMC_HOOKS PMC_SOFT_CALL( , , lock, failed); #endif lock_profile_obtain_lock_failed(&sx->lock_object, &contested, &waittime); #ifdef ADAPTIVE_SX /* * If the lock is write locked and the owner is * running on another CPU, spin until the owner stops * running or the state of the lock changes. 
*/ x = sx->sx_lock; if ((sx->lock_object.lo_flags & SX_NOADAPTIVE) == 0) { if ((x & SX_LOCK_SHARED) == 0) { x = SX_OWNER(x); owner = (struct thread *)x; if (TD_IS_RUNNING(owner)) { if (LOCK_LOG_TEST(&sx->lock_object, 0)) CTR3(KTR_LOCK, "%s: spinning on %p held by %p", __func__, sx, owner); KTR_STATE1(KTR_SCHED, "thread", sched_tdname(curthread), "spinning", "lockname:\"%s\"", sx->lock_object.lo_name); GIANT_SAVE(); while (SX_OWNER(sx->sx_lock) == x && TD_IS_RUNNING(owner)) lock_delay(&lda); KTR_STATE0(KTR_SCHED, "thread", sched_tdname(curthread), "running"); continue; } } else if (SX_SHARERS(x) && spintries < asx_retries) { KTR_STATE1(KTR_SCHED, "thread", sched_tdname(curthread), "spinning", "lockname:\"%s\"", sx->lock_object.lo_name); GIANT_SAVE(); spintries++; for (i = 0; i < asx_loops; i++) { if (LOCK_LOG_TEST(&sx->lock_object, 0)) CTR4(KTR_LOCK, "%s: shared spinning on %p with %u and %u", __func__, sx, spintries, i); x = sx->sx_lock; if ((x & SX_LOCK_SHARED) == 0 || SX_SHARERS(x) == 0) break; cpu_spinwait(); #ifdef KDTRACE_HOOKS lda.spin_cnt++; #endif } KTR_STATE0(KTR_SCHED, "thread", sched_tdname(curthread), "running"); if (i != asx_loops) continue; } } #endif sleepq_lock(&sx->lock_object); x = sx->sx_lock; /* * If the lock was released while spinning on the * sleep queue chain lock, try again. */ if (x == SX_LOCK_UNLOCKED) { sleepq_release(&sx->lock_object); continue; } #ifdef ADAPTIVE_SX /* * The current lock owner might have started executing * on another CPU (or the lock could have changed * owners) while we were waiting on the sleep queue * chain lock. If so, drop the sleep queue lock and try * again. */ if (!(x & SX_LOCK_SHARED) && (sx->lock_object.lo_flags & SX_NOADAPTIVE) == 0) { owner = (struct thread *)SX_OWNER(x); if (TD_IS_RUNNING(owner)) { sleepq_release(&sx->lock_object); continue; } } #endif /* * If an exclusive lock was released with both shared * and exclusive waiters and a shared waiter hasn't * woken up and acquired the lock yet, sx_lock will be * set to SX_LOCK_UNLOCKED | SX_LOCK_EXCLUSIVE_WAITERS. * If we see that value, try to acquire it once. Note * that we have to preserve SX_LOCK_EXCLUSIVE_WAITERS * as there are other exclusive waiters still. If we * fail, restart the loop. */ if (x == (SX_LOCK_UNLOCKED | SX_LOCK_EXCLUSIVE_WAITERS)) { if (atomic_cmpset_acq_ptr(&sx->sx_lock, SX_LOCK_UNLOCKED | SX_LOCK_EXCLUSIVE_WAITERS, tid | SX_LOCK_EXCLUSIVE_WAITERS)) { sleepq_release(&sx->lock_object); CTR2(KTR_LOCK, "%s: %p claimed by new writer", __func__, sx); break; } sleepq_release(&sx->lock_object); continue; } /* * Try to set the SX_LOCK_EXCLUSIVE_WAITERS. If we fail, * then loop back and retry. */ if (!(x & SX_LOCK_EXCLUSIVE_WAITERS)) { if (!atomic_cmpset_ptr(&sx->sx_lock, x, x | SX_LOCK_EXCLUSIVE_WAITERS)) { sleepq_release(&sx->lock_object); continue; } if (LOCK_LOG_TEST(&sx->lock_object, 0)) CTR2(KTR_LOCK, "%s: %p set excl waiters flag", __func__, sx); } /* * Since we have been unable to acquire the exclusive * lock and the exclusive waiters flag is set, we have * to sleep. */ if (LOCK_LOG_TEST(&sx->lock_object, 0)) CTR2(KTR_LOCK, "%s: %p blocking on sleep queue", __func__, sx); #ifdef KDTRACE_HOOKS sleep_time -= lockstat_nsecs(&sx->lock_object); #endif GIANT_SAVE(); sleepq_add(&sx->lock_object, NULL, sx->lock_object.lo_name, SLEEPQ_SX | ((opts & SX_INTERRUPTIBLE) ?
SLEEPQ_INTERRUPTIBLE : 0), SQ_EXCLUSIVE_QUEUE); if (!(opts & SX_INTERRUPTIBLE)) sleepq_wait(&sx->lock_object, 0); else error = sleepq_wait_sig(&sx->lock_object, 0); #ifdef KDTRACE_HOOKS sleep_time += lockstat_nsecs(&sx->lock_object); sleep_cnt++; #endif if (error) { if (LOCK_LOG_TEST(&sx->lock_object, 0)) CTR2(KTR_LOCK, "%s: interruptible sleep by %p suspended by signal", __func__, sx); break; } if (LOCK_LOG_TEST(&sx->lock_object, 0)) CTR2(KTR_LOCK, "%s: %p resuming from sleep queue", __func__, sx); } #ifdef KDTRACE_HOOKS all_time += lockstat_nsecs(&sx->lock_object); if (sleep_time) LOCKSTAT_RECORD4(sx__block, sx, sleep_time, LOCKSTAT_WRITER, (state & SX_LOCK_SHARED) == 0, (state & SX_LOCK_SHARED) == 0 ? 0 : SX_SHARERS(state)); if (lda.spin_cnt > sleep_cnt) LOCKSTAT_RECORD4(sx__spin, sx, all_time - sleep_time, LOCKSTAT_WRITER, (state & SX_LOCK_SHARED) == 0, (state & SX_LOCK_SHARED) == 0 ? 0 : SX_SHARERS(state)); #endif if (!error) LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(sx__acquire, sx, contested, waittime, file, line, LOCKSTAT_WRITER); GIANT_RESTORE(); return (error); } /* * This function represents the so-called 'hard case' for sx_xunlock * operation. All 'easy case' failures are redirected to this. Note * that ideally this would be a static function, but it needs to be * accessible from at least sx.h. */ void _sx_xunlock_hard(struct sx *sx, uintptr_t tid, const char *file, int line) { uintptr_t x; int queue, wakeup_swapper; if (SCHEDULER_STOPPED()) return; MPASS(!(sx->sx_lock & SX_LOCK_SHARED)); /* If the lock is recursed, then unrecurse one level. */ if (sx_xlocked(sx) && sx_recursed(sx)) { if ((--sx->sx_recurse) == 0) atomic_clear_ptr(&sx->sx_lock, SX_LOCK_RECURSED); if (LOCK_LOG_TEST(&sx->lock_object, 0)) CTR2(KTR_LOCK, "%s: %p unrecursing", __func__, sx); return; } MPASS(sx->sx_lock & (SX_LOCK_SHARED_WAITERS | SX_LOCK_EXCLUSIVE_WAITERS)); if (LOCK_LOG_TEST(&sx->lock_object, 0)) CTR2(KTR_LOCK, "%s: %p contested", __func__, sx); sleepq_lock(&sx->lock_object); x = SX_LOCK_UNLOCKED; /* * The wake up algorithm here is quite simple and probably not * ideal. It gives precedence to shared waiters if they are * present. For this condition, we have to preserve the * state of the exclusive waiters flag. * If interruptible sleeps left the shared queue empty, avoid * starvation of the threads sleeping on the exclusive queue by giving * them precedence and clearing the shared waiters bit anyway. */ if ((sx->sx_lock & SX_LOCK_SHARED_WAITERS) != 0 && sleepq_sleepcnt(&sx->lock_object, SQ_SHARED_QUEUE) != 0) { queue = SQ_SHARED_QUEUE; x |= (sx->sx_lock & SX_LOCK_EXCLUSIVE_WAITERS); } else queue = SQ_EXCLUSIVE_QUEUE; /* Wake up all the waiters for the specific queue. */ if (LOCK_LOG_TEST(&sx->lock_object, 0)) CTR3(KTR_LOCK, "%s: %p waking up all threads on %s queue", __func__, sx, queue == SQ_SHARED_QUEUE ? "shared" : "exclusive"); atomic_store_rel_ptr(&sx->sx_lock, x); wakeup_swapper = sleepq_broadcast(&sx->lock_object, SLEEPQ_SX, 0, queue); sleepq_release(&sx->lock_object); if (wakeup_swapper) kick_proc0(); } /* * This function represents the so-called 'hard case' for sx_slock * operation. All 'easy case' failures are redirected to this. Note * that ideally this would be a static function, but it needs to be * accessible from at least sx.h.
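 *
 * For reference, the 'easy case' is a single compare-and-set that
 * bumps the sharer count while the lock is share-locked (a sketch of
 * the first loop iteration below):
 *
 *	x = sx->sx_lock;
 *	if ((x & SX_LOCK_SHARED) != 0 &&
 *	    atomic_cmpset_acq_ptr(&sx->sx_lock, x, x + SX_ONE_SHARER))
 *		return (0);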
*/ int _sx_slock_hard(struct sx *sx, int opts, const char *file, int line) { GIANT_DECLARE; #ifdef ADAPTIVE_SX volatile struct thread *owner; #endif #ifdef LOCK_PROFILING uint64_t waittime = 0; int contested = 0; #endif uintptr_t x; int error = 0; #if defined(ADAPTIVE_SX) || defined(KDTRACE_HOOKS) struct lock_delay_arg lda; #endif #ifdef KDTRACE_HOOKS uintptr_t state; u_int sleep_cnt = 0; int64_t sleep_time = 0; int64_t all_time = 0; #endif if (SCHEDULER_STOPPED()) return (0); -#if defined(ADAPTIVE_SX) || defined(KDTRACE_HOOKS) +#if defined(ADAPTIVE_SX) lock_delay_arg_init(&lda, &sx_delay); +#elif defined(KDTRACE_HOOKS) + lock_delay_arg_init(&lda, NULL); #endif #ifdef KDTRACE_HOOKS state = sx->sx_lock; all_time -= lockstat_nsecs(&sx->lock_object); #endif /* * As with rwlocks, we don't make any attempt to try to block * shared locks once there is an exclusive waiter. */ for (;;) { #ifdef KDTRACE_HOOKS lda.spin_cnt++; #endif x = sx->sx_lock; /* * If no other thread has an exclusive lock then try to bump up * the count of sharers. Since we have to preserve the state * of SX_LOCK_EXCLUSIVE_WAITERS, if we fail to acquire the * shared lock loop back and retry. */ if (x & SX_LOCK_SHARED) { MPASS(!(x & SX_LOCK_SHARED_WAITERS)); if (atomic_cmpset_acq_ptr(&sx->sx_lock, x, x + SX_ONE_SHARER)) { if (LOCK_LOG_TEST(&sx->lock_object, 0)) CTR4(KTR_LOCK, "%s: %p succeed %p -> %p", __func__, sx, (void *)x, (void *)(x + SX_ONE_SHARER)); break; } continue; } #ifdef HWPMC_HOOKS PMC_SOFT_CALL( , , lock, failed); #endif lock_profile_obtain_lock_failed(&sx->lock_object, &contested, &waittime); #ifdef ADAPTIVE_SX /* * If the owner is running on another CPU, spin until * the owner stops running or the state of the lock * changes. */ if ((sx->lock_object.lo_flags & SX_NOADAPTIVE) == 0) { x = SX_OWNER(x); owner = (struct thread *)x; if (TD_IS_RUNNING(owner)) { if (LOCK_LOG_TEST(&sx->lock_object, 0)) CTR3(KTR_LOCK, "%s: spinning on %p held by %p", __func__, sx, owner); KTR_STATE1(KTR_SCHED, "thread", sched_tdname(curthread), "spinning", "lockname:\"%s\"", sx->lock_object.lo_name); GIANT_SAVE(); while (SX_OWNER(sx->sx_lock) == x && TD_IS_RUNNING(owner)) lock_delay(&lda); KTR_STATE0(KTR_SCHED, "thread", sched_tdname(curthread), "running"); continue; } } #endif /* * Some other thread already has an exclusive lock, so * start the process of blocking. */ sleepq_lock(&sx->lock_object); x = sx->sx_lock; /* * The lock could have been released while we spun. * In this case loop back and retry. */ if (x & SX_LOCK_SHARED) { sleepq_release(&sx->lock_object); continue; } #ifdef ADAPTIVE_SX /* * If the owner is running on another CPU, spin until * the owner stops running or the state of the lock * changes. */ if (!(x & SX_LOCK_SHARED) && (sx->lock_object.lo_flags & SX_NOADAPTIVE) == 0) { owner = (struct thread *)SX_OWNER(x); if (TD_IS_RUNNING(owner)) { sleepq_release(&sx->lock_object); continue; } } #endif /* * Try to set the SX_LOCK_SHARED_WAITERS flag. If we * fail to set it drop the sleep queue lock and loop * back. */ if (!(x & SX_LOCK_SHARED_WAITERS)) { if (!atomic_cmpset_ptr(&sx->sx_lock, x, x | SX_LOCK_SHARED_WAITERS)) { sleepq_release(&sx->lock_object); continue; } if (LOCK_LOG_TEST(&sx->lock_object, 0)) CTR2(KTR_LOCK, "%s: %p set shared waiters flag", __func__, sx); } /* * Since we have been unable to acquire the shared lock, * we have to sleep. 
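 * Because SX_LOCK_SHARED_WAITERS was set above while the sleepqueue
 * chain lock was held, a releasing writer must take that same chain
 * lock before waking anyone, so the wakeup cannot slip in between
 * the flag check above and sleepq_wait() below.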
*/ if (LOCK_LOG_TEST(&sx->lock_object, 0)) CTR2(KTR_LOCK, "%s: %p blocking on sleep queue", __func__, sx); #ifdef KDTRACE_HOOKS sleep_time -= lockstat_nsecs(&sx->lock_object); #endif GIANT_SAVE(); sleepq_add(&sx->lock_object, NULL, sx->lock_object.lo_name, SLEEPQ_SX | ((opts & SX_INTERRUPTIBLE) ? SLEEPQ_INTERRUPTIBLE : 0), SQ_SHARED_QUEUE); if (!(opts & SX_INTERRUPTIBLE)) sleepq_wait(&sx->lock_object, 0); else error = sleepq_wait_sig(&sx->lock_object, 0); #ifdef KDTRACE_HOOKS sleep_time += lockstat_nsecs(&sx->lock_object); sleep_cnt++; #endif if (error) { if (LOCK_LOG_TEST(&sx->lock_object, 0)) CTR2(KTR_LOCK, "%s: interruptible sleep by %p suspended by signal", __func__, sx); break; } if (LOCK_LOG_TEST(&sx->lock_object, 0)) CTR2(KTR_LOCK, "%s: %p resuming from sleep queue", __func__, sx); } #ifdef KDTRACE_HOOKS all_time += lockstat_nsecs(&sx->lock_object); if (sleep_time) LOCKSTAT_RECORD4(sx__block, sx, sleep_time, LOCKSTAT_READER, (state & SX_LOCK_SHARED) == 0, (state & SX_LOCK_SHARED) == 0 ? 0 : SX_SHARERS(state)); if (lda.spin_cnt > sleep_cnt) LOCKSTAT_RECORD4(sx__spin, sx, all_time - sleep_time, LOCKSTAT_READER, (state & SX_LOCK_SHARED) == 0, (state & SX_LOCK_SHARED) == 0 ? 0 : SX_SHARERS(state)); #endif if (error == 0) LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(sx__acquire, sx, contested, waittime, file, line, LOCKSTAT_READER); GIANT_RESTORE(); return (error); } /* * This function represents the so-called 'hard case' for sx_sunlock * operation. All 'easy case' failures are redirected to this. Note * that ideally this would be a static function, but it needs to be * accessible from at least sx.h. */ void _sx_sunlock_hard(struct sx *sx, const char *file, int line) { uintptr_t x; int wakeup_swapper; if (SCHEDULER_STOPPED()) return; for (;;) { x = sx->sx_lock; /* * We should never have shared waiters while at least one * thread holds a shared lock. */ KASSERT(!(x & SX_LOCK_SHARED_WAITERS), ("%s: waiting sharers", __func__)); /* * See if there is more than one shared lock held. If * so, just drop one and return. */ if (SX_SHARERS(x) > 1) { if (atomic_cmpset_rel_ptr(&sx->sx_lock, x, x - SX_ONE_SHARER)) { if (LOCK_LOG_TEST(&sx->lock_object, 0)) CTR4(KTR_LOCK, "%s: %p succeeded %p -> %p", __func__, sx, (void *)x, (void *)(x - SX_ONE_SHARER)); break; } continue; } /* * If there aren't any waiters for an exclusive lock, * then try to drop it quickly. */ if (!(x & SX_LOCK_EXCLUSIVE_WAITERS)) { MPASS(x == SX_SHARERS_LOCK(1)); if (atomic_cmpset_rel_ptr(&sx->sx_lock, SX_SHARERS_LOCK(1), SX_LOCK_UNLOCKED)) { if (LOCK_LOG_TEST(&sx->lock_object, 0)) CTR2(KTR_LOCK, "%s: %p last succeeded", __func__, sx); break; } continue; } /* * At this point, there should just be one sharer with * exclusive waiters. */ MPASS(x == (SX_SHARERS_LOCK(1) | SX_LOCK_EXCLUSIVE_WAITERS)); sleepq_lock(&sx->lock_object); /* * The wakeup semantics here are quite simple: just wake up * all the exclusive waiters. Note that the state of the lock * could have changed, so if the cmpset fails, loop back and * retry.
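 *
 * Schematically, the release is the single transition
 *
 *	SX_SHARERS_LOCK(1) | SX_LOCK_EXCLUSIVE_WAITERS -> SX_LOCK_UNLOCKED
 *
 * performed under the sleepqueue chain lock, so no new waiter can be
 * missed by the broadcast that follows.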
*/ if (!atomic_cmpset_rel_ptr(&sx->sx_lock, SX_SHARERS_LOCK(1) | SX_LOCK_EXCLUSIVE_WAITERS, SX_LOCK_UNLOCKED)) { sleepq_release(&sx->lock_object); continue; } if (LOCK_LOG_TEST(&sx->lock_object, 0)) CTR2(KTR_LOCK, "%s: %p waking up all threads on " "exclusive queue", __func__, sx); wakeup_swapper = sleepq_broadcast(&sx->lock_object, SLEEPQ_SX, 0, SQ_EXCLUSIVE_QUEUE); sleepq_release(&sx->lock_object); if (wakeup_swapper) kick_proc0(); break; } } #ifdef INVARIANT_SUPPORT #ifndef INVARIANTS #undef _sx_assert #endif /* * In the non-WITNESS case, sx_assert() can only detect that at least * *some* thread owns an slock, but it cannot guarantee that *this* * thread owns an slock. */ void _sx_assert(const struct sx *sx, int what, const char *file, int line) { #ifndef WITNESS int slocked = 0; #endif if (panicstr != NULL) return; switch (what) { case SA_SLOCKED: case SA_SLOCKED | SA_NOTRECURSED: case SA_SLOCKED | SA_RECURSED: #ifndef WITNESS slocked = 1; /* FALLTHROUGH */ #endif case SA_LOCKED: case SA_LOCKED | SA_NOTRECURSED: case SA_LOCKED | SA_RECURSED: #ifdef WITNESS witness_assert(&sx->lock_object, what, file, line); #else /* * If some other thread has an exclusive lock or we * have one and are asserting a shared lock, fail. * Also, if no one has a lock at all, fail. */ if (sx->sx_lock == SX_LOCK_UNLOCKED || (!(sx->sx_lock & SX_LOCK_SHARED) && (slocked || sx_xholder(sx) != curthread))) panic("Lock %s not %slocked @ %s:%d\n", sx->lock_object.lo_name, slocked ? "share " : "", file, line); if (!(sx->sx_lock & SX_LOCK_SHARED)) { if (sx_recursed(sx)) { if (what & SA_NOTRECURSED) panic("Lock %s recursed @ %s:%d\n", sx->lock_object.lo_name, file, line); } else if (what & SA_RECURSED) panic("Lock %s not recursed @ %s:%d\n", sx->lock_object.lo_name, file, line); } #endif break; case SA_XLOCKED: case SA_XLOCKED | SA_NOTRECURSED: case SA_XLOCKED | SA_RECURSED: if (sx_xholder(sx) != curthread) panic("Lock %s not exclusively locked @ %s:%d\n", sx->lock_object.lo_name, file, line); if (sx_recursed(sx)) { if (what & SA_NOTRECURSED) panic("Lock %s recursed @ %s:%d\n", sx->lock_object.lo_name, file, line); } else if (what & SA_RECURSED) panic("Lock %s not recursed @ %s:%d\n", sx->lock_object.lo_name, file, line); break; case SA_UNLOCKED: #ifdef WITNESS witness_assert(&sx->lock_object, what, file, line); #else /* * If we hold an exclusive lock, fail. We can't * reliably check to see if we hold a shared lock or * not.
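 * (The lock word only counts how many sharers exist; it does not
 * record which threads they are, so SA_UNLOCKED can only rule out
 * exclusive ownership here.)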
*/ if (sx_xholder(sx) == curthread) panic("Lock %s exclusively locked @ %s:%d\n", sx->lock_object.lo_name, file, line); #endif break; default: panic("Unknown sx lock assertion: %d @ %s:%d", what, file, line); } } #endif /* INVARIANT_SUPPORT */ #ifdef DDB static void db_show_sx(const struct lock_object *lock) { struct thread *td; const struct sx *sx; sx = (const struct sx *)lock; db_printf(" state: "); if (sx->sx_lock == SX_LOCK_UNLOCKED) db_printf("UNLOCKED\n"); else if (sx->sx_lock == SX_LOCK_DESTROYED) { db_printf("DESTROYED\n"); return; } else if (sx->sx_lock & SX_LOCK_SHARED) db_printf("SLOCK: %ju\n", (uintmax_t)SX_SHARERS(sx->sx_lock)); else { td = sx_xholder(sx); db_printf("XLOCK: %p (tid %d, pid %d, \"%s\")\n", td, td->td_tid, td->td_proc->p_pid, td->td_name); if (sx_recursed(sx)) db_printf(" recursed: %d\n", sx->sx_recurse); } db_printf(" waiters: "); switch(sx->sx_lock & (SX_LOCK_SHARED_WAITERS | SX_LOCK_EXCLUSIVE_WAITERS)) { case SX_LOCK_SHARED_WAITERS: db_printf("shared\n"); break; case SX_LOCK_EXCLUSIVE_WAITERS: db_printf("exclusive\n"); break; case SX_LOCK_SHARED_WAITERS | SX_LOCK_EXCLUSIVE_WAITERS: db_printf("exclusive and shared\n"); break; default: db_printf("none\n"); } } /* * Check to see if a thread that is blocked on a sleep queue is actually * blocked on an sx lock. If so, output some details and return true. * If the lock has an exclusive owner, return that in *ownerp. */ int sx_chain(struct thread *td, struct thread **ownerp) { struct sx *sx; /* * Check to see if this thread is blocked on an sx lock. * First, we check the lock class. If that is ok, then we * compare the lock name against the wait message. */ sx = td->td_wchan; if (LOCK_CLASS(&sx->lock_object) != &lock_class_sx || sx->lock_object.lo_name != td->td_wmesg) return (0); /* We think we have an sx lock, so output some details. */ db_printf("blocked on sx \"%s\" ", td->td_wmesg); *ownerp = sx_xholder(sx); if (sx->sx_lock & SX_LOCK_SHARED) db_printf("SLOCK (count %ju)\n", (uintmax_t)SX_SHARERS(sx->sx_lock)); else db_printf("XLOCK\n"); return (1); } #endif Index: user/alc/PQ_LAUNDRY/sys/netinet/tcp_lro.c =================================================================== --- user/alc/PQ_LAUNDRY/sys/netinet/tcp_lro.c (revision 303666) +++ user/alc/PQ_LAUNDRY/sys/netinet/tcp_lro.c (revision 303667) @@ -1,876 +1,941 @@ /*- * Copyright (c) 2007, Myricom Inc. * Copyright (c) 2008, Intel Corporation. * Copyright (c) 2012 The FreeBSD Foundation * Copyright (c) 2016 Mellanox Technologies. * All rights reserved. * * Portions of this software were developed by Bjoern Zeeb * under sponsorship from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static MALLOC_DEFINE(M_LRO, "LRO", "LRO control structures"); #define TCP_LRO_UPDATE_CSUM 1 #ifndef TCP_LRO_UPDATE_CSUM #define TCP_LRO_INVALID_CSUM 0x0000 #endif static void tcp_lro_rx_done(struct lro_ctrl *lc); +static int tcp_lro_rx2(struct lro_ctrl *lc, struct mbuf *m, + uint32_t csum, int use_hash); static __inline void -tcp_lro_active_insert(struct lro_ctrl *lc, struct lro_entry *le) +tcp_lro_active_insert(struct lro_ctrl *lc, struct lro_head *bucket, + struct lro_entry *le) { LIST_INSERT_HEAD(&lc->lro_active, le, next); + LIST_INSERT_HEAD(bucket, le, hash_next); } static __inline void tcp_lro_active_remove(struct lro_entry *le) { - LIST_REMOVE(le, next); + LIST_REMOVE(le, next); /* active list */ + LIST_REMOVE(le, hash_next); /* hash bucket */ } int tcp_lro_init(struct lro_ctrl *lc) { return (tcp_lro_init_args(lc, NULL, TCP_LRO_ENTRIES, 0)); } int tcp_lro_init_args(struct lro_ctrl *lc, struct ifnet *ifp, unsigned lro_entries, unsigned lro_mbufs) { struct lro_entry *le; size_t size; - unsigned i; + unsigned i, elements; lc->lro_bad_csum = 0; lc->lro_queued = 0; lc->lro_flushed = 0; lc->lro_cnt = 0; lc->lro_mbuf_count = 0; lc->lro_mbuf_max = lro_mbufs; lc->lro_cnt = lro_entries; lc->lro_ackcnt_lim = TCP_LRO_ACKCNT_MAX; lc->lro_length_lim = TCP_LRO_LENGTH_MAX; lc->ifp = ifp; LIST_INIT(&lc->lro_free); LIST_INIT(&lc->lro_active); + /* create hash table to accelerate entry lookup */ + if (lro_entries > lro_mbufs) + elements = lro_entries; + else + elements = lro_mbufs; + lc->lro_hash = phashinit_flags(elements, M_LRO, &lc->lro_hashsz, + HASH_NOWAIT); + if (lc->lro_hash == NULL) { + memset(lc, 0, sizeof(*lc)); + return (ENOMEM); + } + /* compute size to allocate */ size = (lro_mbufs * sizeof(struct lro_mbuf_sort)) + (lro_entries * sizeof(*le)); lc->lro_mbuf_data = (struct lro_mbuf_sort *) malloc(size, M_LRO, M_NOWAIT | M_ZERO); /* check for out of memory */ if (lc->lro_mbuf_data == NULL) { memset(lc, 0, sizeof(*lc)); return (ENOMEM); } /* compute offset for LRO entries */ le = (struct lro_entry *) (lc->lro_mbuf_data + lro_mbufs); /* setup linked list */ for (i = 0; i != lro_entries; i++) LIST_INSERT_HEAD(&lc->lro_free, le + i, next); return (0); } void tcp_lro_free(struct lro_ctrl *lc) { struct lro_entry *le; unsigned x; /* reset LRO free list */ LIST_INIT(&lc->lro_free); /* free active mbufs, if any */ while ((le = LIST_FIRST(&lc->lro_active)) != NULL) { tcp_lro_active_remove(le); m_freem(le->m_head); } + /* free hash table */ + if (lc->lro_hash != NULL) { + free(lc->lro_hash, M_LRO); + lc->lro_hash = NULL; + } + lc->lro_hashsz = 0; + /* free mbuf array, if any */ for (x = 0; x != lc->lro_mbuf_count; x++) m_freem(lc->lro_mbuf_data[x].mb); lc->lro_mbuf_count = 0; /* free allocated memory, if any */ free(lc->lro_mbuf_data, M_LRO); 
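/*
 * Clear the pointer below so that a repeated tcp_lro_free() is a
 * harmless no-op; free(9) accepts a NULL pointer.
 */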
lc->lro_mbuf_data = NULL; } #ifdef TCP_LRO_UPDATE_CSUM static uint16_t tcp_lro_csum_th(struct tcphdr *th) { uint32_t ch; uint16_t *p, l; ch = th->th_sum = 0x0000; l = th->th_off; p = (uint16_t *)th; while (l > 0) { ch += *p; p++; ch += *p; p++; l--; } while (ch > 0xffff) ch = (ch >> 16) + (ch & 0xffff); return (ch & 0xffff); } static uint16_t tcp_lro_rx_csum_fixup(struct lro_entry *le, void *l3hdr, struct tcphdr *th, uint16_t tcp_data_len, uint16_t csum) { uint32_t c; uint16_t cs; c = csum; /* Remove length from checksum. */ switch (le->eh_type) { #ifdef INET6 case ETHERTYPE_IPV6: { struct ip6_hdr *ip6; ip6 = (struct ip6_hdr *)l3hdr; if (le->append_cnt == 0) cs = ip6->ip6_plen; else { uint32_t cx; cx = ntohs(ip6->ip6_plen); cs = in6_cksum_pseudo(ip6, cx, ip6->ip6_nxt, 0); } break; } #endif #ifdef INET case ETHERTYPE_IP: { struct ip *ip4; ip4 = (struct ip *)l3hdr; if (le->append_cnt == 0) cs = ip4->ip_len; else { cs = in_addword(ntohs(ip4->ip_len) - sizeof(*ip4), IPPROTO_TCP); cs = in_pseudo(ip4->ip_src.s_addr, ip4->ip_dst.s_addr, htons(cs)); } break; } #endif default: cs = 0; /* Keep compiler happy. */ } cs = ~cs; c += cs; /* Remove TCP header csum. */ cs = ~tcp_lro_csum_th(th); c += cs; while (c > 0xffff) c = (c >> 16) + (c & 0xffff); return (c & 0xffff); } #endif static void tcp_lro_rx_done(struct lro_ctrl *lc) { struct lro_entry *le; while ((le = LIST_FIRST(&lc->lro_active)) != NULL) { tcp_lro_active_remove(le); tcp_lro_flush(lc, le); } } void tcp_lro_flush_inactive(struct lro_ctrl *lc, const struct timeval *timeout) { struct lro_entry *le, *le_tmp; struct timeval tv; if (LIST_EMPTY(&lc->lro_active)) return; getmicrotime(&tv); timevalsub(&tv, timeout); LIST_FOREACH_SAFE(le, &lc->lro_active, next, le_tmp) { if (timevalcmp(&tv, &le->mtime, >=)) { tcp_lro_active_remove(le); tcp_lro_flush(lc, le); } } } void tcp_lro_flush(struct lro_ctrl *lc, struct lro_entry *le) { if (le->append_cnt > 0) { struct tcphdr *th; uint16_t p_len; p_len = htons(le->p_len); switch (le->eh_type) { #ifdef INET6 case ETHERTYPE_IPV6: { struct ip6_hdr *ip6; ip6 = le->le_ip6; ip6->ip6_plen = p_len; th = (struct tcphdr *)(ip6 + 1); le->m_head->m_pkthdr.csum_flags = CSUM_DATA_VALID | CSUM_PSEUDO_HDR; le->p_len += ETHER_HDR_LEN + sizeof(*ip6); break; } #endif #ifdef INET case ETHERTYPE_IP: { struct ip *ip4; #ifdef TCP_LRO_UPDATE_CSUM uint32_t cl; uint16_t c; #endif ip4 = le->le_ip4; #ifdef TCP_LRO_UPDATE_CSUM /* Fix IP header checksum for new length. */ c = ~ip4->ip_sum; cl = c; c = ~ip4->ip_len; cl += c + p_len; while (cl > 0xffff) cl = (cl >> 16) + (cl & 0xffff); c = cl; ip4->ip_sum = ~c; #else ip4->ip_sum = TCP_LRO_INVALID_CSUM; #endif ip4->ip_len = p_len; th = (struct tcphdr *)(ip4 + 1); le->m_head->m_pkthdr.csum_flags = CSUM_DATA_VALID | CSUM_PSEUDO_HDR | CSUM_IP_CHECKED | CSUM_IP_VALID; le->p_len += ETHER_HDR_LEN; break; } #endif default: th = NULL; /* Keep compiler happy. */ } le->m_head->m_pkthdr.csum_data = 0xffff; le->m_head->m_pkthdr.len = le->p_len; /* Incorporate the latest ACK into the TCP header. */ th->th_ack = le->ack_seq; th->th_win = le->window; /* Incorporate latest timestamp into the TCP header. */ if (le->timestamp != 0) { uint32_t *ts_ptr; ts_ptr = (uint32_t *)(th + 1); ts_ptr[1] = htonl(le->tsval); ts_ptr[2] = le->tsecr; } #ifdef TCP_LRO_UPDATE_CSUM /* Update the TCP header checksum. 
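 * The running 32-bit accumulator is folded back into 16 bits using
 * the usual one's-complement reduction; as a standalone sketch (the
 * helper name is illustrative, not part of this file):
 *
 *	static uint16_t
 *	csum_fold(uint32_t sum)
 *	{
 *		while (sum > 0xffff)
 *			sum = (sum >> 16) + (sum & 0xffff);
 *		return (sum);
 *	}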
*/ le->ulp_csum += p_len; le->ulp_csum += tcp_lro_csum_th(th); while (le->ulp_csum > 0xffff) le->ulp_csum = (le->ulp_csum >> 16) + (le->ulp_csum & 0xffff); th->th_sum = (le->ulp_csum & 0xffff); th->th_sum = ~th->th_sum; #else th->th_sum = TCP_LRO_INVALID_CSUM; #endif } (*lc->ifp->if_input)(lc->ifp, le->m_head); lc->lro_queued += le->append_cnt + 1; lc->lro_flushed++; bzero(le, sizeof(*le)); LIST_INSERT_HEAD(&lc->lro_free, le, next); } #ifdef HAVE_INLINE_FLSLL #define tcp_lro_msb_64(x) (1ULL << (flsll(x) - 1)) #else static inline uint64_t tcp_lro_msb_64(uint64_t x) { x |= (x >> 1); x |= (x >> 2); x |= (x >> 4); x |= (x >> 8); x |= (x >> 16); x |= (x >> 32); return (x & ~(x >> 1)); } #endif /* * The tcp_lro_sort() routine is comparable to qsort(), except it has * a worst case complexity limit of O(MIN(N,64)*N), where N is the * number of elements to sort and 64 is the number of sequence bits * available. The algorithm is bit-slicing the 64-bit sequence number, * sorting one bit at a time from the most significant bit until the * least significant one, skipping the constant bits. This is * typically called a radix sort. */ static void tcp_lro_sort(struct lro_mbuf_sort *parray, uint32_t size) { struct lro_mbuf_sort temp; uint64_t ones; uint64_t zeros; uint32_t x; uint32_t y; repeat: /* for small arrays insertion sort is faster */ if (size <= 12) { for (x = 1; x < size; x++) { temp = parray[x]; for (y = x; y > 0 && temp.seq < parray[y - 1].seq; y--) parray[y] = parray[y - 1]; parray[y] = temp; } return; } /* compute sequence bits which are constant */ ones = 0; zeros = 0; for (x = 0; x != size; x++) { ones |= parray[x].seq; zeros |= ~parray[x].seq; } /* compute bits which are not constant into "ones" */ ones &= zeros; if (ones == 0) return; /* pick the most significant bit which is not constant */ ones = tcp_lro_msb_64(ones); /* * Move entries having cleared sequence bits to the beginning * of the array: */ for (x = y = 0; y != size; y++) { /* skip set bits */ if (parray[y].seq & ones) continue; /* swap entries */ temp = parray[x]; parray[x] = parray[y]; parray[y] = temp; x++; } KASSERT(x != 0 && x != size, ("Memory is corrupted\n")); /* sort zeros */ tcp_lro_sort(parray, x); /* sort ones */ parray += x; size -= x; goto repeat; } void tcp_lro_flush_all(struct lro_ctrl *lc) { uint64_t seq; uint64_t nseq; unsigned x; /* check if no mbufs to flush */ if (lc->lro_mbuf_count == 0) goto done; /* sort all mbufs according to stream */ tcp_lro_sort(lc->lro_mbuf_data, lc->lro_mbuf_count); /* input data into LRO engine, stream by stream */ seq = 0; for (x = 0; x != lc->lro_mbuf_count; x++) { struct mbuf *mb; /* get mbuf */ mb = lc->lro_mbuf_data[x].mb; /* get sequence number, masking away the packet index */ nseq = lc->lro_mbuf_data[x].seq & (-1ULL << 24); /* check for new stream */ if (seq != nseq) { seq = nseq; /* flush active streams */ tcp_lro_rx_done(lc); } /* add packet to LRO engine */ - if (tcp_lro_rx(lc, mb, 0) != 0) { + if (tcp_lro_rx2(lc, mb, 0, 0) != 0) { /* input packet to network layer */ (*lc->ifp->if_input)(lc->ifp, mb); lc->lro_queued++; lc->lro_flushed++; } } done: /* flush active streams */ tcp_lro_rx_done(lc); lc->lro_mbuf_count = 0; } #ifdef INET6 static int tcp_lro_rx_ipv6(struct lro_ctrl *lc, struct mbuf *m, struct ip6_hdr *ip6, struct tcphdr **th) { /* XXX-BZ we should check the flow-label. */ /* XXX-BZ We do not yet support ext. hdrs. */ if (ip6->ip6_nxt != IPPROTO_TCP) return (TCP_LRO_NOT_SUPPORTED); /* Find the TCP header. 
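 * With extension headers ruled out above, it starts immediately
 * after the fixed 40-byte IPv6 header.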
*/ *th = (struct tcphdr *)(ip6 + 1); return (0); } #endif #ifdef INET static int tcp_lro_rx_ipv4(struct lro_ctrl *lc, struct mbuf *m, struct ip *ip4, struct tcphdr **th) { int csum_flags; uint16_t csum; if (ip4->ip_p != IPPROTO_TCP) return (TCP_LRO_NOT_SUPPORTED); /* Ensure there are no options. */ if ((ip4->ip_hl << 2) != sizeof (*ip4)) return (TCP_LRO_CANNOT); /* .. and the packet is not fragmented. */ if (ip4->ip_off & htons(IP_MF|IP_OFFMASK)) return (TCP_LRO_CANNOT); /* Legacy IP has a header checksum that needs to be correct. */ csum_flags = m->m_pkthdr.csum_flags; if (csum_flags & CSUM_IP_CHECKED) { if (__predict_false((csum_flags & CSUM_IP_VALID) == 0)) { lc->lro_bad_csum++; return (TCP_LRO_CANNOT); } } else { csum = in_cksum_hdr(ip4); if (__predict_false((csum) != 0)) { lc->lro_bad_csum++; return (TCP_LRO_CANNOT); } } /* Find the TCP header (we ensured there are no IP options). */ *th = (struct tcphdr *)(ip4 + 1); return (0); } #endif -int -tcp_lro_rx(struct lro_ctrl *lc, struct mbuf *m, uint32_t csum) +static int +tcp_lro_rx2(struct lro_ctrl *lc, struct mbuf *m, uint32_t csum, int use_hash) { struct lro_entry *le; struct ether_header *eh; #ifdef INET6 struct ip6_hdr *ip6 = NULL; /* Keep compiler happy. */ #endif #ifdef INET struct ip *ip4 = NULL; /* Keep compiler happy. */ #endif struct tcphdr *th; void *l3hdr = NULL; /* Keep compiler happy. */ uint32_t *ts_ptr; tcp_seq seq; int error, ip_len, l; uint16_t eh_type, tcp_data_len; + struct lro_head *bucket; /* We expect a contiguous header [eh, ip, tcp]. */ eh = mtod(m, struct ether_header *); eh_type = ntohs(eh->ether_type); switch (eh_type) { #ifdef INET6 case ETHERTYPE_IPV6: { CURVNET_SET(lc->ifp->if_vnet); if (V_ip6_forwarding != 0) { /* XXX-BZ stats but changing lro_ctrl is a problem. */ CURVNET_RESTORE(); return (TCP_LRO_CANNOT); } CURVNET_RESTORE(); l3hdr = ip6 = (struct ip6_hdr *)(eh + 1); error = tcp_lro_rx_ipv6(lc, m, ip6, &th); if (error != 0) return (error); tcp_data_len = ntohs(ip6->ip6_plen); ip_len = sizeof(*ip6) + tcp_data_len; break; } #endif #ifdef INET case ETHERTYPE_IP: { CURVNET_SET(lc->ifp->if_vnet); if (V_ipforwarding != 0) { /* XXX-BZ stats but changing lro_ctrl is a problem. */ CURVNET_RESTORE(); return (TCP_LRO_CANNOT); } CURVNET_RESTORE(); l3hdr = ip4 = (struct ip *)(eh + 1); error = tcp_lro_rx_ipv4(lc, m, ip4, &th); if (error != 0) return (error); ip_len = ntohs(ip4->ip_len); tcp_data_len = ip_len - sizeof(*ip4); break; } #endif /* XXX-BZ what happens in case of VLAN(s)? */ default: return (TCP_LRO_NOT_SUPPORTED); } /* * If the frame is padded beyond the end of the IP packet, then we must * trim the extra bytes off. */ l = m->m_pkthdr.len - (ETHER_HDR_LEN + ip_len); if (l != 0) { if (l < 0) /* Truncated packet. */ return (TCP_LRO_CANNOT); m_adj(m, -l); } /* * Check TCP header constraints. */ /* Ensure no bits are set besides ACK or PSH. */ if ((th->th_flags & ~(TH_ACK | TH_PUSH)) != 0) return (TCP_LRO_CANNOT); /* XXX-BZ We lose an ACK|PUSH flag when concatenating multiple segments. */ /* XXX-BZ Ideally we'd flush on PUSH? */ /* * Check for timestamps. * Since the only option we handle is timestamps, we only have to * handle the simple case of aligned timestamps. */ l = (th->th_off << 2); tcp_data_len -= l; l -= sizeof(*th); ts_ptr = (uint32_t *)(th + 1); if (l != 0 && (__predict_false(l != TCPOLEN_TSTAMP_APPA) || (*ts_ptr != ntohl(TCPOPT_NOP<<24|TCPOPT_NOP<<16| TCPOPT_TIMESTAMP<<8|TCPOLEN_TIMESTAMP)))) return (TCP_LRO_CANNOT); /* If the driver did not pass in the checksum, set it now.
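 *
 * (The hash bucket selection just below is cheapest when the driver
 * has already attached a flow identifier to the mbuf; a sketch of
 * that driver-side step, assuming an RSS hash value in 'rsshash':
 *
 *	m->m_pkthdr.flowid = rsshash;
 *	M_HASHTYPE_SET(m, M_HASHTYPE_RSS_TCP_IPV4);
 *
 * Otherwise the code falls back to summing addresses and ports.)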
*/ if (csum == 0x0000) csum = th->th_sum; seq = ntohl(th->th_seq); + if (!use_hash) { + bucket = &lc->lro_hash[0]; + } else if (M_HASHTYPE_ISHASH(m)) { + bucket = &lc->lro_hash[m->m_pkthdr.flowid % lc->lro_hashsz]; + } else { + uint32_t hash; + + switch (eh_type) { +#ifdef INET + case ETHERTYPE_IP: + hash = ip4->ip_src.s_addr + ip4->ip_dst.s_addr; + break; +#endif +#ifdef INET6 + case ETHERTYPE_IPV6: + hash = ip6->ip6_src.s6_addr32[0] + + ip6->ip6_dst.s6_addr32[0]; + hash += ip6->ip6_src.s6_addr32[1] + + ip6->ip6_dst.s6_addr32[1]; + hash += ip6->ip6_src.s6_addr32[2] + + ip6->ip6_dst.s6_addr32[2]; + hash += ip6->ip6_src.s6_addr32[3] + + ip6->ip6_dst.s6_addr32[3]; + break; +#endif + default: + hash = 0; + break; + } + hash += th->th_sport + th->th_dport; + bucket = &lc->lro_hash[hash % lc->lro_hashsz]; + } + /* Try to find a matching previous segment. */ - LIST_FOREACH(le, &lc->lro_active, next) { + LIST_FOREACH(le, bucket, hash_next) { if (le->eh_type != eh_type) continue; if (le->source_port != th->th_sport || le->dest_port != th->th_dport) continue; switch (eh_type) { #ifdef INET6 case ETHERTYPE_IPV6: if (bcmp(&le->source_ip6, &ip6->ip6_src, sizeof(struct in6_addr)) != 0 || bcmp(&le->dest_ip6, &ip6->ip6_dst, sizeof(struct in6_addr)) != 0) continue; break; #endif #ifdef INET case ETHERTYPE_IP: if (le->source_ip4 != ip4->ip_src.s_addr || le->dest_ip4 != ip4->ip_dst.s_addr) continue; break; #endif } /* Flush now if appending will result in overflow. */ if (le->p_len > (lc->lro_length_lim - tcp_data_len)) { tcp_lro_active_remove(le); tcp_lro_flush(lc, le); break; } /* Try to append the new segment. */ if (__predict_false(seq != le->next_seq || (tcp_data_len == 0 && le->ack_seq == th->th_ack))) { /* Out of order packet or duplicate ACK. */ tcp_lro_active_remove(le); tcp_lro_flush(lc, le); return (TCP_LRO_CANNOT); } if (l != 0) { uint32_t tsval = ntohl(*(ts_ptr + 1)); /* Make sure timestamp values are increasing. */ /* XXX-BZ flip and use TSTMP_GEQ macro for this? */ if (__predict_false(le->tsval > tsval || *(ts_ptr + 2) == 0)) return (TCP_LRO_CANNOT); le->tsval = tsval; le->tsecr = *(ts_ptr + 2); } le->next_seq += tcp_data_len; le->ack_seq = th->th_ack; le->window = th->th_win; le->append_cnt++; #ifdef TCP_LRO_UPDATE_CSUM le->ulp_csum += tcp_lro_rx_csum_fixup(le, l3hdr, th, tcp_data_len, ~csum); #endif if (tcp_data_len == 0) { m_freem(m); /* * Flush this LRO entry, if this ACK should not * be further delayed. */ if (le->append_cnt >= lc->lro_ackcnt_lim) { tcp_lro_active_remove(le); tcp_lro_flush(lc, le); } return (0); } le->p_len += tcp_data_len; /* * Adjust the mbuf so that m_data points to the first byte of * the ULP payload. Adjust the mbuf to avoid complications and * append new segment to existing mbuf chain. */ m_adj(m, m->m_pkthdr.len - tcp_data_len); m_demote_pkthdr(m); le->m_tail->m_next = m; le->m_tail = m_last(m); /* * If a possible next full length packet would cause an * overflow, pro-actively flush now. */ if (le->p_len > (lc->lro_length_lim - lc->ifp->if_mtu)) { tcp_lro_active_remove(le); tcp_lro_flush(lc, le); } else getmicrotime(&le->mtime); return (0); } /* Try to find an empty slot. */ if (LIST_EMPTY(&lc->lro_free)) return (TCP_LRO_NO_ENTRIES); /* Start a new segment chain. */ le = LIST_FIRST(&lc->lro_free); LIST_REMOVE(le, next); - tcp_lro_active_insert(lc, le); + tcp_lro_active_insert(lc, bucket, le); getmicrotime(&le->mtime); /* Start filling in details. 
*/ switch (eh_type) { #ifdef INET6 case ETHERTYPE_IPV6: le->le_ip6 = ip6; le->source_ip6 = ip6->ip6_src; le->dest_ip6 = ip6->ip6_dst; le->eh_type = eh_type; le->p_len = m->m_pkthdr.len - ETHER_HDR_LEN - sizeof(*ip6); break; #endif #ifdef INET case ETHERTYPE_IP: le->le_ip4 = ip4; le->source_ip4 = ip4->ip_src.s_addr; le->dest_ip4 = ip4->ip_dst.s_addr; le->eh_type = eh_type; le->p_len = m->m_pkthdr.len - ETHER_HDR_LEN; break; #endif } le->source_port = th->th_sport; le->dest_port = th->th_dport; le->next_seq = seq + tcp_data_len; le->ack_seq = th->th_ack; le->window = th->th_win; if (l != 0) { le->timestamp = 1; le->tsval = ntohl(*(ts_ptr + 1)); le->tsecr = *(ts_ptr + 2); } #ifdef TCP_LRO_UPDATE_CSUM /* * Do not touch the csum of the first packet. However save the * "adjusted" checksum of just the source and destination addresses, * the next header and the TCP payload. The length and TCP header * parts may change, so we remove those from the saved checksum and * re-add with final values on tcp_lro_flush() if needed. */ KASSERT(le->ulp_csum == 0, ("%s: le=%p le->ulp_csum=0x%04x\n", __func__, le, le->ulp_csum)); le->ulp_csum = tcp_lro_rx_csum_fixup(le, l3hdr, th, tcp_data_len, ~csum); th->th_sum = csum; /* Restore checksum on first packet. */ #endif le->m_head = m; le->m_tail = m_last(m); return (0); +} + +int +tcp_lro_rx(struct lro_ctrl *lc, struct mbuf *m, uint32_t csum) +{ + + return tcp_lro_rx2(lc, m, csum, 1); } void tcp_lro_queue_mbuf(struct lro_ctrl *lc, struct mbuf *mb) { /* sanity checks */ if (__predict_false(lc->ifp == NULL || lc->lro_mbuf_data == NULL || lc->lro_mbuf_max == 0)) { /* packet drop */ m_freem(mb); return; } /* check if packet is not LRO capable */ if (__predict_false(mb->m_pkthdr.csum_flags == 0 || (lc->ifp->if_capenable & IFCAP_LRO) == 0)) { lc->lro_flushed++; lc->lro_queued++; /* input packet to network layer */ (*lc->ifp->if_input) (lc->ifp, mb); return; } /* check if array is full */ if (__predict_false(lc->lro_mbuf_count == lc->lro_mbuf_max)) tcp_lro_flush_all(lc); /* create sequence number */ lc->lro_mbuf_data[lc->lro_mbuf_count].seq = (((uint64_t)M_HASHTYPE_GET(mb)) << 56) | (((uint64_t)mb->m_pkthdr.flowid) << 24) | ((uint64_t)lc->lro_mbuf_count); /* enter mbuf */ lc->lro_mbuf_data[lc->lro_mbuf_count++].mb = mb; } /* end */ Index: user/alc/PQ_LAUNDRY/sys/netinet/tcp_lro.h =================================================================== --- user/alc/PQ_LAUNDRY/sys/netinet/tcp_lro.h (revision 303666) +++ user/alc/PQ_LAUNDRY/sys/netinet/tcp_lro.h (revision 303667) @@ -1,118 +1,121 @@ /*- * Copyright (c) 2006, Myricom Inc. * Copyright (c) 2008, Intel Corporation. * Copyright (c) 2016 Mellanox Technologies. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _TCP_LRO_H_ #define _TCP_LRO_H_ #include #ifndef TCP_LRO_ENTRIES /* Define default number of LRO entries per RX queue */ #define TCP_LRO_ENTRIES 8 #endif struct lro_entry { LIST_ENTRY(lro_entry) next; + LIST_ENTRY(lro_entry) hash_next; struct mbuf *m_head; struct mbuf *m_tail; union { struct ip *ip4; struct ip6_hdr *ip6; } leip; union { in_addr_t s_ip4; struct in6_addr s_ip6; } lesource; union { in_addr_t d_ip4; struct in6_addr d_ip6; } ledest; uint16_t source_port; uint16_t dest_port; uint16_t eh_type; /* EthernetHeader type. */ uint16_t append_cnt; uint32_t p_len; /* IP header payload length. */ uint32_t ulp_csum; /* TCP, etc. checksum. */ uint32_t next_seq; /* tcp_seq */ uint32_t ack_seq; /* tcp_seq */ uint32_t tsval; uint32_t tsecr; uint16_t window; uint16_t timestamp; /* flag, not a TCP hdr field. */ struct timeval mtime; }; LIST_HEAD(lro_head, lro_entry); #define le_ip4 leip.ip4 #define le_ip6 leip.ip6 #define source_ip4 lesource.s_ip4 #define dest_ip4 ledest.d_ip4 #define source_ip6 lesource.s_ip6 #define dest_ip6 ledest.d_ip6 struct lro_mbuf_sort { uint64_t seq; struct mbuf *mb; }; /* NB: This is part of driver structs. */ struct lro_ctrl { struct ifnet *ifp; struct lro_mbuf_sort *lro_mbuf_data; uint64_t lro_queued; uint64_t lro_flushed; uint64_t lro_bad_csum; unsigned lro_cnt; unsigned lro_mbuf_count; unsigned lro_mbuf_max; unsigned short lro_ackcnt_lim; /* max # of aggregated ACKs */ unsigned lro_length_lim; /* max len of aggregated data */ + u_long lro_hashsz; + struct lro_head *lro_hash; struct lro_head lro_active; struct lro_head lro_free; }; #define TCP_LRO_LENGTH_MAX 65535 #define TCP_LRO_ACKCNT_MAX 65535 /* unlimited */ int tcp_lro_init(struct lro_ctrl *); int tcp_lro_init_args(struct lro_ctrl *, struct ifnet *, unsigned, unsigned); void tcp_lro_free(struct lro_ctrl *); void tcp_lro_flush_inactive(struct lro_ctrl *, const struct timeval *); void tcp_lro_flush(struct lro_ctrl *, struct lro_entry *); void tcp_lro_flush_all(struct lro_ctrl *); int tcp_lro_rx(struct lro_ctrl *, struct mbuf *, uint32_t); void tcp_lro_queue_mbuf(struct lro_ctrl *, struct mbuf *); #define TCP_LRO_NO_ENTRIES -2 #define TCP_LRO_CANNOT -1 #define TCP_LRO_NOT_SUPPORTED 1 #endif /* _TCP_LRO_H_ */ Index: user/alc/PQ_LAUNDRY/sys/netinet6/ip6_output.c =================================================================== --- user/alc/PQ_LAUNDRY/sys/netinet6/ip6_output.c (revision 303666) +++ user/alc/PQ_LAUNDRY/sys/netinet6/ip6_output.c (revision 303667) @@ -1,3077 +1,3078 @@ /*- * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the project nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $KAME: ip6_output.c,v 1.279 2002/01/26 06:12:30 jinmei Exp $ */ /*- * Copyright (c) 1982, 1986, 1988, 1990, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * @(#)ip_output.c 8.3 (Berkeley) 1/21/94 */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #include "opt_ipsec.h" #include "opt_sctp.h" #include "opt_route.h" #include "opt_rss.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef IPSEC #include #include #include #include #endif /* IPSEC */ #ifdef SCTP #include #include #endif #include #include #ifdef FLOWTABLE #include #endif extern int in6_mcast_loop; struct ip6_exthdrs { struct mbuf *ip6e_ip6; struct mbuf *ip6e_hbh; struct mbuf *ip6e_dest1; struct mbuf *ip6e_rthdr; struct mbuf *ip6e_dest2; }; static MALLOC_DEFINE(M_IP6OPT, "ip6opt", "IPv6 options"); static int ip6_pcbopt(int, u_char *, int, struct ip6_pktopts **, struct ucred *, int); static int ip6_pcbopts(struct ip6_pktopts **, struct mbuf *, struct socket *, struct sockopt *); static int ip6_getpcbopt(struct ip6_pktopts *, int, struct sockopt *); static int ip6_setpktopt(int, u_char *, int, struct ip6_pktopts *, struct ucred *, int, int, int); static int ip6_copyexthdr(struct mbuf **, caddr_t, int); static int ip6_insertfraghdr(struct mbuf *, struct mbuf *, int, struct ip6_frag **); static int ip6_insert_jumboopt(struct ip6_exthdrs *, u_int32_t); static int ip6_splithdr(struct mbuf *, struct ip6_exthdrs *); static int ip6_getpmtu(struct route_in6 *, int, struct ifnet *, const struct in6_addr *, u_long *, int *, u_int, u_int); static int ip6_calcmtu(struct ifnet *, const struct in6_addr *, u_long, u_long *, int *, u_int); static int ip6_getpmtu_ctl(u_int, const struct in6_addr *, u_long *); static int copypktopts(struct ip6_pktopts *, struct ip6_pktopts *, int); /* * Make an extension header from option data. hp is the source, and * mp is the destination. */ #define MAKE_EXTHDR(hp, mp) \ do { \ if (hp) { \ struct ip6_ext *eh = (struct ip6_ext *)(hp); \ error = ip6_copyexthdr((mp), (caddr_t)(hp), \ ((eh)->ip6e_len + 1) << 3); \ if (error) \ goto freehdrs; \ } \ } while (/*CONSTCOND*/ 0) /* * Form a chain of extension headers. * m is the extension header mbuf * mp is the previous mbuf in the chain * p is the next header * i is the type of option. */ #define MAKE_CHAIN(m, mp, p, i)\ do {\ if (m) {\ if (!hdrsplit) \ panic("assumption failed: hdr not split"); \ *mtod((m), u_char *) = *(p);\ *(p) = (i);\ p = mtod((m), u_char *);\ (m)->m_next = (mp)->m_next;\ (mp)->m_next = (m);\ (mp) = (m);\ }\ } while (/*CONSTCOND*/ 0) void in6_delayed_cksum(struct mbuf *m, uint32_t plen, u_short offset) { u_short csum; csum = in_cksum_skip(m, offset + plen, offset); if (m->m_pkthdr.csum_flags & CSUM_UDP_IPV6 && csum == 0) csum = 0xffff; offset += m->m_pkthdr.csum_data; /* checksum offset */ if (offset + sizeof(u_short) > m->m_len) { printf("%s: delayed m_pullup, m->len: %d plen %u off %u " "csum_flags=%b\n", __func__, m->m_len, plen, offset, (int)m->m_pkthdr.csum_flags, CSUM_BITS); /* * XXX this should not happen, but if it does, the correct * behavior may be to insert the checksum in the appropriate * next mbuf in the chain. 
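 * (One way to handle that case would be m_copyback(m, offset,
 * sizeof(csum), (caddr_t)&csum), which copes with a checksum field
 * that straddles mbufs at the cost of an extra copy.)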
*/ return; } *(u_short *)(m->m_data + offset) = csum; } int ip6_fragment(struct ifnet *ifp, struct mbuf *m0, int hlen, u_char nextproto, int mtu, uint32_t id) { struct mbuf *m, **mnext, *m_frgpart; struct ip6_hdr *ip6, *mhip6; struct ip6_frag *ip6f; int off; int error; int tlen = m0->m_pkthdr.len; m = m0; ip6 = mtod(m, struct ip6_hdr *); mnext = &m->m_nextpkt; for (off = hlen; off < tlen; off += mtu) { m = m_gethdr(M_NOWAIT, MT_DATA); if (!m) { IP6STAT_INC(ip6s_odropped); return (ENOBUFS); } m->m_flags = m0->m_flags & M_COPYFLAGS; *mnext = m; mnext = &m->m_nextpkt; m->m_data += max_linkhdr; mhip6 = mtod(m, struct ip6_hdr *); *mhip6 = *ip6; m->m_len = sizeof(*mhip6); error = ip6_insertfraghdr(m0, m, hlen, &ip6f); if (error) { IP6STAT_INC(ip6s_odropped); return (error); } ip6f->ip6f_offlg = htons((u_short)((off - hlen) & ~7)); if (off + mtu >= tlen) mtu = tlen - off; else ip6f->ip6f_offlg |= IP6F_MORE_FRAG; mhip6->ip6_plen = htons((u_short)(mtu + hlen + sizeof(*ip6f) - sizeof(struct ip6_hdr))); if ((m_frgpart = m_copy(m0, off, mtu)) == NULL) { IP6STAT_INC(ip6s_odropped); return (ENOBUFS); } m_cat(m, m_frgpart); m->m_pkthdr.len = mtu + hlen + sizeof(*ip6f); m->m_pkthdr.fibnum = m0->m_pkthdr.fibnum; m->m_pkthdr.rcvif = NULL; ip6f->ip6f_reserved = 0; ip6f->ip6f_ident = id; ip6f->ip6f_nxt = nextproto; IP6STAT_INC(ip6s_ofragments); in6_ifstat_inc(ifp, ifs6_out_fragcreat); } return (0); } /* * IP6 output. The packet in mbuf chain m contains a skeletal IP6 * header (with pri, len, nxt, hlim, src, dst). * This function may modify ver and hlim only. * The mbuf chain containing the packet will be freed. * The mbuf opt, if present, will not be freed. * If route_in6 ro is present and has ro_rt initialized, route lookup would be * skipped and ro->ro_rt would be used. If ro is present but ro->ro_rt is NULL, * then result of route lookup is stored in ro->ro_rt. * * type of "mtu": rt_mtu is u_long, ifnet.ifr_mtu is int, and * nd_ifinfo.linkmtu is u_int32_t. so we use u_long to hold largest one, * which is rt_mtu. * * ifpp - XXX: just for statistics */ /* * XXX TODO: no flowid is assigned for outbound flows? */ int ip6_output(struct mbuf *m0, struct ip6_pktopts *opt, struct route_in6 *ro, int flags, struct ip6_moptions *im6o, struct ifnet **ifpp, struct inpcb *inp) { struct ip6_hdr *ip6; struct ifnet *ifp, *origifp; struct mbuf *m = m0; struct mbuf *mprev = NULL; int hlen, tlen, len; struct route_in6 ip6route; struct rtentry *rt = NULL; struct sockaddr_in6 *dst, src_sa, dst_sa; struct in6_addr odst; int error = 0; struct in6_ifaddr *ia = NULL; u_long mtu; int alwaysfrag, dontfrag; u_int32_t optlen = 0, plen = 0, unfragpartlen = 0; struct ip6_exthdrs exthdrs; struct in6_addr src0, dst0; u_int32_t zone; struct route_in6 *ro_pmtu = NULL; int hdrsplit = 0; int sw_csum, tso; int needfiblookup; uint32_t fibnum; struct m_tag *fwd_tag = NULL; uint32_t id; if (inp != NULL) { M_SETFIB(m, inp->inp_inc.inc_fibnum); if ((flags & IP_NODEFAULTFLOWID) == 0) { /* unconditionally set flowid */ m->m_pkthdr.flowid = inp->inp_flowid; M_HASHTYPE_SET(m, inp->inp_flowtype); } } bzero(&exthdrs, sizeof(exthdrs)); if (opt) { /* Hop-by-Hop options header */ MAKE_EXTHDR(opt->ip6po_hbh, &exthdrs.ip6e_hbh); /* Destination options header(1st part) */ if (opt->ip6po_rthdr) { /* * Destination options header(1st part) * This only makes sense with a routing header. * See Section 9.2 of RFC 3542. * Disabling this part just for MIP6 convenience is * a bad idea. 
We need to think carefully about a * way to make the advanced API coexist with MIP6 * options, which might automatically be inserted in * the kernel. */ MAKE_EXTHDR(opt->ip6po_dest1, &exthdrs.ip6e_dest1); } /* Routing header */ MAKE_EXTHDR(opt->ip6po_rthdr, &exthdrs.ip6e_rthdr); /* Destination options header(2nd part) */ MAKE_EXTHDR(opt->ip6po_dest2, &exthdrs.ip6e_dest2); } #ifdef IPSEC /* * IPSec checking which handles several cases. * FAST IPSEC: We re-injected the packet. * XXX: need scope argument. */ switch(ip6_ipsec_output(&m, inp, &error)) { case 1: /* Bad packet */ goto freehdrs; case -1: /* IPSec done */ goto done; case 0: /* No IPSec */ default: break; } #endif /* IPSEC */ /* * Calculate the total length of the extension header chain. * Keep the length of the unfragmentable part for fragmentation. */ optlen = 0; if (exthdrs.ip6e_hbh) optlen += exthdrs.ip6e_hbh->m_len; if (exthdrs.ip6e_dest1) optlen += exthdrs.ip6e_dest1->m_len; if (exthdrs.ip6e_rthdr) optlen += exthdrs.ip6e_rthdr->m_len; unfragpartlen = optlen + sizeof(struct ip6_hdr); /* NOTE: we don't add AH/ESP length here (done in ip6_ipsec_output) */ if (exthdrs.ip6e_dest2) optlen += exthdrs.ip6e_dest2->m_len; /* * If there is at least one extension header, * separate IP6 header from the payload. */ if (optlen && !hdrsplit) { if ((error = ip6_splithdr(m, &exthdrs)) != 0) { m = NULL; goto freehdrs; } m = exthdrs.ip6e_ip6; hdrsplit++; } ip6 = mtod(m, struct ip6_hdr *); /* adjust mbuf packet header length */ m->m_pkthdr.len += optlen; plen = m->m_pkthdr.len - sizeof(*ip6); /* If this is a jumbo payload, insert a jumbo payload option. */ if (plen > IPV6_MAXPACKET) { if (!hdrsplit) { if ((error = ip6_splithdr(m, &exthdrs)) != 0) { m = NULL; goto freehdrs; } m = exthdrs.ip6e_ip6; hdrsplit++; } /* adjust pointer */ ip6 = mtod(m, struct ip6_hdr *); if ((error = ip6_insert_jumboopt(&exthdrs, plen)) != 0) goto freehdrs; ip6->ip6_plen = 0; } else ip6->ip6_plen = htons(plen); /* * Concatenate headers and fill in next header fields. * Here we have, on "m" * IPv6 payload * and we insert headers accordingly. Finally, we should be getting: * IPv6 hbh dest1 rthdr ah* [esp* dest2 payload] * * during the header composing process, "m" points to IPv6 header. * "mprev" points to an extension header prior to esp. */ u_char *nexthdrp = &ip6->ip6_nxt; mprev = m; /* * we treat dest2 specially. this makes IPsec processing * much easier. the goal here is to make mprev point the * mbuf prior to dest2. * * result: IPv6 dest2 payload * m and mprev will point to IPv6 header. */ if (exthdrs.ip6e_dest2) { if (!hdrsplit) panic("assumption failed: hdr not split"); exthdrs.ip6e_dest2->m_next = m->m_next; m->m_next = exthdrs.ip6e_dest2; *mtod(exthdrs.ip6e_dest2, u_char *) = ip6->ip6_nxt; ip6->ip6_nxt = IPPROTO_DSTOPTS; } /* * result: IPv6 hbh dest1 rthdr dest2 payload * m will point to IPv6 header. mprev will point to the * extension header prior to dest2 (rthdr in the above case). */ MAKE_CHAIN(exthdrs.ip6e_hbh, mprev, nexthdrp, IPPROTO_HOPOPTS); MAKE_CHAIN(exthdrs.ip6e_dest1, mprev, nexthdrp, IPPROTO_DSTOPTS); MAKE_CHAIN(exthdrs.ip6e_rthdr, mprev, nexthdrp, IPPROTO_ROUTING); /* * If there is a routing header, discard the packet. 
*/ if (exthdrs.ip6e_rthdr) { error = EINVAL; goto bad; } /* Source address validation */ if (IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_src) && (flags & IPV6_UNSPECSRC) == 0) { error = EOPNOTSUPP; IP6STAT_INC(ip6s_badscope); goto bad; } if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_src)) { error = EOPNOTSUPP; IP6STAT_INC(ip6s_badscope); goto bad; } IP6STAT_INC(ip6s_localout); /* * Route packet. */ if (ro == NULL) { ro = &ip6route; bzero((caddr_t)ro, sizeof(*ro)); } else ro->ro_flags |= RT_LLE_CACHE; ro_pmtu = ro; if (opt && opt->ip6po_rthdr) ro = &opt->ip6po_route; dst = (struct sockaddr_in6 *)&ro->ro_dst; #ifdef FLOWTABLE if (ro->ro_rt == NULL) (void )flowtable_lookup(AF_INET6, m, (struct route *)ro); #endif fibnum = (inp != NULL) ? inp->inp_inc.inc_fibnum : M_GETFIB(m); again: /* * if specified, try to fill in the traffic class field. * do not override if a non-zero value is already set. * we check the diffserv field and the ecn field separately. */ if (opt && opt->ip6po_tclass >= 0) { int mask = 0; if ((ip6->ip6_flow & htonl(0xfc << 20)) == 0) mask |= 0xfc; if ((ip6->ip6_flow & htonl(0x03 << 20)) == 0) mask |= 0x03; if (mask != 0) ip6->ip6_flow |= htonl((opt->ip6po_tclass & mask) << 20); } /* fill in or override the hop limit field, if necessary. */ if (opt && opt->ip6po_hlim != -1) ip6->ip6_hlim = opt->ip6po_hlim & 0xff; else if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) { if (im6o != NULL) ip6->ip6_hlim = im6o->im6o_multicast_hlim; else ip6->ip6_hlim = V_ip6_defmcasthlim; } /* * Validate route against routing table additions; * a better/more specific route might have been added. * Make sure address family is set in route. */ if (inp) { ro->ro_dst.sin6_family = AF_INET6; RT_VALIDATE((struct route *)ro, &inp->inp_rt_cookie, fibnum); } if (ro->ro_rt && fwd_tag == NULL && (ro->ro_rt->rt_flags & RTF_UP) && ro->ro_dst.sin6_family == AF_INET6 && IN6_ARE_ADDR_EQUAL(&ro->ro_dst.sin6_addr, &ip6->ip6_dst)) { rt = ro->ro_rt; ifp = ro->ro_rt->rt_ifp; } else { if (fwd_tag == NULL) { bzero(&dst_sa, sizeof(dst_sa)); dst_sa.sin6_family = AF_INET6; dst_sa.sin6_len = sizeof(dst_sa); dst_sa.sin6_addr = ip6->ip6_dst; } error = in6_selectroute_fib(&dst_sa, opt, im6o, ro, &ifp, &rt, fibnum); if (error != 0) { if (ifp != NULL) in6_ifstat_inc(ifp, ifs6_out_discard); goto bad; } } if (rt == NULL) { /* * If in6_selectroute() does not return a route entry, * dst may not have been updated. */ *dst = dst_sa; /* XXX */ } /* * then rt (for unicast) and ifp must be non-NULL valid values. */ if ((flags & IPV6_FORWARDING) == 0) { /* XXX: the FORWARDING flag can be set for mrouting. */ in6_ifstat_inc(ifp, ifs6_out_request); } if (rt != NULL) { ia = (struct in6_ifaddr *)(rt->rt_ifa); counter_u64_add(rt->rt_pksent, 1); } /* * The outgoing interface must be in the zone of source and * destination addresses. */ origifp = ifp; src0 = ip6->ip6_src; if (in6_setscope(&src0, origifp, &zone)) goto badscope; bzero(&src_sa, sizeof(src_sa)); src_sa.sin6_family = AF_INET6; src_sa.sin6_len = sizeof(src_sa); src_sa.sin6_addr = ip6->ip6_src; if (sa6_recoverscope(&src_sa) || zone != src_sa.sin6_scope_id) goto badscope; dst0 = ip6->ip6_dst; if (in6_setscope(&dst0, origifp, &zone)) goto badscope; /* re-initialize to be sure */ bzero(&dst_sa, sizeof(dst_sa)); dst_sa.sin6_family = AF_INET6; dst_sa.sin6_len = sizeof(dst_sa); dst_sa.sin6_addr = ip6->ip6_dst; if (sa6_recoverscope(&dst_sa) || zone != dst_sa.sin6_scope_id) { goto badscope; } /* We should use ia_ifp to support the case of * sending packets to an address of our own. 
*/ if (ia != NULL && ia->ia_ifp) ifp = ia->ia_ifp; /* scope check is done. */ goto routefound; badscope: IP6STAT_INC(ip6s_badscope); in6_ifstat_inc(origifp, ifs6_out_discard); if (error == 0) error = EHOSTUNREACH; /* XXX */ goto bad; routefound: if (rt && !IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) { if (opt && opt->ip6po_nextroute.ro_rt) { /* * The nexthop is explicitly specified by the * application. We assume the next hop is an IPv6 * address. */ dst = (struct sockaddr_in6 *)opt->ip6po_nexthop; } else if ((rt->rt_flags & RTF_GATEWAY)) dst = (struct sockaddr_in6 *)rt->rt_gateway; } if (!IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) { m->m_flags &= ~(M_BCAST | M_MCAST); /* just in case */ } else { m->m_flags = (m->m_flags & ~M_BCAST) | M_MCAST; in6_ifstat_inc(ifp, ifs6_out_mcast); /* * Confirm that the outgoing interface supports multicast. */ if (!(ifp->if_flags & IFF_MULTICAST)) { IP6STAT_INC(ip6s_noroute); in6_ifstat_inc(ifp, ifs6_out_discard); error = ENETUNREACH; goto bad; } if ((im6o == NULL && in6_mcast_loop) || (im6o && im6o->im6o_multicast_loop)) { /* * Loop back multicast datagram if not expressly * forbidden to do so, even if we have not joined * the address; protocols will filter it later, * thus deferring a hash lookup and lock acquisition * at the expense of an m_copym(). */ ip6_mloopback(ifp, m); } else { /* * If we are acting as a multicast router, perform * multicast forwarding as if the packet had just * arrived on the interface to which we are about * to send. The multicast forwarding function * recursively calls this function, using the * IPV6_FORWARDING flag to prevent infinite recursion. * * Multicasts that are looped back by ip6_mloopback(), * above, will be forwarded by the ip6_input() routine, * if necessary. */ if (V_ip6_mrouter && (flags & IPV6_FORWARDING) == 0) { /* * XXX: ip6_mforward expects that rcvif is NULL * when it is called from the originating path. * However, it may not always be the case. */ m->m_pkthdr.rcvif = NULL; if (ip6_mforward(ip6, ifp, m) != 0) { m_freem(m); goto done; } } } /* * Multicasts with a hoplimit of zero may be looped back, * above, but must not be transmitted on a network. * Also, multicasts addressed to the loopback interface * are not sent -- the above call to ip6_mloopback() will * loop back a copy if this host actually belongs to the * destination group on the loopback interface. */ if (ip6->ip6_hlim == 0 || (ifp->if_flags & IFF_LOOPBACK) || IN6_IS_ADDR_MC_INTFACELOCAL(&ip6->ip6_dst)) { m_freem(m); goto done; } } /* * Fill in the outgoing interface to tell the upper layer * to increment per-interface statistics. */ if (ifpp) *ifpp = ifp; /* Determine path MTU. */ if ((error = ip6_getpmtu(ro_pmtu, ro != ro_pmtu, ifp, &ip6->ip6_dst, &mtu, &alwaysfrag, fibnum, *nexthdrp)) != 0) goto bad; /* * The caller of this function may specify to use the minimum MTU * in some cases. * An advanced API option (IPV6_USE_MIN_MTU) can also override MTU * setting. The logic is a bit complicated; by default, unicast * packets will follow path MTU while multicast packets will be sent at * the minimum MTU. If IP6PO_MINMTU_ALL is specified, all packets * including unicast ones will be sent at the minimum MTU. Multicast * packets will always be sent at the minimum MTU unless * IP6PO_MINMTU_DISABLE is explicitly specified. * See RFC 3542 for more details.
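 *
 * In tabular form (MC = multicast destination):
 *
 *	IPV6_MINMTU flag set                      -> IPV6_MMTU
 *	opt->ip6po_minmtu == IP6PO_MINMTU_ALL     -> IPV6_MMTU
 *	MC and minmtu != IP6PO_MINMTU_DISABLE     -> IPV6_MMTU
 *	otherwise                                 -> path MTU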
*/ if (mtu > IPV6_MMTU) { if ((flags & IPV6_MINMTU)) mtu = IPV6_MMTU; else if (opt && opt->ip6po_minmtu == IP6PO_MINMTU_ALL) mtu = IPV6_MMTU; else if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) && (opt == NULL || opt->ip6po_minmtu != IP6PO_MINMTU_DISABLE)) { mtu = IPV6_MMTU; } } /* * clear embedded scope identifiers if necessary. * in6_clearscope will touch the addresses only when necessary. */ in6_clearscope(&ip6->ip6_src); in6_clearscope(&ip6->ip6_dst); /* * If the outgoing packet contains a hop-by-hop options header, * it must be examined and processed even by the source node. * (RFC 2460, section 4.) */ if (exthdrs.ip6e_hbh) { struct ip6_hbh *hbh = mtod(exthdrs.ip6e_hbh, struct ip6_hbh *); u_int32_t dummy; /* XXX unused */ u_int32_t plen = 0; /* XXX: ip6_process will check the value */ #ifdef DIAGNOSTIC if ((hbh->ip6h_len + 1) << 3 > exthdrs.ip6e_hbh->m_len) panic("ip6e_hbh is not contiguous"); #endif /* * XXX: if we have to send an ICMPv6 error to the sender, * we need the M_LOOP flag since icmp6_error() expects * the IPv6 and the hop-by-hop options header are * contiguous unless the flag is set. */ m->m_flags |= M_LOOP; m->m_pkthdr.rcvif = ifp; if (ip6_process_hopopts(m, (u_int8_t *)(hbh + 1), ((hbh->ip6h_len + 1) << 3) - sizeof(struct ip6_hbh), &dummy, &plen) < 0) { /* m was already freed at this point */ error = EINVAL;/* better error? */ goto done; } m->m_flags &= ~M_LOOP; /* XXX */ m->m_pkthdr.rcvif = NULL; } /* Jump over all PFIL processing if hooks are not active. */ if (!PFIL_HOOKED(&V_inet6_pfil_hook)) goto passout; odst = ip6->ip6_dst; /* Run through list of hooks for output packets. */ error = pfil_run_hooks(&V_inet6_pfil_hook, &m, ifp, PFIL_OUT, inp); if (error != 0 || m == NULL) goto done; /* adjust pointer */ ip6 = mtod(m, struct ip6_hdr *); needfiblookup = 0; /* See if destination IP address was changed by packet filter. */ if (!IN6_ARE_ADDR_EQUAL(&odst, &ip6->ip6_dst)) { m->m_flags |= M_SKIP_FIREWALL; /* If destination is now ourself drop to ip6_input(). */ if (in6_localip(&ip6->ip6_dst)) { m->m_flags |= M_FASTFWD_OURS; if (m->m_pkthdr.rcvif == NULL) m->m_pkthdr.rcvif = V_loif; if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA_IPV6) { m->m_pkthdr.csum_flags |= CSUM_DATA_VALID_IPV6 | CSUM_PSEUDO_HDR; m->m_pkthdr.csum_data = 0xffff; } #ifdef SCTP if (m->m_pkthdr.csum_flags & CSUM_SCTP_IPV6) m->m_pkthdr.csum_flags |= CSUM_SCTP_VALID; #endif error = netisr_queue(NETISR_IPV6, m); goto done; } else { RO_RTFREE(ro); needfiblookup = 1; /* Redo the routing table lookup. */ } } /* See if fib was changed by packet filter. */ if (fibnum != M_GETFIB(m)) { m->m_flags |= M_SKIP_FIREWALL; fibnum = M_GETFIB(m); RO_RTFREE(ro); needfiblookup = 1; } if (needfiblookup) goto again; /* See if local, if yes, send it to netisr. */ if (m->m_flags & M_FASTFWD_OURS) { if (m->m_pkthdr.rcvif == NULL) m->m_pkthdr.rcvif = V_loif; if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA_IPV6) { m->m_pkthdr.csum_flags |= CSUM_DATA_VALID_IPV6 | CSUM_PSEUDO_HDR; m->m_pkthdr.csum_data = 0xffff; } #ifdef SCTP if (m->m_pkthdr.csum_flags & CSUM_SCTP_IPV6) m->m_pkthdr.csum_flags |= CSUM_SCTP_VALID; #endif error = netisr_queue(NETISR_IPV6, m); goto done; } /* Or forward to some other address? 
*/ if ((m->m_flags & M_IP6_NEXTHOP) && (fwd_tag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL)) != NULL) { dst = (struct sockaddr_in6 *)&ro->ro_dst; bcopy((fwd_tag+1), &dst_sa, sizeof(struct sockaddr_in6)); m->m_flags |= M_SKIP_FIREWALL; m->m_flags &= ~M_IP6_NEXTHOP; m_tag_delete(m, fwd_tag); goto again; } passout: /* * Send the packet to the outgoing interface. * If necessary, do IPv6 fragmentation before sending. * * the logic here is rather complex: * 1: normal case (dontfrag == 0, alwaysfrag == 0) * 1-a: send as is if tlen <= path mtu * 1-b: fragment if tlen > path mtu * * 2: if user asks us not to fragment (dontfrag == 1) * 2-a: send as is if tlen <= interface mtu * 2-b: error if tlen > interface mtu * * 3: if we always need to attach fragment header (alwaysfrag == 1) * always fragment * * 4: if dontfrag == 1 && alwaysfrag == 1 * error, as we cannot handle this conflicting request */ sw_csum = m->m_pkthdr.csum_flags; if (!hdrsplit) { tso = ((sw_csum & ifp->if_hwassist & CSUM_TSO) != 0) ? 1 : 0; sw_csum &= ~ifp->if_hwassist; } else tso = 0; /* * If we added extension headers, we will not do TSO and calculate the * checksums ourselves for now. * XXX-BZ Need a framework to know when the NIC can handle it, even * with ext. hdrs. */ if (sw_csum & CSUM_DELAY_DATA_IPV6) { sw_csum &= ~CSUM_DELAY_DATA_IPV6; in6_delayed_cksum(m, plen, sizeof(struct ip6_hdr)); } #ifdef SCTP if (sw_csum & CSUM_SCTP_IPV6) { sw_csum &= ~CSUM_SCTP_IPV6; sctp_delayed_cksum(m, sizeof(struct ip6_hdr)); } #endif m->m_pkthdr.csum_flags &= ifp->if_hwassist; tlen = m->m_pkthdr.len; if ((opt && (opt->ip6po_flags & IP6PO_DONTFRAG)) || tso) dontfrag = 1; else dontfrag = 0; if (dontfrag && alwaysfrag) { /* case 4 */ /* conflicting request - can't transmit */ error = EMSGSIZE; goto bad; } if (dontfrag && tlen > IN6_LINKMTU(ifp) && !tso) { /* case 2-b */ /* * Even if the DONTFRAG option is specified, we cannot send the * packet when the data length is larger than the MTU of the * outgoing interface. * Notify the error by sending IPV6_PATHMTU ancillary data if * application wanted to know the MTU value. Also return an * error code (this is not described in the API spec). */ if (inp != NULL) ip6_notify_pmtu(inp, &dst_sa, (u_int32_t)mtu); error = EMSGSIZE; goto bad; } /* * transmit packet without fragmentation */ if (dontfrag || (!alwaysfrag && tlen <= mtu)) { /* case 1-a and 2-a */ struct in6_ifaddr *ia6; ip6 = mtod(m, struct ip6_hdr *); ia6 = in6_ifawithifp(ifp, &ip6->ip6_src); if (ia6) { /* Record statistics for this interface address. */ counter_u64_add(ia6->ia_ifa.ifa_opackets, 1); counter_u64_add(ia6->ia_ifa.ifa_obytes, m->m_pkthdr.len); ifa_free(&ia6->ia_ifa); } error = nd6_output_ifp(ifp, origifp, m, dst, (struct route *)ro); goto done; } /* * try to fragment the packet. case 1-b and 3 */ if (mtu < IPV6_MMTU) { /* path MTU cannot be less than IPV6_MMTU */ error = EMSGSIZE; in6_ifstat_inc(ifp, ifs6_out_fragfail); goto bad; } else if (ip6->ip6_plen == 0) { /* jumbo payload cannot be fragmented */ error = EMSGSIZE; in6_ifstat_inc(ifp, ifs6_out_fragfail); goto bad; } else { u_char nextproto; /* * Too large for the destination or interface; * fragment if possible. * Must be able to put at least 8 bytes per fragment. */ hlen = unfragpartlen; if (mtu > IPV6_MAXPACKET) mtu = IPV6_MAXPACKET; len = (mtu - hlen - sizeof(struct ip6_frag)) & ~7; if (len < 8) { error = EMSGSIZE; in6_ifstat_inc(ifp, ifs6_out_fragfail); goto bad; } /* * If the interface will not calculate checksums on * fragmented packets, then do it here. 
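 *
 * (Editor's aside, not in the original source: the four cases
 * enumerated at the "passout" label above reduce to the following
 * illustrative sketch, with case 4 first, then 2-a/2-b, then 3, and
 * finally 1-a/1-b:
 *
 *	enum frag_action { SEND_AS_IS, DO_FRAGMENT, FAIL_EMSGSIZE };
 *
 *	static enum frag_action
 *	frag_decide(int dontfrag, int alwaysfrag, u_long tlen,
 *	    u_long pmtu, u_long ifmtu)
 *	{
 *		if (dontfrag && alwaysfrag)
 *			return (FAIL_EMSGSIZE);
 *		if (dontfrag)
 *			return (tlen <= ifmtu ? SEND_AS_IS :
 *			    FAIL_EMSGSIZE);
 *		if (alwaysfrag)
 *			return (DO_FRAGMENT);
 *		return (tlen <= pmtu ? SEND_AS_IS : DO_FRAGMENT);
 *	}
 *
 * The TSO handling in the code is the one refinement the sketch
 * leaves out: a TSO-capable interface sends oversized packets as is
 * and lets the hardware segment them.)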
* XXX-BZ handle the hw offloading case. Need flags. */ if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA_IPV6) { in6_delayed_cksum(m, plen, hlen); m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA_IPV6; } #ifdef SCTP if (m->m_pkthdr.csum_flags & CSUM_SCTP_IPV6) { sctp_delayed_cksum(m, hlen); m->m_pkthdr.csum_flags &= ~CSUM_SCTP_IPV6; } #endif /* * Change the next header field of the last header in the * unfragmentable part. */ if (exthdrs.ip6e_rthdr) { nextproto = *mtod(exthdrs.ip6e_rthdr, u_char *); *mtod(exthdrs.ip6e_rthdr, u_char *) = IPPROTO_FRAGMENT; } else if (exthdrs.ip6e_dest1) { nextproto = *mtod(exthdrs.ip6e_dest1, u_char *); *mtod(exthdrs.ip6e_dest1, u_char *) = IPPROTO_FRAGMENT; } else if (exthdrs.ip6e_hbh) { nextproto = *mtod(exthdrs.ip6e_hbh, u_char *); *mtod(exthdrs.ip6e_hbh, u_char *) = IPPROTO_FRAGMENT; } else { nextproto = ip6->ip6_nxt; ip6->ip6_nxt = IPPROTO_FRAGMENT; } /* * Loop through the length of the segment after the first fragment, * make a new header and copy the data of each part, linking them onto * a chain. */ m0 = m; id = htonl(ip6_randomid()); if ((error = ip6_fragment(ifp, m, hlen, nextproto, len, id))) goto sendorfree; in6_ifstat_inc(ifp, ifs6_out_fragok); } /* * Remove the leading garbage. */ sendorfree: m = m0->m_nextpkt; m0->m_nextpkt = 0; m_freem(m0); for (m0 = m; m; m = m0) { m0 = m->m_nextpkt; m->m_nextpkt = 0; if (error == 0) { /* Record statistics for this interface address. */ if (ia) { counter_u64_add(ia->ia_ifa.ifa_opackets, 1); counter_u64_add(ia->ia_ifa.ifa_obytes, m->m_pkthdr.len); } error = nd6_output_ifp(ifp, origifp, m, dst, (struct route *)ro); } else m_freem(m); } if (error == 0) IP6STAT_INC(ip6s_fragmented); done: /* * Release the route if using our private route, or if * (with flowtable) we don't have our own reference. */ - if (ro == &ip6route || ro->ro_flags & RT_NORTREF) + if (ro == &ip6route || + (ro != NULL && ro->ro_flags & RT_NORTREF)) RO_RTFREE(ro); return (error); freehdrs: m_freem(exthdrs.ip6e_hbh); /* m_freem will check if mbuf is 0 */ m_freem(exthdrs.ip6e_dest1); m_freem(exthdrs.ip6e_rthdr); m_freem(exthdrs.ip6e_dest2); /* FALLTHROUGH */ bad: if (m) m_freem(m); goto done; } static int ip6_copyexthdr(struct mbuf **mp, caddr_t hdr, int hlen) { struct mbuf *m; if (hlen > MCLBYTES) return (ENOBUFS); /* XXX */ if (hlen > MLEN) m = m_getcl(M_NOWAIT, MT_DATA, 0); else m = m_get(M_NOWAIT, MT_DATA); if (m == NULL) return (ENOBUFS); m->m_len = hlen; if (hdr) bcopy(hdr, mtod(m, caddr_t), hlen); *mp = m; return (0); } /* * Insert jumbo payload option. */ static int ip6_insert_jumboopt(struct ip6_exthdrs *exthdrs, u_int32_t plen) { struct mbuf *mopt; u_char *optbuf; u_int32_t v; #define JUMBOOPTLEN 8 /* length of jumbo payload option and padding */ /* * If there is no hop-by-hop options header, allocate a new one. * If there is one but it doesn't have enough space to store the * jumbo payload option, allocate a cluster to store the whole options. * Otherwise, use it to store the options. */ if (exthdrs->ip6e_hbh == NULL) { mopt = m_get(M_NOWAIT, MT_DATA); if (mopt == NULL) return (ENOBUFS); mopt->m_len = JUMBOOPTLEN; optbuf = mtod(mopt, u_char *); optbuf[1] = 0; /* = ((JUMBOOPTLEN) >> 3) - 1 */ exthdrs->ip6e_hbh = mopt; } else { struct ip6_hbh *hbh; mopt = exthdrs->ip6e_hbh; if (M_TRAILINGSPACE(mopt) < JUMBOOPTLEN) { /* * XXX assumption: * - exthdrs->ip6e_hbh is not referenced from places * other than exthdrs. * - exthdrs->ip6e_hbh is not an mbuf chain.
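 *
 * (Editor's aside, not in the original source: JUMBOOPTLEN is 8
 * because RFC 2675 gives the jumbo payload option a 4n + 2 alignment
 * requirement; the 8-byte block finished below is laid out as
 *
 *	byte 0..1: hbh next-header and length fields for a freshly
 *	           allocated header, or a 2-byte pad when appending
 *	byte 2:    IP6OPT_JUMBO, the option type
 *	byte 3:    4, the option data length
 *	byte 4..7: the 32-bit payload length in network byte order
 *
 * so a packet whose payload exceeds 65535 bytes carries its true
 * length here while the fixed header's ip6_plen stays 0.)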
*/ int oldoptlen = mopt->m_len; struct mbuf *n; /* * XXX: give up if the whole (new) hbh header does * not fit even in an mbuf cluster. */ if (oldoptlen + JUMBOOPTLEN > MCLBYTES) return (ENOBUFS); /* * As a consequence, we must always prepare a cluster * at this point. */ n = m_getcl(M_NOWAIT, MT_DATA, 0); if (n == NULL) return (ENOBUFS); n->m_len = oldoptlen + JUMBOOPTLEN; bcopy(mtod(mopt, caddr_t), mtod(n, caddr_t), oldoptlen); optbuf = mtod(n, caddr_t) + oldoptlen; m_freem(mopt); mopt = exthdrs->ip6e_hbh = n; } else { optbuf = mtod(mopt, u_char *) + mopt->m_len; mopt->m_len += JUMBOOPTLEN; } optbuf[0] = IP6OPT_PADN; optbuf[1] = 1; /* * Adjust the header length according to the pad and * the jumbo payload option. */ hbh = mtod(mopt, struct ip6_hbh *); hbh->ip6h_len += (JUMBOOPTLEN >> 3); } /* fill in the option. */ optbuf[2] = IP6OPT_JUMBO; optbuf[3] = 4; v = (u_int32_t)htonl(plen + JUMBOOPTLEN); bcopy(&v, &optbuf[4], sizeof(u_int32_t)); /* finally, adjust the packet header length */ exthdrs->ip6e_ip6->m_pkthdr.len += JUMBOOPTLEN; return (0); #undef JUMBOOPTLEN } /* * Insert fragment header and copy unfragmentable header portions. */ static int ip6_insertfraghdr(struct mbuf *m0, struct mbuf *m, int hlen, struct ip6_frag **frghdrp) { struct mbuf *n, *mlast; if (hlen > sizeof(struct ip6_hdr)) { n = m_copym(m0, sizeof(struct ip6_hdr), hlen - sizeof(struct ip6_hdr), M_NOWAIT); if (n == NULL) return (ENOBUFS); m->m_next = n; } else n = m; /* Search for the last mbuf of unfragmentable part. */ for (mlast = n; mlast->m_next; mlast = mlast->m_next) ; if (M_WRITABLE(mlast) && M_TRAILINGSPACE(mlast) >= sizeof(struct ip6_frag)) { /* use the trailing space of the last mbuf for the fragment hdr */ *frghdrp = (struct ip6_frag *)(mtod(mlast, caddr_t) + mlast->m_len); mlast->m_len += sizeof(struct ip6_frag); m->m_pkthdr.len += sizeof(struct ip6_frag); } else { /* allocate a new mbuf for the fragment header */ struct mbuf *mfrg; mfrg = m_get(M_NOWAIT, MT_DATA); if (mfrg == NULL) return (ENOBUFS); mfrg->m_len = sizeof(struct ip6_frag); *frghdrp = mtod(mfrg, struct ip6_frag *); mlast->m_next = mfrg; } return (0); } /* * Calculates IPv6 path mtu for destination @dst. * Resulting MTU is stored in @mtup. * * Returns 0 on success. */ static int ip6_getpmtu_ctl(u_int fibnum, const struct in6_addr *dst, u_long *mtup) { struct nhop6_extended nh6; struct in6_addr kdst; uint32_t scopeid; struct ifnet *ifp; u_long mtu; int error; in6_splitscope(dst, &kdst, &scopeid); if (fib6_lookup_nh_ext(fibnum, &kdst, scopeid, NHR_REF, 0, &nh6) != 0) return (EHOSTUNREACH); ifp = nh6.nh_ifp; mtu = nh6.nh_mtu; error = ip6_calcmtu(ifp, dst, mtu, mtup, NULL, 0); fib6_free_nh_ext(fibnum, &nh6); return (error); } /* * Calculates IPv6 path MTU for @dst based on transmit @ifp, * and cached data in @ro_pmtu. * MTU from (successful) route lookup is saved (along with dst) * inside @ro_pmtu to avoid subsequent route lookups after packet * filter processing. * * Stores mtu and always-frag value into @mtup and @alwaysfragp. * Returns 0 on success. */ static int ip6_getpmtu(struct route_in6 *ro_pmtu, int do_lookup, struct ifnet *ifp, const struct in6_addr *dst, u_long *mtup, int *alwaysfragp, u_int fibnum, u_int proto) { struct nhop6_basic nh6; struct in6_addr kdst; uint32_t scopeid; struct sockaddr_in6 *sa6_dst; u_long mtu; mtu = 0; if (do_lookup) { /* * Here ro_pmtu has final destination address, while * ro might represent immediate destination. * Use ro_pmtu destination since mtu might differ. 
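 *
 * (Editor's aside, not in the original source: combined with
 * ip6_calcmtu() below, the MTU precedence works out to roughly the
 * following illustrative sketch, where hc_mtu is the TCP hostcache
 * value -- skipped for TCP itself, which tracks path MTU on its own
 * -- rt_mtu is the route MTU and link_mtu is IN6_LINKMTU(ifp):
 *
 *	static u_long
 *	effective_mtu(u_long hc_mtu, u_long rt_mtu, u_long link_mtu,
 *	    int *alwaysfrag)
 *	{
 *		u_long mtu;
 *
 *		if (rt_mtu == 0)
 *			return (link_mtu);
 *		mtu = hc_mtu != 0 ? ulmin(hc_mtu, rt_mtu) : rt_mtu;
 *		if (mtu < IPV6_MMTU) {
 *			*alwaysfrag = 1;
 *			mtu = IPV6_MMTU;
 *		}
 *		return (mtu);
 *	}
 *
 * The alwaysfrag case implements RFC 2460 section 5: honor a
 * too-big report below 1280 by keeping packets at 1280 but adding a
 * fragment header so translators can identify them.)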
*/ sa6_dst = (struct sockaddr_in6 *)&ro_pmtu->ro_dst; if (!IN6_ARE_ADDR_EQUAL(&sa6_dst->sin6_addr, dst)) ro_pmtu->ro_mtu = 0; if (ro_pmtu->ro_mtu == 0) { bzero(sa6_dst, sizeof(*sa6_dst)); sa6_dst->sin6_family = AF_INET6; sa6_dst->sin6_len = sizeof(struct sockaddr_in6); sa6_dst->sin6_addr = *dst; in6_splitscope(dst, &kdst, &scopeid); if (fib6_lookup_nh_basic(fibnum, &kdst, scopeid, 0, 0, &nh6) == 0) ro_pmtu->ro_mtu = nh6.nh_mtu; } mtu = ro_pmtu->ro_mtu; } if (ro_pmtu->ro_rt) mtu = ro_pmtu->ro_rt->rt_mtu; return (ip6_calcmtu(ifp, dst, mtu, mtup, alwaysfragp, proto)); } /* * Calculate MTU based on transmit @ifp, route mtu @rt_mtu and * hostcache data for @dst. * Stores mtu and always-frag value into @mtup and @alwaysfragp. * * Returns 0 on success. */ static int ip6_calcmtu(struct ifnet *ifp, const struct in6_addr *dst, u_long rt_mtu, u_long *mtup, int *alwaysfragp, u_int proto) { u_long mtu = 0; int alwaysfrag = 0; int error = 0; if (rt_mtu > 0) { u_int32_t ifmtu; struct in_conninfo inc; bzero(&inc, sizeof(inc)); inc.inc_flags |= INC_ISIPV6; inc.inc6_faddr = *dst; ifmtu = IN6_LINKMTU(ifp); /* TCP is known to react to pmtu changes so skip hc */ if (proto != IPPROTO_TCP) mtu = tcp_hc_getmtu(&inc); if (mtu) mtu = min(mtu, rt_mtu); else mtu = rt_mtu; if (mtu == 0) mtu = ifmtu; else if (mtu < IPV6_MMTU) { /* * RFC2460 section 5, last paragraph: * if we record an ICMPv6 too big message with * mtu < IPV6_MMTU, transmit packets sized IPV6_MMTU * or smaller, with a fragment header attached. * (the fragment header is needed regardless of the * packet size, for translators to identify packets) */ alwaysfrag = 1; mtu = IPV6_MMTU; } } else if (ifp) { mtu = IN6_LINKMTU(ifp); } else error = EHOSTUNREACH; /* XXX */ *mtup = mtu; if (alwaysfragp) *alwaysfragp = alwaysfrag; return (error); } /* * IP6 socket option processing. */ int ip6_ctloutput(struct socket *so, struct sockopt *sopt) { int optdatalen, uproto; void *optdata; struct inpcb *in6p = sotoinpcb(so); int error, optval; int level, op, optname; int optlen; struct thread *td; #ifdef RSS uint32_t rss_bucket; int retval; #endif level = sopt->sopt_level; op = sopt->sopt_dir; optname = sopt->sopt_name; optlen = sopt->sopt_valsize; td = sopt->sopt_td; error = 0; optval = 0; uproto = (int)so->so_proto->pr_protocol; if (level != IPPROTO_IPV6) { error = EINVAL; if (sopt->sopt_level == SOL_SOCKET && sopt->sopt_dir == SOPT_SET) { switch (sopt->sopt_name) { case SO_REUSEADDR: INP_WLOCK(in6p); if ((so->so_options & SO_REUSEADDR) != 0) in6p->inp_flags2 |= INP_REUSEADDR; else in6p->inp_flags2 &= ~INP_REUSEADDR; INP_WUNLOCK(in6p); error = 0; break; case SO_REUSEPORT: INP_WLOCK(in6p); if ((so->so_options & SO_REUSEPORT) != 0) in6p->inp_flags2 |= INP_REUSEPORT; else in6p->inp_flags2 &= ~INP_REUSEPORT; INP_WUNLOCK(in6p); error = 0; break; case SO_SETFIB: INP_WLOCK(in6p); in6p->inp_inc.inc_fibnum = so->so_fibnum; INP_WUNLOCK(in6p); error = 0; break; default: break; } } } else { /* level == IPPROTO_IPV6 */ switch (op) { case SOPT_SET: switch (optname) { case IPV6_2292PKTOPTIONS: #ifdef IPV6_PKTOPTIONS case IPV6_PKTOPTIONS: #endif { struct mbuf *m; error = soopt_getm(sopt, &m); /* XXX */ if (error != 0) break; error = soopt_mcopyin(sopt, m); /* XXX */ if (error != 0) break; error = ip6_pcbopts(&in6p->in6p_outputopts, m, so, sopt); m_freem(m); /* XXX */ break; } /* * Use of some Hop-by-Hop options or some * Destination options might require special * privilege.
That is, normal applications * (without special privilege) might be forbidden * from setting certain options in outgoing packets, * and might never see certain options in received * packets. [RFC 2292 Section 6] * KAME specific note: * KAME prevents non-privileged users from sending or * receiving ANY hbh/dst options in order to avoid * overhead of parsing options in the kernel. */ case IPV6_RECVHOPOPTS: case IPV6_RECVDSTOPTS: case IPV6_RECVRTHDRDSTOPTS: if (td != NULL) { error = priv_check(td, PRIV_NETINET_SETHDROPTS); if (error) break; } /* FALLTHROUGH */ case IPV6_UNICAST_HOPS: case IPV6_HOPLIMIT: case IPV6_RECVPKTINFO: case IPV6_RECVHOPLIMIT: case IPV6_RECVRTHDR: case IPV6_RECVPATHMTU: case IPV6_RECVTCLASS: case IPV6_RECVFLOWID: #ifdef RSS case IPV6_RECVRSSBUCKETID: #endif case IPV6_V6ONLY: case IPV6_AUTOFLOWLABEL: case IPV6_BINDANY: case IPV6_BINDMULTI: #ifdef RSS case IPV6_RSS_LISTEN_BUCKET: #endif if (optname == IPV6_BINDANY && td != NULL) { error = priv_check(td, PRIV_NETINET_BINDANY); if (error) break; } if (optlen != sizeof(int)) { error = EINVAL; break; } error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval); if (error) break; switch (optname) { case IPV6_UNICAST_HOPS: if (optval < -1 || optval >= 256) error = EINVAL; else { /* -1 = kernel default */ in6p->in6p_hops = optval; if ((in6p->inp_vflag & INP_IPV4) != 0) in6p->inp_ip_ttl = optval; } break; #define OPTSET(bit) \ do { \ INP_WLOCK(in6p); \ if (optval) \ in6p->inp_flags |= (bit); \ else \ in6p->inp_flags &= ~(bit); \ INP_WUNLOCK(in6p); \ } while (/*CONSTCOND*/ 0) #define OPTSET2292(bit) \ do { \ INP_WLOCK(in6p); \ in6p->inp_flags |= IN6P_RFC2292; \ if (optval) \ in6p->inp_flags |= (bit); \ else \ in6p->inp_flags &= ~(bit); \ INP_WUNLOCK(in6p); \ } while (/*CONSTCOND*/ 0) #define OPTBIT(bit) (in6p->inp_flags & (bit) ? 1 : 0) #define OPTSET2(bit, val) do { \ INP_WLOCK(in6p); \ if (val) \ in6p->inp_flags2 |= bit; \ else \ in6p->inp_flags2 &= ~bit; \ INP_WUNLOCK(in6p); \ } while (0) #define OPTBIT2(bit) (in6p->inp_flags2 & (bit) ? 1 : 0) case IPV6_RECVPKTINFO: /* cannot mix with RFC2292 */ if (OPTBIT(IN6P_RFC2292)) { error = EINVAL; break; } OPTSET(IN6P_PKTINFO); break; case IPV6_HOPLIMIT: { struct ip6_pktopts **optp; /* cannot mix with RFC2292 */ if (OPTBIT(IN6P_RFC2292)) { error = EINVAL; break; } optp = &in6p->in6p_outputopts; error = ip6_pcbopt(IPV6_HOPLIMIT, (u_char *)&optval, sizeof(optval), optp, (td != NULL) ? td->td_ucred : NULL, uproto); break; } case IPV6_RECVHOPLIMIT: /* cannot mix with RFC2292 */ if (OPTBIT(IN6P_RFC2292)) { error = EINVAL; break; } OPTSET(IN6P_HOPLIMIT); break; case IPV6_RECVHOPOPTS: /* cannot mix with RFC2292 */ if (OPTBIT(IN6P_RFC2292)) { error = EINVAL; break; } OPTSET(IN6P_HOPOPTS); break; case IPV6_RECVDSTOPTS: /* cannot mix with RFC2292 */ if (OPTBIT(IN6P_RFC2292)) { error = EINVAL; break; } OPTSET(IN6P_DSTOPTS); break; case IPV6_RECVRTHDRDSTOPTS: /* cannot mix with RFC2292 */ if (OPTBIT(IN6P_RFC2292)) { error = EINVAL; break; } OPTSET(IN6P_RTHDRDSTOPTS); break; case IPV6_RECVRTHDR: /* cannot mix with RFC2292 */ if (OPTBIT(IN6P_RFC2292)) { error = EINVAL; break; } OPTSET(IN6P_RTHDR); break; case IPV6_RECVPATHMTU: /* * We ignore this option for TCP * sockets. * (RFC3542 leaves this case * unspecified.) 
*/ if (uproto != IPPROTO_TCP) OPTSET(IN6P_MTU); break; case IPV6_RECVFLOWID: OPTSET2(INP_RECVFLOWID, optval); break; #ifdef RSS case IPV6_RECVRSSBUCKETID: OPTSET2(INP_RECVRSSBUCKETID, optval); break; #endif case IPV6_V6ONLY: /* * make setsockopt(IPV6_V6ONLY) * available only prior to bind(2). * see ipng mailing list, Jun 22 2001. */ if (in6p->inp_lport || !IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_laddr)) { error = EINVAL; break; } OPTSET(IN6P_IPV6_V6ONLY); if (optval) in6p->inp_vflag &= ~INP_IPV4; else in6p->inp_vflag |= INP_IPV4; break; case IPV6_RECVTCLASS: /* cannot mix with RFC2292 XXX */ if (OPTBIT(IN6P_RFC2292)) { error = EINVAL; break; } OPTSET(IN6P_TCLASS); break; case IPV6_AUTOFLOWLABEL: OPTSET(IN6P_AUTOFLOWLABEL); break; case IPV6_BINDANY: OPTSET(INP_BINDANY); break; case IPV6_BINDMULTI: OPTSET2(INP_BINDMULTI, optval); break; #ifdef RSS case IPV6_RSS_LISTEN_BUCKET: if ((optval >= 0) && (optval < rss_getnumbuckets())) { in6p->inp_rss_listen_bucket = optval; OPTSET2(INP_RSS_BUCKET_SET, 1); } else { error = EINVAL; } break; #endif } break; case IPV6_TCLASS: case IPV6_DONTFRAG: case IPV6_USE_MIN_MTU: case IPV6_PREFER_TEMPADDR: if (optlen != sizeof(optval)) { error = EINVAL; break; } error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval); if (error) break; { struct ip6_pktopts **optp; optp = &in6p->in6p_outputopts; error = ip6_pcbopt(optname, (u_char *)&optval, sizeof(optval), optp, (td != NULL) ? td->td_ucred : NULL, uproto); break; } case IPV6_2292PKTINFO: case IPV6_2292HOPLIMIT: case IPV6_2292HOPOPTS: case IPV6_2292DSTOPTS: case IPV6_2292RTHDR: /* RFC 2292 */ if (optlen != sizeof(int)) { error = EINVAL; break; } error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval); if (error) break; switch (optname) { case IPV6_2292PKTINFO: OPTSET2292(IN6P_PKTINFO); break; case IPV6_2292HOPLIMIT: OPTSET2292(IN6P_HOPLIMIT); break; case IPV6_2292HOPOPTS: /* * Check super-user privilege. * See comments for IPV6_RECVHOPOPTS. */ if (td != NULL) { error = priv_check(td, PRIV_NETINET_SETHDROPTS); if (error) return (error); } OPTSET2292(IN6P_HOPOPTS); break; case IPV6_2292DSTOPTS: if (td != NULL) { error = priv_check(td, PRIV_NETINET_SETHDROPTS); if (error) return (error); } OPTSET2292(IN6P_DSTOPTS|IN6P_RTHDRDSTOPTS); /* XXX */ break; case IPV6_2292RTHDR: OPTSET2292(IN6P_RTHDR); break; } break; case IPV6_PKTINFO: case IPV6_HOPOPTS: case IPV6_RTHDR: case IPV6_DSTOPTS: case IPV6_RTHDRDSTOPTS: case IPV6_NEXTHOP: { /* new advanced API (RFC3542) */ u_char *optbuf; u_char optbuf_storage[MCLBYTES]; int optlen; struct ip6_pktopts **optp; /* cannot mix with RFC2292 */ if (OPTBIT(IN6P_RFC2292)) { error = EINVAL; break; } /* * We only ensure valsize is not too large * here. Further validation will be done * later. */ error = sooptcopyin(sopt, optbuf_storage, sizeof(optbuf_storage), 0); if (error) break; optlen = sopt->sopt_valsize; optbuf = optbuf_storage; optp = &in6p->in6p_outputopts; error = ip6_pcbopt(optname, optbuf, optlen, optp, (td != NULL) ? 
td->td_ucred : NULL, uproto); break; } #undef OPTSET case IPV6_MULTICAST_IF: case IPV6_MULTICAST_HOPS: case IPV6_MULTICAST_LOOP: case IPV6_JOIN_GROUP: case IPV6_LEAVE_GROUP: case IPV6_MSFILTER: case MCAST_BLOCK_SOURCE: case MCAST_UNBLOCK_SOURCE: case MCAST_JOIN_GROUP: case MCAST_LEAVE_GROUP: case MCAST_JOIN_SOURCE_GROUP: case MCAST_LEAVE_SOURCE_GROUP: error = ip6_setmoptions(in6p, sopt); break; case IPV6_PORTRANGE: error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval); if (error) break; INP_WLOCK(in6p); switch (optval) { case IPV6_PORTRANGE_DEFAULT: in6p->inp_flags &= ~(INP_LOWPORT); in6p->inp_flags &= ~(INP_HIGHPORT); break; case IPV6_PORTRANGE_HIGH: in6p->inp_flags &= ~(INP_LOWPORT); in6p->inp_flags |= INP_HIGHPORT; break; case IPV6_PORTRANGE_LOW: in6p->inp_flags &= ~(INP_HIGHPORT); in6p->inp_flags |= INP_LOWPORT; break; default: error = EINVAL; break; } INP_WUNLOCK(in6p); break; #ifdef IPSEC case IPV6_IPSEC_POLICY: { caddr_t req; struct mbuf *m; if ((error = soopt_getm(sopt, &m)) != 0) /* XXX */ break; if ((error = soopt_mcopyin(sopt, m)) != 0) /* XXX */ break; req = mtod(m, caddr_t); error = ipsec_set_policy(in6p, optname, req, m->m_len, (sopt->sopt_td != NULL) ? sopt->sopt_td->td_ucred : NULL); m_freem(m); break; } #endif /* IPSEC */ default: error = ENOPROTOOPT; break; } break; case SOPT_GET: switch (optname) { case IPV6_2292PKTOPTIONS: #ifdef IPV6_PKTOPTIONS case IPV6_PKTOPTIONS: #endif /* * RFC3542 (effectively) deprecated the * semantics of the 2292-style pktoptions. * Since it was not reliable in nature (i.e., * applications had to expect the lack of some * information after all), it would make sense * to simplify this part by always returning * empty data. */ sopt->sopt_valsize = 0; break; case IPV6_RECVHOPOPTS: case IPV6_RECVDSTOPTS: case IPV6_RECVRTHDRDSTOPTS: case IPV6_UNICAST_HOPS: case IPV6_RECVPKTINFO: case IPV6_RECVHOPLIMIT: case IPV6_RECVRTHDR: case IPV6_RECVPATHMTU: case IPV6_V6ONLY: case IPV6_PORTRANGE: case IPV6_RECVTCLASS: case IPV6_AUTOFLOWLABEL: case IPV6_BINDANY: case IPV6_FLOWID: case IPV6_FLOWTYPE: case IPV6_RECVFLOWID: #ifdef RSS case IPV6_RSSBUCKETID: case IPV6_RECVRSSBUCKETID: #endif case IPV6_BINDMULTI: switch (optname) { case IPV6_RECVHOPOPTS: optval = OPTBIT(IN6P_HOPOPTS); break; case IPV6_RECVDSTOPTS: optval = OPTBIT(IN6P_DSTOPTS); break; case IPV6_RECVRTHDRDSTOPTS: optval = OPTBIT(IN6P_RTHDRDSTOPTS); break; case IPV6_UNICAST_HOPS: optval = in6p->in6p_hops; break; case IPV6_RECVPKTINFO: optval = OPTBIT(IN6P_PKTINFO); break; case IPV6_RECVHOPLIMIT: optval = OPTBIT(IN6P_HOPLIMIT); break; case IPV6_RECVRTHDR: optval = OPTBIT(IN6P_RTHDR); break; case IPV6_RECVPATHMTU: optval = OPTBIT(IN6P_MTU); break; case IPV6_V6ONLY: optval = OPTBIT(IN6P_IPV6_V6ONLY); break; case IPV6_PORTRANGE: { int flags; flags = in6p->inp_flags; if (flags & INP_HIGHPORT) optval = IPV6_PORTRANGE_HIGH; else if (flags & INP_LOWPORT) optval = IPV6_PORTRANGE_LOW; else optval = 0; break; } case IPV6_RECVTCLASS: optval = OPTBIT(IN6P_TCLASS); break; case IPV6_AUTOFLOWLABEL: optval = OPTBIT(IN6P_AUTOFLOWLABEL); break; case IPV6_BINDANY: optval = OPTBIT(INP_BINDANY); break; case IPV6_FLOWID: optval = in6p->inp_flowid; break; case IPV6_FLOWTYPE: optval = in6p->inp_flowtype; break; case IPV6_RECVFLOWID: optval = OPTBIT2(INP_RECVFLOWID); break; #ifdef RSS case IPV6_RSSBUCKETID: retval = rss_hash2bucket(in6p->inp_flowid, in6p->inp_flowtype, &rss_bucket); if (retval == 0) optval = rss_bucket; else error = EINVAL; break; case IPV6_RECVRSSBUCKETID: optval = OPTBIT2(INP_RECVRSSBUCKETID); 
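/*
 * (Editor's aside, not in the original source: the OPTBIT()/OPTBIT2()
 * reads in this SOPT_GET path are the counterpart of the OPTSET() and
 * OPTSET2() macros defined in the SOPT_SET path above; every boolean
 * option is simply one bit in inp_flags or inp_flags2.  Expanded by
 * hand, OPTSET(IN6P_HOPLIMIT) performs
 *
 *	INP_WLOCK(in6p);
 *	if (optval)
 *		in6p->inp_flags |= IN6P_HOPLIMIT;
 *	else
 *		in6p->inp_flags &= ~IN6P_HOPLIMIT;
 *	INP_WUNLOCK(in6p);
 *
 * and OPTBIT(IN6P_HOPLIMIT) reads it back as
 * (in6p->inp_flags & IN6P_HOPLIMIT) ? 1 : 0.)
 */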
break; #endif case IPV6_BINDMULTI: optval = OPTBIT2(INP_BINDMULTI); break; } if (error) break; error = sooptcopyout(sopt, &optval, sizeof optval); break; case IPV6_PATHMTU: { u_long pmtu = 0; struct ip6_mtuinfo mtuinfo; if (!(so->so_state & SS_ISCONNECTED)) return (ENOTCONN); /* * XXX: we do not consider the case of source * routing, or optional information to specify * the outgoing interface. */ error = ip6_getpmtu_ctl(so->so_fibnum, &in6p->in6p_faddr, &pmtu); if (error) break; if (pmtu > IPV6_MAXPACKET) pmtu = IPV6_MAXPACKET; bzero(&mtuinfo, sizeof(mtuinfo)); mtuinfo.ip6m_mtu = (u_int32_t)pmtu; optdata = (void *)&mtuinfo; optdatalen = sizeof(mtuinfo); error = sooptcopyout(sopt, optdata, optdatalen); break; } case IPV6_2292PKTINFO: case IPV6_2292HOPLIMIT: case IPV6_2292HOPOPTS: case IPV6_2292RTHDR: case IPV6_2292DSTOPTS: switch (optname) { case IPV6_2292PKTINFO: optval = OPTBIT(IN6P_PKTINFO); break; case IPV6_2292HOPLIMIT: optval = OPTBIT(IN6P_HOPLIMIT); break; case IPV6_2292HOPOPTS: optval = OPTBIT(IN6P_HOPOPTS); break; case IPV6_2292RTHDR: optval = OPTBIT(IN6P_RTHDR); break; case IPV6_2292DSTOPTS: optval = OPTBIT(IN6P_DSTOPTS|IN6P_RTHDRDSTOPTS); break; } error = sooptcopyout(sopt, &optval, sizeof optval); break; case IPV6_PKTINFO: case IPV6_HOPOPTS: case IPV6_RTHDR: case IPV6_DSTOPTS: case IPV6_RTHDRDSTOPTS: case IPV6_NEXTHOP: case IPV6_TCLASS: case IPV6_DONTFRAG: case IPV6_USE_MIN_MTU: case IPV6_PREFER_TEMPADDR: error = ip6_getpcbopt(in6p->in6p_outputopts, optname, sopt); break; case IPV6_MULTICAST_IF: case IPV6_MULTICAST_HOPS: case IPV6_MULTICAST_LOOP: case IPV6_MSFILTER: error = ip6_getmoptions(in6p, sopt); break; #ifdef IPSEC case IPV6_IPSEC_POLICY: { caddr_t req = NULL; size_t len = 0; struct mbuf *m = NULL; struct mbuf **mp = &m; size_t ovalsize = sopt->sopt_valsize; caddr_t oval = (caddr_t)sopt->sopt_val; error = soopt_getm(sopt, &m); /* XXX */ if (error != 0) break; error = soopt_mcopyin(sopt, m); /* XXX */ if (error != 0) break; sopt->sopt_valsize = ovalsize; sopt->sopt_val = oval; if (m) { req = mtod(m, caddr_t); len = m->m_len; } error = ipsec_get_policy(in6p, req, len, mp); if (error == 0) error = soopt_mcopyout(sopt, m); /* XXX */ if (error == 0 && m) m_freem(m); break; } #endif /* IPSEC */ default: error = ENOPROTOOPT; break; } break; } } return (error); } int ip6_raw_ctloutput(struct socket *so, struct sockopt *sopt) { int error = 0, optval, optlen; const int icmp6off = offsetof(struct icmp6_hdr, icmp6_cksum); struct inpcb *in6p = sotoinpcb(so); int level, op, optname; level = sopt->sopt_level; op = sopt->sopt_dir; optname = sopt->sopt_name; optlen = sopt->sopt_valsize; if (level != IPPROTO_IPV6) { return (EINVAL); } switch (optname) { case IPV6_CHECKSUM: /* * For ICMPv6 sockets, no modification of the checksum offset * is allowed; permit "no change" values to help existing apps. * * RFC3542 says: "An attempt to set IPV6_CHECKSUM * for an ICMPv6 socket will fail." * The current behavior does not meet RFC3542.
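 *
 * (Editor's aside, not in the original source: the option lets raw
 * sockets for protocols other than ICMPv6 delegate checksumming to
 * the kernel.  A hedged userland sketch for a hypothetical protocol
 * whose 16-bit checksum sits at byte offset 2 -- 253 is a protocol
 * number reserved for experimentation:
 *
 *	int s = socket(AF_INET6, SOCK_RAW, 253);
 *	int off = 2;
 *
 *	if (setsockopt(s, IPPROTO_IPV6, IPV6_CHECKSUM,
 *	    &off, sizeof(off)) == -1)
 *		err(1, "IPV6_CHECKSUM");
 *
 * The offset must be even, matching the (optval % 2) check below.)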
*/ switch (op) { case SOPT_SET: if (optlen != sizeof(int)) { error = EINVAL; break; } error = sooptcopyin(sopt, &optval, sizeof(optval), sizeof(optval)); if (error) break; if ((optval % 2) != 0) { /* the API assumes even offset values */ error = EINVAL; } else if (so->so_proto->pr_protocol == IPPROTO_ICMPV6) { if (optval != icmp6off) error = EINVAL; } else in6p->in6p_cksum = optval; break; case SOPT_GET: if (so->so_proto->pr_protocol == IPPROTO_ICMPV6) optval = icmp6off; else optval = in6p->in6p_cksum; error = sooptcopyout(sopt, &optval, sizeof(optval)); break; default: error = EINVAL; break; } break; default: error = ENOPROTOOPT; break; } return (error); } /* * Set up IP6 options in pcb for insertion in output packets or * specifying behavior of outgoing packets. */ static int ip6_pcbopts(struct ip6_pktopts **pktopt, struct mbuf *m, struct socket *so, struct sockopt *sopt) { struct ip6_pktopts *opt = *pktopt; int error = 0; struct thread *td = sopt->sopt_td; /* turn off any old options. */ if (opt) { #ifdef DIAGNOSTIC if (opt->ip6po_pktinfo || opt->ip6po_nexthop || opt->ip6po_hbh || opt->ip6po_dest1 || opt->ip6po_dest2 || opt->ip6po_rhinfo.ip6po_rhi_rthdr) printf("ip6_pcbopts: all specified options are cleared.\n"); #endif ip6_clearpktopts(opt, -1); } else opt = malloc(sizeof(*opt), M_IP6OPT, M_WAITOK); *pktopt = NULL; if (!m || m->m_len == 0) { /* * Only turning off any previous options, regardless of * whether the opt is just created or given. */ free(opt, M_IP6OPT); return (0); } /* set options specified by user. */ if ((error = ip6_setpktopts(m, opt, NULL, (td != NULL) ? td->td_ucred : NULL, so->so_proto->pr_protocol)) != 0) { ip6_clearpktopts(opt, -1); /* XXX: discard all options */ free(opt, M_IP6OPT); return (error); } *pktopt = opt; return (0); } /* * initialize ip6_pktopts. beware that there are non-zero default values in * the struct. */ void ip6_initpktopts(struct ip6_pktopts *opt) { bzero(opt, sizeof(*opt)); opt->ip6po_hlim = -1; /* -1 means default hop limit */ opt->ip6po_tclass = -1; /* -1 means default traffic class */ opt->ip6po_minmtu = IP6PO_MINMTU_MCASTONLY; opt->ip6po_prefer_tempaddr = IP6PO_TEMPADDR_SYSTEM; } static int ip6_pcbopt(int optname, u_char *buf, int len, struct ip6_pktopts **pktopt, struct ucred *cred, int uproto) { struct ip6_pktopts *opt; if (*pktopt == NULL) { *pktopt = malloc(sizeof(struct ip6_pktopts), M_IP6OPT, M_WAITOK); ip6_initpktopts(*pktopt); } opt = *pktopt; return (ip6_setpktopt(optname, buf, len, opt, cred, 1, 0, uproto)); } static int ip6_getpcbopt(struct ip6_pktopts *pktopt, int optname, struct sockopt *sopt) { void *optdata = NULL; int optdatalen = 0; struct ip6_ext *ip6e; int error = 0; struct in6_pktinfo null_pktinfo; int deftclass = 0, on; int defminmtu = IP6PO_MINMTU_MCASTONLY; int defpreftemp = IP6PO_TEMPADDR_SYSTEM; switch (optname) { case IPV6_PKTINFO: optdata = (void *)&null_pktinfo; if (pktopt && pktopt->ip6po_pktinfo) { bcopy(pktopt->ip6po_pktinfo, &null_pktinfo, sizeof(null_pktinfo)); in6_clearscope(&null_pktinfo.ipi6_addr); } else { /* XXX: we don't have to do this every time... 
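 *
 * (Editor's aside, not in the original source: per RFC 3542 an
 * application can also clear a single sticky option by setting it
 * again with zero-length data; a hedged userland sketch:
 *
 *	setsockopt(s, IPPROTO_IPV6, IPV6_HOPOPTS, NULL, 0);
 *
 * arrives at ip6_pcbopt() above with len 0 and is turned into
 * ip6_clearpktopts(opt, IPV6_HOPOPTS) by ip6_setpktopt() below.)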
*/ bzero(&null_pktinfo, sizeof(null_pktinfo)); } optdatalen = sizeof(struct in6_pktinfo); break; case IPV6_TCLASS: if (pktopt && pktopt->ip6po_tclass >= 0) optdata = (void *)&pktopt->ip6po_tclass; else optdata = (void *)&deftclass; optdatalen = sizeof(int); break; case IPV6_HOPOPTS: if (pktopt && pktopt->ip6po_hbh) { optdata = (void *)pktopt->ip6po_hbh; ip6e = (struct ip6_ext *)pktopt->ip6po_hbh; optdatalen = (ip6e->ip6e_len + 1) << 3; } break; case IPV6_RTHDR: if (pktopt && pktopt->ip6po_rthdr) { optdata = (void *)pktopt->ip6po_rthdr; ip6e = (struct ip6_ext *)pktopt->ip6po_rthdr; optdatalen = (ip6e->ip6e_len + 1) << 3; } break; case IPV6_RTHDRDSTOPTS: if (pktopt && pktopt->ip6po_dest1) { optdata = (void *)pktopt->ip6po_dest1; ip6e = (struct ip6_ext *)pktopt->ip6po_dest1; optdatalen = (ip6e->ip6e_len + 1) << 3; } break; case IPV6_DSTOPTS: if (pktopt && pktopt->ip6po_dest2) { optdata = (void *)pktopt->ip6po_dest2; ip6e = (struct ip6_ext *)pktopt->ip6po_dest2; optdatalen = (ip6e->ip6e_len + 1) << 3; } break; case IPV6_NEXTHOP: if (pktopt && pktopt->ip6po_nexthop) { optdata = (void *)pktopt->ip6po_nexthop; optdatalen = pktopt->ip6po_nexthop->sa_len; } break; case IPV6_USE_MIN_MTU: if (pktopt) optdata = (void *)&pktopt->ip6po_minmtu; else optdata = (void *)&defminmtu; optdatalen = sizeof(int); break; case IPV6_DONTFRAG: if (pktopt && ((pktopt->ip6po_flags) & IP6PO_DONTFRAG)) on = 1; else on = 0; optdata = (void *)&on; optdatalen = sizeof(on); break; case IPV6_PREFER_TEMPADDR: if (pktopt) optdata = (void *)&pktopt->ip6po_prefer_tempaddr; else optdata = (void *)&defpreftemp; optdatalen = sizeof(int); break; default: /* should not happen */ #ifdef DIAGNOSTIC panic("ip6_getpcbopt: unexpected option\n"); #endif return (ENOPROTOOPT); } error = sooptcopyout(sopt, optdata, optdatalen); return (error); } void ip6_clearpktopts(struct ip6_pktopts *pktopt, int optname) { if (pktopt == NULL) return; if (optname == -1 || optname == IPV6_PKTINFO) { if (pktopt->ip6po_pktinfo) free(pktopt->ip6po_pktinfo, M_IP6OPT); pktopt->ip6po_pktinfo = NULL; } if (optname == -1 || optname == IPV6_HOPLIMIT) pktopt->ip6po_hlim = -1; if (optname == -1 || optname == IPV6_TCLASS) pktopt->ip6po_tclass = -1; if (optname == -1 || optname == IPV6_NEXTHOP) { if (pktopt->ip6po_nextroute.ro_rt) { RTFREE(pktopt->ip6po_nextroute.ro_rt); pktopt->ip6po_nextroute.ro_rt = NULL; } if (pktopt->ip6po_nexthop) free(pktopt->ip6po_nexthop, M_IP6OPT); pktopt->ip6po_nexthop = NULL; } if (optname == -1 || optname == IPV6_HOPOPTS) { if (pktopt->ip6po_hbh) free(pktopt->ip6po_hbh, M_IP6OPT); pktopt->ip6po_hbh = NULL; } if (optname == -1 || optname == IPV6_RTHDRDSTOPTS) { if (pktopt->ip6po_dest1) free(pktopt->ip6po_dest1, M_IP6OPT); pktopt->ip6po_dest1 = NULL; } if (optname == -1 || optname == IPV6_RTHDR) { if (pktopt->ip6po_rhinfo.ip6po_rhi_rthdr) free(pktopt->ip6po_rhinfo.ip6po_rhi_rthdr, M_IP6OPT); pktopt->ip6po_rhinfo.ip6po_rhi_rthdr = NULL; if (pktopt->ip6po_route.ro_rt) { RTFREE(pktopt->ip6po_route.ro_rt); pktopt->ip6po_route.ro_rt = NULL; } } if (optname == -1 || optname == IPV6_DSTOPTS) { if (pktopt->ip6po_dest2) free(pktopt->ip6po_dest2, M_IP6OPT); pktopt->ip6po_dest2 = NULL; } } #define PKTOPT_EXTHDRCPY(type) \ do {\ if (src->type) {\ int hlen = (((struct ip6_ext *)src->type)->ip6e_len + 1) << 3;\ dst->type = malloc(hlen, M_IP6OPT, canwait);\ if (dst->type == NULL && canwait == M_NOWAIT)\ goto bad;\ bcopy(src->type, dst->type, hlen);\ }\ } while (/*CONSTCOND*/ 0) static int copypktopts(struct ip6_pktopts *dst, struct ip6_pktopts *src, int 
canwait) { if (dst == NULL || src == NULL) { printf("copypktopts: invalid argument\n"); return (EINVAL); } dst->ip6po_hlim = src->ip6po_hlim; dst->ip6po_tclass = src->ip6po_tclass; dst->ip6po_flags = src->ip6po_flags; dst->ip6po_minmtu = src->ip6po_minmtu; dst->ip6po_prefer_tempaddr = src->ip6po_prefer_tempaddr; if (src->ip6po_pktinfo) { dst->ip6po_pktinfo = malloc(sizeof(*dst->ip6po_pktinfo), M_IP6OPT, canwait); if (dst->ip6po_pktinfo == NULL) goto bad; *dst->ip6po_pktinfo = *src->ip6po_pktinfo; } if (src->ip6po_nexthop) { dst->ip6po_nexthop = malloc(src->ip6po_nexthop->sa_len, M_IP6OPT, canwait); if (dst->ip6po_nexthop == NULL) goto bad; bcopy(src->ip6po_nexthop, dst->ip6po_nexthop, src->ip6po_nexthop->sa_len); } PKTOPT_EXTHDRCPY(ip6po_hbh); PKTOPT_EXTHDRCPY(ip6po_dest1); PKTOPT_EXTHDRCPY(ip6po_dest2); PKTOPT_EXTHDRCPY(ip6po_rthdr); /* do not copy the cached route */ return (0); bad: ip6_clearpktopts(dst, -1); return (ENOBUFS); } #undef PKTOPT_EXTHDRCPY struct ip6_pktopts * ip6_copypktopts(struct ip6_pktopts *src, int canwait) { int error; struct ip6_pktopts *dst; dst = malloc(sizeof(*dst), M_IP6OPT, canwait); if (dst == NULL) return (NULL); ip6_initpktopts(dst); if ((error = copypktopts(dst, src, canwait)) != 0) { free(dst, M_IP6OPT); return (NULL); } return (dst); } void ip6_freepcbopts(struct ip6_pktopts *pktopt) { if (pktopt == NULL) return; ip6_clearpktopts(pktopt, -1); free(pktopt, M_IP6OPT); } /* * Set IPv6 outgoing packet options based on advanced API. */ int ip6_setpktopts(struct mbuf *control, struct ip6_pktopts *opt, struct ip6_pktopts *stickyopt, struct ucred *cred, int uproto) { struct cmsghdr *cm = NULL; if (control == NULL || opt == NULL) return (EINVAL); ip6_initpktopts(opt); if (stickyopt) { int error; /* * If stickyopt is provided, make a local copy of the options * for this particular packet, then override them by ancillary * objects. * XXX: copypktopts() does not copy the cached route to a next * hop (if any). This is not very good in terms of efficiency, * but we can allow this since this option should be rarely * used. */ if ((error = copypktopts(opt, stickyopt, M_NOWAIT)) != 0) return (error); } /* * XXX: Currently, we assume all the optional information is stored * in a single mbuf. */ if (control->m_next) return (EINVAL); for (; control->m_len > 0; control->m_data += CMSG_ALIGN(cm->cmsg_len), control->m_len -= CMSG_ALIGN(cm->cmsg_len)) { int error; if (control->m_len < CMSG_LEN(0)) return (EINVAL); cm = mtod(control, struct cmsghdr *); if (cm->cmsg_len == 0 || cm->cmsg_len > control->m_len) return (EINVAL); if (cm->cmsg_level != IPPROTO_IPV6) continue; error = ip6_setpktopt(cm->cmsg_type, CMSG_DATA(cm), cm->cmsg_len - CMSG_LEN(0), opt, cred, 0, 1, uproto); if (error) return (error); } return (0); } /* * Set a particular packet option, as a sticky option or an ancillary data * item. "len" can be 0 only when it's a sticky option. * We have 4 cases of combination of "sticky" and "cmsg": * "sticky=0, cmsg=0": impossible * "sticky=0, cmsg=1": RFC2292 or RFC3542 ancillary data * "sticky=1, cmsg=0": RFC3542 socket option * "sticky=1, cmsg=1": RFC2292 socket option */ static int ip6_setpktopt(int optname, u_char *buf, int len, struct ip6_pktopts *opt, struct ucred *cred, int sticky, int cmsg, int uproto) { int minmtupolicy, preftemp; int error; if (!sticky && !cmsg) { #ifdef DIAGNOSTIC printf("ip6_setpktopt: impossible case\n"); #endif return (EINVAL); } /* * IPV6_2292xxx is for backward compatibility to RFC2292, and should * not be specified in the context of RFC3542.
Conversely, * RFC3542 types should not be specified in the context of RFC2292. */ if (!cmsg) { switch (optname) { case IPV6_2292PKTINFO: case IPV6_2292HOPLIMIT: case IPV6_2292NEXTHOP: case IPV6_2292HOPOPTS: case IPV6_2292DSTOPTS: case IPV6_2292RTHDR: case IPV6_2292PKTOPTIONS: return (ENOPROTOOPT); } } if (sticky && cmsg) { switch (optname) { case IPV6_PKTINFO: case IPV6_HOPLIMIT: case IPV6_NEXTHOP: case IPV6_HOPOPTS: case IPV6_DSTOPTS: case IPV6_RTHDRDSTOPTS: case IPV6_RTHDR: case IPV6_USE_MIN_MTU: case IPV6_DONTFRAG: case IPV6_TCLASS: case IPV6_PREFER_TEMPADDR: /* XXX: not an RFC3542 option */ return (ENOPROTOOPT); } } switch (optname) { case IPV6_2292PKTINFO: case IPV6_PKTINFO: { struct ifnet *ifp = NULL; struct in6_pktinfo *pktinfo; if (len != sizeof(struct in6_pktinfo)) return (EINVAL); pktinfo = (struct in6_pktinfo *)buf; /* * An application can clear any sticky IPV6_PKTINFO option by * doing a "regular" setsockopt with ipi6_addr being * in6addr_any and ipi6_ifindex being zero. * [RFC 3542, Section 6] */ if (optname == IPV6_PKTINFO && opt->ip6po_pktinfo && pktinfo->ipi6_ifindex == 0 && IN6_IS_ADDR_UNSPECIFIED(&pktinfo->ipi6_addr)) { ip6_clearpktopts(opt, optname); break; } if (uproto == IPPROTO_TCP && optname == IPV6_PKTINFO && sticky && !IN6_IS_ADDR_UNSPECIFIED(&pktinfo->ipi6_addr)) { return (EINVAL); } if (IN6_IS_ADDR_MULTICAST(&pktinfo->ipi6_addr)) return (EINVAL); /* validate the interface index if specified. */ if (pktinfo->ipi6_ifindex > V_if_index) return (ENXIO); if (pktinfo->ipi6_ifindex) { ifp = ifnet_byindex(pktinfo->ipi6_ifindex); if (ifp == NULL) return (ENXIO); } if (ifp != NULL && (ifp->if_afdata[AF_INET6] == NULL || (ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED) != 0)) return (ENETDOWN); if (ifp != NULL && !IN6_IS_ADDR_UNSPECIFIED(&pktinfo->ipi6_addr)) { struct in6_ifaddr *ia; in6_setscope(&pktinfo->ipi6_addr, ifp, NULL); ia = in6ifa_ifpwithaddr(ifp, &pktinfo->ipi6_addr); if (ia == NULL) return (EADDRNOTAVAIL); ifa_free(&ia->ia_ifa); } /* * We store the address anyway, and let in6_selectsrc() * validate the specified address. This is because ipi6_addr * may not have enough information about its scope zone, and * we may need additional information (such as outgoing * interface or the scope zone of a destination address) to * disambiguate the scope. * XXX: the delay of the validation may confuse the * application when it is used as a sticky option. */ if (opt->ip6po_pktinfo == NULL) { opt->ip6po_pktinfo = malloc(sizeof(*pktinfo), M_IP6OPT, M_NOWAIT); if (opt->ip6po_pktinfo == NULL) return (ENOBUFS); } bcopy(pktinfo, opt->ip6po_pktinfo, sizeof(*pktinfo)); break; } case IPV6_2292HOPLIMIT: case IPV6_HOPLIMIT: { int *hlimp; /* * RFC 3542 deprecated the usage of sticky IPV6_HOPLIMIT * to simplify the ordering among hoplimit options. 
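 *
 * (Editor's aside, not in the original source: a per-packet hop
 * limit therefore travels as RFC 3542 ancillary data; a hedged
 * userland sketch of the sender side, with msg_name and msg_iov
 * setup omitted:
 *
 *	struct msghdr msg;
 *	struct cmsghdr *cm;
 *	char cbuf[CMSG_SPACE(sizeof(int))];
 *	int hlim = 64;
 *
 *	memset(&msg, 0, sizeof(msg));
 *	msg.msg_control = cbuf;
 *	msg.msg_controllen = sizeof(cbuf);
 *	cm = CMSG_FIRSTHDR(&msg);
 *	cm->cmsg_level = IPPROTO_IPV6;
 *	cm->cmsg_type = IPV6_HOPLIMIT;
 *	cm->cmsg_len = CMSG_LEN(sizeof(int));
 *	memcpy(CMSG_DATA(cm), &hlim, sizeof(hlim));
 *
 * sendmsg() with this control buffer reaches ip6_setpktopts() above
 * with cmsg = 1 and sticky = 0, the one combination this case still
 * accepts for IPV6_HOPLIMIT.)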
*/ if (optname == IPV6_HOPLIMIT && sticky) return (ENOPROTOOPT); if (len != sizeof(int)) return (EINVAL); hlimp = (int *)buf; if (*hlimp < -1 || *hlimp > 255) return (EINVAL); opt->ip6po_hlim = *hlimp; break; } case IPV6_TCLASS: { int tclass; if (len != sizeof(int)) return (EINVAL); tclass = *(int *)buf; if (tclass < -1 || tclass > 255) return (EINVAL); opt->ip6po_tclass = tclass; break; } case IPV6_2292NEXTHOP: case IPV6_NEXTHOP: if (cred != NULL) { error = priv_check_cred(cred, PRIV_NETINET_SETHDROPTS, 0); if (error) return (error); } if (len == 0) { /* just remove the option */ ip6_clearpktopts(opt, IPV6_NEXTHOP); break; } /* check if cmsg_len is large enough for sa_len */ if (len < sizeof(struct sockaddr) || len < *buf) return (EINVAL); switch (((struct sockaddr *)buf)->sa_family) { case AF_INET6: { struct sockaddr_in6 *sa6 = (struct sockaddr_in6 *)buf; int error; if (sa6->sin6_len != sizeof(struct sockaddr_in6)) return (EINVAL); if (IN6_IS_ADDR_UNSPECIFIED(&sa6->sin6_addr) || IN6_IS_ADDR_MULTICAST(&sa6->sin6_addr)) { return (EINVAL); } if ((error = sa6_embedscope(sa6, V_ip6_use_defzone)) != 0) { return (error); } break; } case AF_LINK: /* should eventually be supported */ default: return (EAFNOSUPPORT); } /* turn off the previous option, then set the new option. */ ip6_clearpktopts(opt, IPV6_NEXTHOP); opt->ip6po_nexthop = malloc(*buf, M_IP6OPT, M_NOWAIT); if (opt->ip6po_nexthop == NULL) return (ENOBUFS); bcopy(buf, opt->ip6po_nexthop, *buf); break; case IPV6_2292HOPOPTS: case IPV6_HOPOPTS: { struct ip6_hbh *hbh; int hbhlen; /* * XXX: We don't allow a non-privileged user to set ANY HbH * options, since per-option restriction has too much * overhead. */ if (cred != NULL) { error = priv_check_cred(cred, PRIV_NETINET_SETHDROPTS, 0); if (error) return (error); } if (len == 0) { ip6_clearpktopts(opt, IPV6_HOPOPTS); break; /* just remove the option */ } /* message length validation */ if (len < sizeof(struct ip6_hbh)) return (EINVAL); hbh = (struct ip6_hbh *)buf; hbhlen = (hbh->ip6h_len + 1) << 3; if (len != hbhlen) return (EINVAL); /* turn off the previous option, then set the new option. */ ip6_clearpktopts(opt, IPV6_HOPOPTS); opt->ip6po_hbh = malloc(hbhlen, M_IP6OPT, M_NOWAIT); if (opt->ip6po_hbh == NULL) return (ENOBUFS); bcopy(hbh, opt->ip6po_hbh, hbhlen); break; } case IPV6_2292DSTOPTS: case IPV6_DSTOPTS: case IPV6_RTHDRDSTOPTS: { struct ip6_dest *dest, **newdest = NULL; int destlen; if (cred != NULL) { /* XXX: see the comment for IPV6_HOPOPTS */ error = priv_check_cred(cred, PRIV_NETINET_SETHDROPTS, 0); if (error) return (error); } if (len == 0) { ip6_clearpktopts(opt, optname); break; /* just remove the option */ } /* message length validation */ if (len < sizeof(struct ip6_dest)) return (EINVAL); dest = (struct ip6_dest *)buf; destlen = (dest->ip6d_len + 1) << 3; if (len != destlen) return (EINVAL); /* * Determine the position that the destination options header * should be inserted; before or after the routing header. */ switch (optname) { case IPV6_2292DSTOPTS: /* * The old advanced API is ambiguous on this point. * Our approach is to determine the position * according to the existence of a routing header. * Note, however, that this depends on the order of the * extension headers in the ancillary data; the 1st * part of the destination options header must appear * before the routing header in the ancillary data, * too. * RFC3542 solved the ambiguity by introducing * separate ancillary data or option types.
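 *
 * (Editor's aside, not in the original source: the length checks in
 * the surrounding cases all rely on the extension header encoding
 * rule that a header's length byte counts 8-octet units beyond the
 * first, i.e.
 *
 *	total_bytes = (ext_len_field + 1) << 3;
 *
 * so a type 0 routing header with ip6r_len 2 is 24 bytes and carries
 * a single 16-byte address, which is why the IPV6_RTHDR case below
 * requires ip6r_len / 2 == ip6r_segleft for a freshly set option.)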
*/ if (opt->ip6po_rthdr == NULL) newdest = &opt->ip6po_dest1; else newdest = &opt->ip6po_dest2; break; case IPV6_RTHDRDSTOPTS: newdest = &opt->ip6po_dest1; break; case IPV6_DSTOPTS: newdest = &opt->ip6po_dest2; break; } /* turn off the previous option, then set the new option. */ ip6_clearpktopts(opt, optname); *newdest = malloc(destlen, M_IP6OPT, M_NOWAIT); if (*newdest == NULL) return (ENOBUFS); bcopy(dest, *newdest, destlen); break; } case IPV6_2292RTHDR: case IPV6_RTHDR: { struct ip6_rthdr *rth; int rthlen; if (len == 0) { ip6_clearpktopts(opt, IPV6_RTHDR); break; /* just remove the option */ } /* message length validation */ if (len < sizeof(struct ip6_rthdr)) return (EINVAL); rth = (struct ip6_rthdr *)buf; rthlen = (rth->ip6r_len + 1) << 3; if (len != rthlen) return (EINVAL); switch (rth->ip6r_type) { case IPV6_RTHDR_TYPE_0: if (rth->ip6r_len == 0) /* must contain one addr */ return (EINVAL); if (rth->ip6r_len % 2) /* length must be even */ return (EINVAL); if (rth->ip6r_len / 2 != rth->ip6r_segleft) return (EINVAL); break; default: return (EINVAL); /* not supported */ } /* turn off the previous option */ ip6_clearpktopts(opt, IPV6_RTHDR); opt->ip6po_rthdr = malloc(rthlen, M_IP6OPT, M_NOWAIT); if (opt->ip6po_rthdr == NULL) return (ENOBUFS); bcopy(rth, opt->ip6po_rthdr, rthlen); break; } case IPV6_USE_MIN_MTU: if (len != sizeof(int)) return (EINVAL); minmtupolicy = *(int *)buf; if (minmtupolicy != IP6PO_MINMTU_MCASTONLY && minmtupolicy != IP6PO_MINMTU_DISABLE && minmtupolicy != IP6PO_MINMTU_ALL) { return (EINVAL); } opt->ip6po_minmtu = minmtupolicy; break; case IPV6_DONTFRAG: if (len != sizeof(int)) return (EINVAL); if (uproto == IPPROTO_TCP || *(int *)buf == 0) { /* * we ignore this option for TCP sockets. * (RFC3542 leaves this case unspecified.) */ opt->ip6po_flags &= ~IP6PO_DONTFRAG; } else opt->ip6po_flags |= IP6PO_DONTFRAG; break; case IPV6_PREFER_TEMPADDR: if (len != sizeof(int)) return (EINVAL); preftemp = *(int *)buf; if (preftemp != IP6PO_TEMPADDR_SYSTEM && preftemp != IP6PO_TEMPADDR_NOTPREFER && preftemp != IP6PO_TEMPADDR_PREFER) { return (EINVAL); } opt->ip6po_prefer_tempaddr = preftemp; break; default: return (ENOPROTOOPT); } /* end of switch */ return (0); } /* * Routine called from ip6_output() to loop back a copy of an IP6 multicast * packet to the input queue of a specified interface. Note that this * calls the output routine of the loopback "driver", but with an interface * pointer that might NOT be &loif -- easier than replicating that code here. */ void ip6_mloopback(struct ifnet *ifp, struct mbuf *m) { struct mbuf *copym; struct ip6_hdr *ip6; copym = m_copy(m, 0, M_COPYALL); if (copym == NULL) return; /* * Make sure to deep-copy IPv6 header portion in case the data * is in an mbuf cluster, so that we can safely override the IPv6 * header portion later. */ if (!M_WRITABLE(copym) || copym->m_len < sizeof(struct ip6_hdr)) { copym = m_pullup(copym, sizeof(struct ip6_hdr)); if (copym == NULL) return; } ip6 = mtod(copym, struct ip6_hdr *); /* * clear embedded scope identifiers if necessary. * in6_clearscope will touch the addresses only when necessary. */ in6_clearscope(&ip6->ip6_src); in6_clearscope(&ip6->ip6_dst); if (copym->m_pkthdr.csum_flags & CSUM_DELAY_DATA_IPV6) { copym->m_pkthdr.csum_flags |= CSUM_DATA_VALID_IPV6 | CSUM_PSEUDO_HDR; copym->m_pkthdr.csum_data = 0xffff; } if_simloop(ifp, copym, AF_INET6, 0); } /* * Chop IPv6 header off from the payload. 
*/ static int ip6_splithdr(struct mbuf *m, struct ip6_exthdrs *exthdrs) { struct mbuf *mh; struct ip6_hdr *ip6; ip6 = mtod(m, struct ip6_hdr *); if (m->m_len > sizeof(*ip6)) { mh = m_gethdr(M_NOWAIT, MT_DATA); if (mh == NULL) { m_freem(m); return ENOBUFS; } m_move_pkthdr(mh, m); M_ALIGN(mh, sizeof(*ip6)); m->m_len -= sizeof(*ip6); m->m_data += sizeof(*ip6); mh->m_next = m; m = mh; m->m_len = sizeof(*ip6); bcopy((caddr_t)ip6, mtod(m, caddr_t), sizeof(*ip6)); } exthdrs->ip6e_ip6 = m; return 0; } /* * Compute IPv6 extension header length. */ int ip6_optlen(struct inpcb *in6p) { int len; if (!in6p->in6p_outputopts) return 0; len = 0; #define elen(x) \ (((struct ip6_ext *)(x)) ? (((struct ip6_ext *)(x))->ip6e_len + 1) << 3 : 0) len += elen(in6p->in6p_outputopts->ip6po_hbh); if (in6p->in6p_outputopts->ip6po_rthdr) /* dest1 is valid with rthdr only */ len += elen(in6p->in6p_outputopts->ip6po_dest1); len += elen(in6p->in6p_outputopts->ip6po_rthdr); len += elen(in6p->in6p_outputopts->ip6po_dest2); return len; #undef elen } Index: user/alc/PQ_LAUNDRY/sys/riscv/conf/GENERIC =================================================================== --- user/alc/PQ_LAUNDRY/sys/riscv/conf/GENERIC (revision 303666) +++ user/alc/PQ_LAUNDRY/sys/riscv/conf/GENERIC (revision 303667) @@ -1,112 +1,113 @@ # # GENERIC -- Generic kernel configuration file for FreeBSD/RISC-V # # For more information on this file, please read the config(5) manual page, # and/or the handbook section on Kernel Configuration Files: # # http://www.FreeBSD.org/doc/en_US.ISO8859-1/books/handbook/kernelconfig-config.html # # The handbook is also available locally in /usr/share/doc/handbook # if you've installed the doc distribution, otherwise always see the # FreeBSD World Wide Web server (http://www.FreeBSD.org/) for the # latest information. # # An exhaustive list of options and more detailed explanations of the # device lines is also present in the ../../conf/NOTES and NOTES files. # If you are in doubt as to the purpose or necessity of a line, check first # in NOTES. # # $FreeBSD$ cpu RISCV ident GENERIC makeoptions DEBUG=-g # Build kernel with gdb(1) debug symbols # makeoptions WITH_CTF=1 # Run ctfconvert(1) for DTrace support # FIXME: linker error. "--relax and -r may not be used together" makeoptions WITHOUT_MODULES="usb otusfw mwlfw ispfw mwlfw ralfw rtwnfw urtwnfw" +# makeoptions NO_MODULES options SCHED_ULE # ULE scheduler options PREEMPTION # Enable kernel thread preemption options INET # InterNETworking options INET6 # IPv6 communications protocols options IPSEC # IP (v4/v6) security options TCP_OFFLOAD # TCP offload options SCTP # Stream Control Transmission Protocol options FFS # Berkeley Fast Filesystem options SOFTUPDATES # Enable FFS soft updates support options UFS_ACL # Support for access control lists options UFS_DIRHASH # Improve performance on big directories options UFS_GJOURNAL # Enable gjournal-based UFS journaling options QUOTA # Enable disk quotas for UFS -options MD_ROOT # MD is a potential root device options NFSCL # Network Filesystem Client options NFSD # Network Filesystem Server options NFSLOCKD # Network Lock Manager options NFS_ROOT # NFS usable as /, requires NFSCL options MSDOSFS # MSDOS Filesystem options CD9660 # ISO 9660 Filesystem options PROCFS # Process filesystem (requires PSEUDOFS) options PSEUDOFS # Pseudo-filesystem framework options GEOM_PART_GPT # GUID Partition Tables. # options GEOM_RAID # Soft RAID functionality. 
options GEOM_LABEL # Provides labelization options SCSI_DELAY=5000 # Delay (in ms) before probing SCSI options KTRACE # ktrace(1) support # options STACK # stack(9) support options SYSVSHM # SYSV-style shared memory options SYSVMSG # SYSV-style message queues options SYSVSEM # SYSV-style semaphores options _KPOSIX_PRIORITY_SCHEDULING # POSIX P1003_1B real-time extensions options PRINTF_BUFR_SIZE=128 # Prevent printf output being interspersed. options KBD_INSTALL_CDEV # install a CDEV entry in /dev # options HWPMC_HOOKS # Necessary kernel hooks for hwpmc(4) options AUDIT # Security event auditing options CAPABILITY_MODE # Capsicum capability mode options CAPABILITIES # Capsicum capabilities options MAC # TrustedBSD MAC Framework options KDTRACE_FRAME # Ensure frames are compiled in options KDTRACE_HOOKS # Kernel DTrace hooks # options VFP # Floating-point support options RACCT # Resource accounting framework options RACCT_DEFAULT_TO_DISABLED # Set kern.racct.enable=0 by default options RCTL # Resource limits options SMP # Uncomment for memory disk # options MD_ROOT # options MD_ROOT_SIZE=8192 # 8MB ram disk # makeoptions MFS_IMAGE=/path/to/img # options ROOTDEVNAME=\"ufs:/dev/md0\" # Debugging support. Always need this: options KDB # Enable kernel debugger support. options KDB_TRACE # Print a stack trace for a panic. # For full debugger support use (turn off in stable branch): options DDB # Support DDB. # options GDB # Support remote GDB. options DEADLKRES # Enable the deadlock resolver options INVARIANTS # Enable calls of extra sanity checking options INVARIANT_SUPPORT # Extra sanity checks of internal structures, required by INVARIANTS # options WITNESS # Enable checks to detect deadlocks and cycles # options WITNESS_SKIPSPIN # Don't run witness on spinlocks for speed options MALLOC_DEBUG_MAXZONES=8 # Separate malloc(9) zones # options EARLY_PRINTF +# options VERBOSE_SYSINIT # Pseudo devices. device loop # Network loopback device random # Entropy device device ether # Ethernet support device vlan # 802.1Q VLAN support device tun # Packet tunnel. device md # Memory "disks" device gif # IPv6 and IPv4 tunneling device firmware # firmware assist module # The `bpf' device enables the Berkeley Packet Filter. # Be aware of the administrative consequences of enabling this! # Note that 'bpf' is required for DHCP. device bpf # Berkeley packet filter options FDT Index: user/alc/PQ_LAUNDRY/sys/riscv/htif/htif.c =================================================================== --- user/alc/PQ_LAUNDRY/sys/riscv/htif/htif.c (revision 303666) +++ user/alc/PQ_LAUNDRY/sys/riscv/htif/htif.c (revision 303667) @@ -1,283 +1,278 @@ /*- * Copyright (c) 2015-2016 Ruslan Bukin * All rights reserved. * * Portions of this software were developed by SRI International and the * University of Cambridge Computer Laboratory under DARPA/AFRL contract * FA8750-10-C-0237 ("CTSRD"), as part of the DARPA CRASH research programme. * * Portions of this software were developed by the University of Cambridge * Computer Laboratory as part of the CTSRD Project, with support from the * UK Higher Education Innovation Fund (HEIF). * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "htif.h" static struct resource_spec htif_spec[] = { { SYS_RES_IRQ, 0, RF_ACTIVE | RF_SHAREABLE}, { -1, 0 } }; struct intr_entry { void (*func) (void *, uint64_t); void *arg; }; struct intr_entry intrs[HTIF_NDEV]; uint64_t htif_command(uint64_t arg) { return (machine_command(ECALL_HTIF_CMD, arg)); } int htif_setup_intr(int id, void *func, void *arg) { if (id >= HTIF_NDEV) return (-1); intrs[id].func = func; intrs[id].arg = arg; return (0); } static void htif_handle_entry(struct htif_softc *sc) { uint64_t entry; uint8_t devcmd; uint8_t devid; entry = machine_command(ECALL_HTIF_GET_ENTRY, 0); while (entry) { devid = HTIF_DEV_ID(entry); devcmd = HTIF_DEV_CMD(entry); if (devcmd == HTIF_CMD_IDENTIFY) { /* Enumeration interrupt */ if (devid == sc->identify_id) sc->identify_done = 1; } else { /* Device interrupt */ if (intrs[devid].func != NULL) intrs[devid].func(intrs[devid].arg, entry); } entry = machine_command(ECALL_HTIF_GET_ENTRY, 0); } } static int htif_intr(void *arg) { struct htif_softc *sc; sc = arg; csr_clear(sip, SIP_SSIP); htif_handle_entry(sc); return (FILTER_HANDLED); } static int htif_add_device(struct htif_softc *sc, int i, char *id, char *name) { struct htif_dev_ivars *di; di = malloc(sizeof(struct htif_dev_ivars), M_DEVBUF, M_WAITOK | M_ZERO); di->sc = sc; di->index = i; di->id = malloc(HTIF_ID_LEN, M_DEVBUF, M_WAITOK | M_ZERO); memcpy(di->id, id, HTIF_ID_LEN); di->dev = device_add_child(sc->dev, name, -1); device_set_ivars(di->dev, di); return (0); } static int htif_enumerate(struct htif_softc *sc) { char id[HTIF_ID_LEN] __aligned(HTIF_ALIGN); uint64_t paddr; uint64_t data; uint64_t cmd; int len; int i; device_printf(sc->dev, "Enumerating devices\n"); for (i = 0; i < HTIF_NDEV; i++) { paddr = pmap_kextract((vm_offset_t)&id); data = (paddr << IDENTIFY_PADDR_SHIFT); data |= IDENTIFY_IDENT; sc->identify_id = i; sc->identify_done = 0; cmd = i; cmd <<= HTIF_DEV_ID_SHIFT; cmd |= (HTIF_CMD_IDENTIFY << HTIF_CMD_SHIFT); cmd |= data; htif_command(cmd); - /* Do poll as interrupts are disabled yet */ - while (sc->identify_done == 0) { - htif_handle_entry(sc); - } - len = strnlen(id, sizeof(id)); if (len <= 0) break; if (bootverbose) printf(" %d %s\n", i, id); if (strncmp(id, "disk", 4) == 0) htif_add_device(sc, i, id, "htif_blk"); else if (strncmp(id, "bcd", 3) == 0) htif_add_device(sc, i, id, "htif_console"); else if 
(strncmp(id, "syscall_proxy", 13) == 0) htif_add_device(sc, i, id, "htif_syscall_proxy"); } return (bus_generic_attach(sc->dev)); } int htif_read_ivar(device_t dev, device_t child, int which, uintptr_t *result) { struct htif_dev_ivars *ivars; ivars = device_get_ivars(child); switch (which) { case HTIF_IVAR_INDEX: *result = ivars->index; break; case HTIF_IVAR_ID: *result = (uintptr_t)ivars->id; default: return (EINVAL); } return (0); } static int htif_probe(device_t dev) { if (!ofw_bus_status_okay(dev)) return (ENXIO); if (!ofw_bus_is_compatible(dev, "riscv,htif")) return (ENXIO); device_set_desc(dev, "HTIF bus device"); return (BUS_PROBE_DEFAULT); } static int htif_attach(device_t dev) { struct htif_softc *sc; int error; sc = device_get_softc(dev); sc->dev = dev; if (bus_alloc_resources(dev, htif_spec, sc->res)) { device_printf(dev, "could not allocate resources\n"); return (ENXIO); } /* Setup IRQs handler */ error = bus_setup_intr(dev, sc->res[0], INTR_TYPE_CLK, htif_intr, NULL, sc, &sc->ihl[0]); if (error) { device_printf(dev, "Unable to alloc int resource.\n"); return (ENXIO); } csr_set(sie, SIE_SSIE); return (htif_enumerate(sc)); } static device_method_t htif_methods[] = { DEVMETHOD(device_probe, htif_probe), DEVMETHOD(device_attach, htif_attach), /* Bus interface */ DEVMETHOD(bus_read_ivar, htif_read_ivar), DEVMETHOD_END }; static driver_t htif_driver = { "htif", htif_methods, sizeof(struct htif_softc) }; static devclass_t htif_devclass; DRIVER_MODULE(htif, simplebus, htif_driver, htif_devclass, 0, 0); Index: user/alc/PQ_LAUNDRY/sys/riscv/htif/htif_block.c =================================================================== --- user/alc/PQ_LAUNDRY/sys/riscv/htif/htif_block.c (revision 303666) +++ user/alc/PQ_LAUNDRY/sys/riscv/htif/htif_block.c (revision 303667) @@ -1,297 +1,299 @@ /*- * Copyright (c) 2015-2016 Ruslan Bukin * All rights reserved. * * Portions of this software were developed by SRI International and the * University of Cambridge Computer Laboratory under DARPA/AFRL contract * FA8750-10-C-0237 ("CTSRD"), as part of the DARPA CRASH research programme. * * Portions of this software were developed by the University of Cambridge * Computer Laboratory as part of the CTSRD Project, with support from the * UK Higher Education Innovation Fund (HEIF). * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "htif.h" #define SECTOR_SIZE_SHIFT (9) #define SECTOR_SIZE (1 << SECTOR_SIZE_SHIFT) #define HTIF_BLK_LOCK(_sc) mtx_lock(&(_sc)->sc_mtx) #define HTIF_BLK_UNLOCK(_sc) mtx_unlock(&(_sc)->sc_mtx) #define HTIF_BLK_LOCK_INIT(_sc) \ mtx_init(&_sc->sc_mtx, device_get_nameunit(_sc->dev), \ "htif_blk", MTX_DEF) #define HTIF_BLK_LOCK_DESTROY(_sc) mtx_destroy(&_sc->sc_mtx); #define HTIF_BLK_ASSERT_LOCKED(_sc) mtx_assert(&_sc->sc_mtx, MA_OWNED); #define HTIF_BLK_ASSERT_UNLOCKED(_sc) mtx_assert(&_sc->sc_mtx, MA_NOTOWNED); static void htif_blk_task(void *arg); static disk_open_t htif_blk_open; static disk_close_t htif_blk_close; static disk_strategy_t htif_blk_strategy; struct htif_blk_softc { device_t dev; struct disk *disk; struct mtx htif_io_mtx; struct mtx sc_mtx; struct proc *p; struct bio_queue_head bio_queue; int running; int intr_chan; int cmd_done; int index; uint16_t curtag; }; struct htif_blk_request { uint64_t addr; uint64_t offset; /* offset in bytes */ uint64_t size; /* length in bytes */ uint64_t tag; }; static void htif_blk_intr(void *arg, uint64_t entry) { struct htif_blk_softc *sc; uint64_t devcmd; uint64_t data; sc = arg; devcmd = HTIF_DEV_CMD(entry); data = HTIF_DEV_DATA(entry); if (sc->curtag == data) { wmb(); sc->cmd_done = 1; wakeup(&sc->intr_chan); } else { device_printf(sc->dev, "Unexpected tag %d (should be %d)\n", data, sc->curtag); } } static int htif_blk_probe(device_t dev) { return (0); } static int htif_blk_attach(device_t dev) { struct htif_blk_softc *sc; char prefix[] = " size="; char *str; long size; sc = device_get_softc(dev); sc->dev = dev; mtx_init(&sc->htif_io_mtx, device_get_nameunit(dev), "htif_blk", MTX_DEF); HTIF_BLK_LOCK_INIT(sc); str = strstr(htif_get_id(dev), prefix); size = strtol((str + 6), NULL, 10); if (size == 0) { return (ENXIO); } sc->index = htif_get_index(dev); if (sc->index < 0) return (EINVAL); htif_setup_intr(sc->index, htif_blk_intr, sc); sc->disk = disk_alloc(); sc->disk->d_drv1 = sc; sc->disk->d_maxsize = 4096; /* Max transfer */ sc->disk->d_name = "htif_blk"; sc->disk->d_open = htif_blk_open; sc->disk->d_close = htif_blk_close; sc->disk->d_strategy = htif_blk_strategy; sc->disk->d_unit = 0; sc->disk->d_sectorsize = SECTOR_SIZE; sc->disk->d_mediasize = size; disk_create(sc->disk, DISK_VERSION); bioq_init(&sc->bio_queue); sc->running = 1; kproc_create(&htif_blk_task, sc, &sc->p, 0, 0, "%s: transfer", device_get_nameunit(dev)); return (0); } static int htif_blk_open(struct disk *dp) { return (0); } static int htif_blk_close(struct disk *dp) { return (0); } static void htif_blk_task(void *arg) { struct htif_blk_request req __aligned(HTIF_ALIGN); struct htif_blk_softc *sc; uint64_t req_paddr; struct bio *bp; uint64_t paddr; + uint64_t resp; uint64_t cmd; int i; sc = (struct htif_blk_softc *)arg; while 
(1) { HTIF_BLK_LOCK(sc); do { bp = bioq_takefirst(&sc->bio_queue); if (bp == NULL) msleep(sc, &sc->sc_mtx, PRIBIO, "jobqueue", 0); } while (bp == NULL); HTIF_BLK_UNLOCK(sc); if (bp->bio_cmd == BIO_READ || bp->bio_cmd == BIO_WRITE) { HTIF_BLK_LOCK(sc); rmb(); req.offset = (bp->bio_pblkno * sc->disk->d_sectorsize); req.size = bp->bio_bcount; paddr = vtophys(bp->bio_data); KASSERT(paddr != 0, ("paddr is 0")); req.addr = paddr; sc->curtag++; req.tag = sc->curtag; cmd = sc->index; cmd <<= HTIF_DEV_ID_SHIFT; if (bp->bio_cmd == BIO_READ) cmd |= (HTIF_CMD_READ << HTIF_CMD_SHIFT); else cmd |= (HTIF_CMD_WRITE << HTIF_CMD_SHIFT); req_paddr = vtophys(&req); KASSERT(req_paddr != 0, ("req_paddr is 0")); cmd |= req_paddr; sc->cmd_done = 0; - htif_command(cmd); + resp = htif_command(cmd); + htif_blk_intr(sc, resp); /* Wait for interrupt */ i = 0; while (sc->cmd_done == 0) { msleep(&sc->intr_chan, &sc->sc_mtx, PRIBIO, "intr", hz/2); if (i++ > 2) { /* TODO: try to re-issue operation on timeout ? */ bp->bio_error = EIO; bp->bio_flags |= BIO_ERROR; disk_err(bp, "hard error", -1, 1); break; } } HTIF_BLK_UNLOCK(sc); biodone(bp); } else { printf("unknown op %d\n", bp->bio_cmd); } } } static void htif_blk_strategy(struct bio *bp) { struct htif_blk_softc *sc; sc = bp->bio_disk->d_drv1; HTIF_BLK_LOCK(sc); if (sc->running > 0) { bioq_disksort(&sc->bio_queue, bp); HTIF_BLK_UNLOCK(sc); wakeup(sc); } else { HTIF_BLK_UNLOCK(sc); biofinish(bp, NULL, ENXIO); } } static device_method_t htif_blk_methods[] = { DEVMETHOD(device_probe, htif_blk_probe), DEVMETHOD(device_attach, htif_blk_attach), }; static driver_t htif_blk_driver = { "htif_blk", htif_blk_methods, sizeof(struct htif_blk_softc) }; static devclass_t htif_blk_devclass; DRIVER_MODULE(htif_blk, htif, htif_blk_driver, htif_blk_devclass, 0, 0); Index: user/alc/PQ_LAUNDRY/sys/riscv/htif/htif_console.c =================================================================== --- user/alc/PQ_LAUNDRY/sys/riscv/htif/htif_console.c (revision 303666) +++ user/alc/PQ_LAUNDRY/sys/riscv/htif/htif_console.c (revision 303667) @@ -1,396 +1,349 @@ /*- * Copyright (c) 2015-2016 Ruslan Bukin * All rights reserved. * * Portions of this software were developed by SRI International and the * University of Cambridge Computer Laboratory under DARPA/AFRL contract * FA8750-10-C-0237 ("CTSRD"), as part of the DARPA CRASH research programme. * * Portions of this software were developed by the University of Cambridge * Computer Laboratory as part of the CTSRD Project, with support from the * UK Higher Education Innovation Fund (HEIF). * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
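htif_blk_task() above packs the device index, the command code, and the physical address of the request record into a single 64-bit HTIF command word, and with this revision the ecall's immediate return value is fed straight to htif_blk_intr(), so a matching tag completes the bio without waiting for an asynchronous entry. A minimal sketch of the packing, assuming the payload always fits below HTIF_CMD_SHIFT:

    /* Sketch: the command-word layout used by htif_blk_task(). */
    static uint64_t
    htif_blk_build_cmd(int index, uint64_t devcmd, uint64_t req_paddr)
    {
    	uint64_t cmd;

    	cmd = (uint64_t)index << HTIF_DEV_ID_SHIFT;
    	cmd |= devcmd << HTIF_CMD_SHIFT;
    	cmd |= req_paddr;	/* assumed to fit below HTIF_CMD_SHIFT */

    	return (cmd);
    }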
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "htif.h" #include #include -extern uint64_t console_intr; - static tsw_outwakeup_t riscvtty_outwakeup; static struct ttydevsw riscv_ttydevsw = { .tsw_flags = TF_NOPREFIX, .tsw_outwakeup = riscvtty_outwakeup, }; static int polltime; static struct callout riscv_callout; static struct tty *tp = NULL; #if defined(KDB) static int alt_break_state; #endif static void riscv_timeout(void *); static cn_probe_t riscv_cnprobe; static cn_init_t riscv_cninit; static cn_term_t riscv_cnterm; static cn_getc_t riscv_cngetc; static cn_putc_t riscv_cnputc; static cn_grab_t riscv_cngrab; static cn_ungrab_t riscv_cnungrab; CONSOLE_DRIVER(riscv); #define MAX_BURST_LEN 1 #define QUEUE_SIZE 256 #define CONSOLE_DEFAULT_ID 1ul #define SPIN_IN_MACHINE_MODE 1 struct queue_entry { uint64_t data; uint64_t used; struct queue_entry *next; }; struct queue_entry cnqueue[QUEUE_SIZE]; struct queue_entry *entry_last; struct queue_entry *entry_served; static void -htif_putc(int c) +riscv_putc(int c) { uint64_t cmd; cmd = (HTIF_CMD_WRITE << HTIF_CMD_SHIFT); cmd |= (CONSOLE_DEFAULT_ID << HTIF_DEV_ID_SHIFT); cmd |= c; -#ifdef SPIN_IN_MACHINE_MODE - machine_command(ECALL_HTIF_LOWPUTC, cmd); -#else - htif_command(cmd); -#endif - + machine_command(ECALL_HTIF_CMD, cmd); } -static uint8_t -htif_getc(void) -{ - uint64_t cmd; - uint8_t res; - - cmd = (HTIF_CMD_READ << HTIF_CMD_SHIFT); - cmd |= (CONSOLE_DEFAULT_ID << HTIF_DEV_ID_SHIFT); - - res = htif_command(cmd); - - return (res); -} - -static void -riscv_putc(int c) -{ - uint64_t counter; - uint64_t *cc; - uint64_t val; - - val = 0; - counter = 0; - - cc = (uint64_t*)&console_intr; - *cc = 0; - - htif_putc(c); - -#ifndef SPIN_IN_MACHINE_MODE - /* Wait for an interrupt */ - __asm __volatile( - "li %0, 1\n" /* counter = 1 */ - "slli %0, %0, 12\n" /* counter <<= 12 */ - "1:" - "addi %0, %0, -1\n" /* counter -= 1 */ - "beqz %0, 2f\n" /* counter == 0 ? finish */ - "ld %1, 0(%2)\n" /* val = *cc */ - "beqz %1, 1b\n" /* val == 0 ? 
repeat */ - "2:" - : "=&r"(counter), "=&r"(val) : "r"(cc) - ); -#endif -} - #ifdef EARLY_PRINTF early_putc_t *early_putc = riscv_putc; #endif static void cn_drvinit(void *unused) { if (riscv_consdev.cn_pri != CN_DEAD && riscv_consdev.cn_name[0] != '\0') { tp = tty_alloc(&riscv_ttydevsw, NULL); tty_init_console(tp, 0); tty_makedev(tp, NULL, "%s", "rcons"); polltime = 1; callout_init(&riscv_callout, 1); callout_reset(&riscv_callout, polltime, riscv_timeout, NULL); } } SYSINIT(cndev, SI_SUB_CONFIGURE, SI_ORDER_MIDDLE, cn_drvinit, NULL); static void riscvtty_outwakeup(struct tty *tp) { u_char buf[MAX_BURST_LEN]; int len; int i; for (;;) { len = ttydisc_getc(tp, buf, sizeof(buf)); if (len == 0) break; KASSERT(len == 1, ("tty error")); for (i = 0; i < len; i++) riscv_putc(buf[i]); } } static void riscv_timeout(void *v) { int c; tty_lock(tp); while ((c = riscv_cngetc(NULL)) != -1) ttydisc_rint(tp, c, 0); ttydisc_rint_done(tp); tty_unlock(tp); callout_reset(&riscv_callout, polltime, riscv_timeout, NULL); } static void riscv_cnprobe(struct consdev *cp) { cp->cn_pri = CN_NORMAL; } static void riscv_cninit(struct consdev *cp) { int i; strcpy(cp->cn_name, "rcons"); for (i = 0; i < QUEUE_SIZE; i++) { if (i == (QUEUE_SIZE - 1)) cnqueue[i].next = &cnqueue[0]; else cnqueue[i].next = &cnqueue[i+1]; cnqueue[i].data = 0; cnqueue[i].used = 0; } entry_last = &cnqueue[0]; entry_served = &cnqueue[0]; } static void riscv_cnterm(struct consdev *cp) { } static void riscv_cngrab(struct consdev *cp) { } static void riscv_cnungrab(struct consdev *cp) { } static int riscv_cngetc(struct consdev *cp) { #if defined(KDB) uint64_t devcmd; uint64_t entry; uint64_t devid; #endif + uint64_t cmd; uint8_t data; int ch; - htif_getc(); + cmd = (HTIF_CMD_READ << HTIF_CMD_SHIFT); + cmd |= (CONSOLE_DEFAULT_ID << HTIF_DEV_ID_SHIFT); + machine_command(ECALL_HTIF_CMD_REQ, cmd); + #if defined(KDB) if (kdb_active) { - entry = machine_command(ECALL_HTIF_GET_ENTRY, 0); + + entry = machine_command(ECALL_HTIF_CMD_RESP, 0); while (entry) { devid = HTIF_DEV_ID(entry); devcmd = HTIF_DEV_CMD(entry); data = HTIF_DEV_DATA(entry); if (devid == CONSOLE_DEFAULT_ID && devcmd == 0) { entry_last->data = data; entry_last->used = 1; entry_last = entry_last->next; } else { printf("Lost interrupt: devid %d\n", devid); } - entry = machine_command(ECALL_HTIF_GET_ENTRY, 0); + entry = machine_command(ECALL_HTIF_CMD_RESP, 0); } } #endif if (entry_served->used == 1) { data = entry_served->data; entry_served->used = 0; entry_served = entry_served->next; ch = (data & 0xff); if (ch > 0 && ch < 0xff) { #if defined(KDB) kdb_alt_break(ch, &alt_break_state); #endif return (ch); } } return (-1); } static void riscv_cnputc(struct consdev *cp, int c) { riscv_putc(c); } /* * Bus interface. 
*/ struct htif_console_softc { device_t dev; int running; int intr_chan; int cmd_done; int curtag; int index; }; static void htif_console_intr(void *arg, uint64_t entry) { struct htif_console_softc *sc; uint8_t devcmd; uint64_t data; sc = arg; devcmd = HTIF_DEV_CMD(entry); data = HTIF_DEV_DATA(entry); if (devcmd == 0) { entry_last->data = data; entry_last->used = 1; entry_last = entry_last->next; } } static int htif_console_probe(device_t dev) { return (0); } static int htif_console_attach(device_t dev) { struct htif_console_softc *sc; sc = device_get_softc(dev); sc->dev = dev; sc->index = htif_get_index(dev); if (sc->index < 0) return (EINVAL); htif_setup_intr(sc->index, htif_console_intr, sc); return (0); } static device_method_t htif_console_methods[] = { DEVMETHOD(device_probe, htif_console_probe), DEVMETHOD(device_attach, htif_console_attach), DEVMETHOD_END }; static driver_t htif_console_driver = { "htif_console", htif_console_methods, sizeof(struct htif_console_softc) }; static devclass_t htif_console_devclass; DRIVER_MODULE(htif_console, htif, htif_console_driver, htif_console_devclass, 0, 0); Index: user/alc/PQ_LAUNDRY/sys/riscv/include/cpu.h =================================================================== --- user/alc/PQ_LAUNDRY/sys/riscv/include/cpu.h (revision 303666) +++ user/alc/PQ_LAUNDRY/sys/riscv/include/cpu.h (revision 303667) @@ -1,95 +1,94 @@ /*- - * Copyright (c) 2015 Ruslan Bukin + * Copyright (c) 2015-2016 Ruslan Bukin * All rights reserved. * * Portions of this software were developed by SRI International and the * University of Cambridge Computer Laboratory under DARPA/AFRL contract * FA8750-10-C-0237 ("CTSRD"), as part of the DARPA CRASH research programme. * * Portions of this software were developed by the University of Cambridge * Computer Laboratory as part of the CTSRD Project, with support from the * UK Higher Education Innovation Fund (HEIF). * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
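riscv_cninit() links cnqueue[] into a ring: the interrupt paths fill entries at entry_last and riscv_cngetc() drains them at entry_served, with the used flag handing each slot from producer to consumer. Condensed from the code above (no new names except the two helper functions):

    /* Sketch: the console's single-producer/single-consumer ring. */
    static void
    cnqueue_put(uint64_t data)
    {
    	entry_last->data = data;
    	entry_last->used = 1;
    	entry_last = entry_last->next;
    }

    static int
    cnqueue_get(void)
    {
    	uint64_t data;

    	if (entry_served->used == 0)
    		return (-1);
    	data = entry_served->data;
    	entry_served->used = 0;
    	entry_served = entry_served->next;
    	return ((int)(data & 0xff));
    }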
* * $FreeBSD$ */ #ifndef _MACHINE_CPU_H_ #define _MACHINE_CPU_H_ #include #include #define TRAPF_PC(tfp) ((tfp)->tf_ra) #define TRAPF_USERMODE(tfp) (((tfp)->tf_sepc & (1ul << 63)) == 0) #define cpu_getstack(td) ((td)->td_frame->tf_sp) #define cpu_setstack(td, sp) ((td)->td_frame->tf_sp = (sp)) #define cpu_spinwait() /* nothing */ #ifdef _KERNEL /* * 0x0000 CPU ID unimplemented * 0x0001 UC Berkeley Rocket repo * 0x0002­0x7FFE Reserved for open-source repos * 0x7FFF Reserved for extension * 0x8000 Reserved for anonymous source * 0x8001­0xFFFE Reserved for proprietary implementations * 0xFFFF Reserved for extension */ #define CPU_IMPL_SHIFT 0 #define CPU_IMPL_MASK (0xffff << CPU_IMPL_SHIFT) #define CPU_IMPL(mimpid) ((mimpid & CPU_IMPL_MASK) >> CPU_IMPL_SHIFT) #define CPU_IMPL_UNIMPLEMEN 0x0 #define CPU_IMPL_UCB_ROCKET 0x1 #define CPU_PART_SHIFT 62 #define CPU_PART_MASK (0x3ul << CPU_PART_SHIFT) -#define CPU_PART(mcpuid) ((mcpuid & CPU_PART_MASK) >> CPU_PART_SHIFT) -#define CPU_PART_RV32I 0x0 -#define CPU_PART_RV32E 0x1 -#define CPU_PART_RV64I 0x2 -#define CPU_PART_RV128I 0x3 +#define CPU_PART(misa) ((misa & CPU_PART_MASK) >> CPU_PART_SHIFT) +#define CPU_PART_RV32 0x1 +#define CPU_PART_RV64 0x2 +#define CPU_PART_RV128 0x3 extern char btext[]; extern char etext[]; void cpu_halt(void) __dead2; void cpu_reset(void) __dead2; void fork_trampoline(void); void identify_cpu(void); void swi_vm(void *v); static __inline uint64_t get_cyclecount(void) { /* TODO: This is bogus */ return (1); } #endif #endif /* !_MACHINE_CPU_H_ */ Index: user/alc/PQ_LAUNDRY/sys/riscv/include/cpufunc.h =================================================================== --- user/alc/PQ_LAUNDRY/sys/riscv/include/cpufunc.h (revision 303666) +++ user/alc/PQ_LAUNDRY/sys/riscv/include/cpufunc.h (revision 303667) @@ -1,123 +1,124 @@ /*- - * Copyright (c) 2015 Ruslan Bukin + * Copyright (c) 2015-2016 Ruslan Bukin * All rights reserved. * * Portions of this software were developed by SRI International and the * University of Cambridge Computer Laboratory under DARPA/AFRL contract * FA8750-10-C-0237 ("CTSRD"), as part of the DARPA CRASH research programme. * * Portions of this software were developed by the University of Cambridge * Computer Laboratory as part of the CTSRD Project, with support from the * UK Higher Education Innovation Fund (HEIF). * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
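CPU_PART() now decodes the base-ISA width from the top two bits of misa instead of the retired mcpuid register. A hypothetical identify_cpu() fragment using the new macros:

    /* Sketch: decoding the part field of misa. */
    static void
    print_cpu_part(uint64_t misa)
    {
    	switch (CPU_PART(misa)) {
    	case CPU_PART_RV32:
    		printf("RV32\n");
    		break;
    	case CPU_PART_RV64:
    		printf("RV64\n");
    		break;
    	case CPU_PART_RV128:
    		printf("RV128\n");
    		break;
    	}
    }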
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _MACHINE_CPUFUNC_H_ #define _MACHINE_CPUFUNC_H_ #ifdef _KERNEL #include static __inline void breakpoint(void) { __asm("ebreak"); } static __inline register_t intr_disable(void) { uint64_t ret; __asm __volatile( - "csrrci %0, sstatus, 1" - : "=&r" (ret) + "csrrci %0, sstatus, %1" + : "=&r" (ret) : "i" (SSTATUS_SIE) ); - return (ret & SSTATUS_IE); + return (ret & (SSTATUS_SIE)); } static __inline void intr_restore(register_t s) { __asm __volatile( "csrs sstatus, %0" :: "r" (s) ); } static __inline void intr_enable(void) { __asm __volatile( - "csrsi sstatus, 1" + "csrsi sstatus, %0" + :: "i" (SSTATUS_SIE) ); } static __inline register_t machine_command(uint64_t cmd, uint64_t arg) { uint64_t res; __asm __volatile( "mv t5, %2\n" "mv t6, %1\n" "ecall\n" "mv %0, t6" : "=&r"(res) : "r"(arg), "r"(cmd) ); return (res); } #define cpu_nullop() riscv_nullop() #define cpufunc_nullop() riscv_nullop() #define cpu_setttb(a) riscv_setttb(a) #define cpu_tlb_flushID() riscv_tlb_flushID() #define cpu_tlb_flushID_SE(e) riscv_tlb_flushID_SE(e) #define cpu_dcache_wbinv_range(a, s) riscv_dcache_wbinv_range((a), (s)) #define cpu_dcache_inv_range(a, s) riscv_dcache_inv_range((a), (s)) #define cpu_dcache_wb_range(a, s) riscv_dcache_wb_range((a), (s)) #define cpu_idcache_wbinv_range(a, s) riscv_idcache_wbinv_range((a), (s)) #define cpu_icache_sync_range(a, s) riscv_icache_sync_range((a), (s)) void riscv_nullop(void); void riscv_setttb(vm_offset_t); void riscv_tlb_flushID(void); void riscv_tlb_flushID_SE(vm_offset_t); void riscv_icache_sync_range(vm_offset_t, vm_size_t); void riscv_idcache_wbinv_range(vm_offset_t, vm_size_t); void riscv_dcache_wbinv_range(vm_offset_t, vm_size_t); void riscv_dcache_inv_range(vm_offset_t, vm_size_t); void riscv_dcache_wb_range(vm_offset_t, vm_size_t); #endif /* _KERNEL */ #endif /* _MACHINE_CPUFUNC_H_ */ Index: user/alc/PQ_LAUNDRY/sys/riscv/include/db_machdep.h =================================================================== --- user/alc/PQ_LAUNDRY/sys/riscv/include/db_machdep.h (revision 303666) +++ user/alc/PQ_LAUNDRY/sys/riscv/include/db_machdep.h (revision 303667) @@ -1,91 +1,91 @@ /*- * Copyright (c) 2015-2016 Ruslan Bukin * All rights reserved. * * Portions of this software were developed by SRI International and the * University of Cambridge Computer Laboratory under DARPA/AFRL contract * FA8750-10-C-0237 ("CTSRD"), as part of the DARPA CRASH research programme. * * Portions of this software were developed by the University of Cambridge * Computer Laboratory as part of the CTSRD Project, with support from the * UK Higher Education Innovation Fund (HEIF). * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
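machine_command() fixes the ecall ABI used by every ECALL_* request in this tree: the request code travels in t5, its argument in t6, and the machine-mode handler leaves the result in t6 (the supervisor_call dispatcher in exception.S below compares t5 against the ECALL_* constants). A usage sketch:

    /* Usage sketch: fetch the raw misa CSR via the machine-mode proxy. */
    static uint64_t
    read_misa_via_ecall(void)
    {
    	/* cmd -> t5, arg -> t6, result <- t6. */
    	return (machine_command(ECALL_MCPUID_GET, 0));
    }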
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _MACHINE_DB_MACHDEP_H_ #define _MACHINE_DB_MACHDEP_H_ #include #include #include -#define T_BREAKPOINT (EXCP_INSTR_BREAKPOINT) +#define T_BREAKPOINT (EXCP_BREAKPOINT) #define T_WATCHPOINT (0) typedef vm_offset_t db_addr_t; typedef long db_expr_t; #define PC_REGS() ((db_addr_t)kdb_thrctx->pcb_sepc) #define BKPT_INST (0x00100073) #define BKPT_SIZE (INSN_SIZE) #define BKPT_SET(inst) (BKPT_INST) #define BKPT_SKIP do { \ kdb_frame->tf_sepc += BKPT_SIZE; \ } while (0) #define db_clear_single_step kdb_cpu_clear_singlestep #define db_set_single_step kdb_cpu_set_singlestep #define IS_BREAKPOINT_TRAP(type, code) (type == T_BREAKPOINT) #define IS_WATCHPOINT_TRAP(type, code) (type == T_WATCHPOINT) #define inst_trap_return(ins) (ins == 0x10000073) /* eret */ #define inst_return(ins) (ins == 0x00008067) /* ret */ #define inst_call(ins) (((ins) & 0x7f) == 111 || \ ((ins) & 0x7f) == 103) /* jal, jalr */ #define inst_load(ins) ({ \ uint32_t tmp_instr = db_get_value(PC_REGS(), sizeof(uint32_t), FALSE); \ is_load_instr(tmp_instr); \ }) #define inst_store(ins) ({ \ uint32_t tmp_instr = db_get_value(PC_REGS(), sizeof(uint32_t), FALSE); \ is_store_instr(tmp_instr); \ }) #define is_load_instr(ins) (((ins) & 0x7f) == 3) #define is_store_instr(ins) (((ins) & 0x7f) == 35) #define next_instr_address(pc, bd) ((bd) ? (pc) : ((pc) + 4)) #define DB_SMALL_VALUE_MAX (0x7fffffff) #define DB_SMALL_VALUE_MIN (-0x40001) #define DB_ELFSIZE 64 #endif /* !_MACHINE_DB_MACHDEP_H_ */ Index: user/alc/PQ_LAUNDRY/sys/riscv/include/intr.h =================================================================== --- user/alc/PQ_LAUNDRY/sys/riscv/include/intr.h (revision 303666) +++ user/alc/PQ_LAUNDRY/sys/riscv/include/intr.h (revision 303667) @@ -1,68 +1,80 @@ /*- * Copyright (c) 2015-2016 Ruslan Bukin * All rights reserved. * * Portions of this software were developed by SRI International and the * University of Cambridge Computer Laboratory under DARPA/AFRL contract * FA8750-10-C-0237 ("CTSRD"), as part of the DARPA CRASH research programme. * * Portions of this software were developed by the University of Cambridge * Computer Laboratory as part of the CTSRD Project, with support from the * UK Higher Education Innovation Fund (HEIF). * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
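The decode macros above classify instructions by the low seven opcode bits (3 = load, 35 = store, 103/111 = jalr/jal), and BKPT_INST is the uncompressed ebreak encoding; T_BREAKPOINT now tracks the renamed EXCP_BREAKPOINT from riscvreg.h. A one-line helper makes the shared pattern explicit:

    /* Sketch: the opcode field that is_load_instr()/is_store_instr()
     * test; rv_opcode(0x00100073) == 0x73 (SYSTEM, i.e. ebreak). */
    static __inline int
    rv_opcode(uint32_t insn)
    {
    	return (insn & 0x7f);
    }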
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _MACHINE_INTR_MACHDEP_H_ #define _MACHINE_INTR_MACHDEP_H_ struct trapframe; void riscv_init_interrupts(void); int riscv_teardown_intr(void *); int riscv_config_intr(u_int, enum intr_trigger, enum intr_polarity); int riscv_setup_intr(const char *, driver_filter_t *, driver_intr_t *, void *, int, int, void **); void riscv_cpu_intr(struct trapframe *); typedef unsigned long * riscv_intrcnt_t; riscv_intrcnt_t riscv_intrcnt_create(const char *); void riscv_intrcnt_setname(riscv_intrcnt_t, const char *); #ifdef SMP void riscv_setup_ipihandler(driver_filter_t *); void riscv_unmask_ipi(void); #endif enum { - IRQ_SOFTWARE, - IRQ_TIMER, - IRQ_HTIF, + IRQ_SOFTWARE_USER, + IRQ_SOFTWARE_SUPERVISOR, + IRQ_SOFTWARE_HYPERVISOR, + IRQ_SOFTWARE_MACHINE, + IRQ_TIMER_USER, + IRQ_TIMER_SUPERVISOR, + IRQ_TIMER_HYPERVISOR, + IRQ_TIMER_MACHINE, + IRQ_EXTERNAL_USER, + IRQ_EXTERNAL_SUPERVISOR, + IRQ_EXTERNAL_HYPERVISOR, + IRQ_EXTERNAL_MACHINE, +#if 0 + /* lowRISC TODO */ IRQ_COP, /* lowRISC only */ IRQ_UART, /* lowRISC only */ +#endif NIRQS }; #endif /* !_MACHINE_INTR_MACHDEP_H_ */ Index: user/alc/PQ_LAUNDRY/sys/riscv/include/pte.h =================================================================== --- user/alc/PQ_LAUNDRY/sys/riscv/include/pte.h (revision 303666) +++ user/alc/PQ_LAUNDRY/sys/riscv/include/pte.h (revision 303667) @@ -1,101 +1,90 @@ /*- * Copyright (c) 2014 Andrew Turner - * Copyright (c) 2015 Ruslan Bukin + * Copyright (c) 2015-2016 Ruslan Bukin * All rights reserved. * * Portions of this software were developed by SRI International and the * University of Cambridge Computer Laboratory under DARPA/AFRL contract * FA8750-10-C-0237 ("CTSRD"), as part of the DARPA CRASH research programme. * * Portions of this software were developed by the University of Cambridge * Computer Laboratory as part of the CTSRD Project, with support from the * UK Higher Education Innovation Fund (HEIF). * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
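The rewritten enum lists the twelve standard per-privilege interrupt sources in cause-register order (software 0-3, timer 4-7, external 8-11), so an asynchronous scause value maps onto IRQ_* by masking alone. A sketch, assuming EXCP_INTR and EXCP_MASK from riscvreg.h:

    /* Sketch: classify a trap cause into the IRQ_* numbering above. */
    static int
    riscv_cause_to_irq(uint64_t cause)
    {
    	if ((cause & EXCP_INTR) == 0)
    		return (-1);		/* synchronous exception */
    	return (cause & EXCP_MASK);	/* e.g. 5 == IRQ_TIMER_SUPERVISOR */
    }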
* * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _MACHINE_PTE_H_ #define _MACHINE_PTE_H_ #ifndef LOCORE typedef uint64_t pd_entry_t; /* page directory entry */ typedef uint64_t pt_entry_t; /* page table entry */ typedef uint64_t pn_t; /* page number */ #endif /* Level 0 table, 512GiB per entry */ #define L0_SHIFT 39 /* Level 1 table, 1GiB per entry */ #define L1_SHIFT 30 #define L1_SIZE (1 << L1_SHIFT) #define L1_OFFSET (L1_SIZE - 1) /* Level 2 table, 2MiB per entry */ #define L2_SHIFT 21 #define L2_SIZE (1 << L2_SHIFT) #define L2_OFFSET (L2_SIZE - 1) /* Level 3 table, 4KiB per entry */ #define L3_SHIFT 12 #define L3_SIZE (1 << L3_SHIFT) #define L3_OFFSET (L3_SIZE - 1) #define Ln_ENTRIES (1 << 9) #define Ln_ADDR_MASK (Ln_ENTRIES - 1) /* Bits 9:7 are reserved for software */ -#define PTE_SW_MANAGED (1 << 8) -#define PTE_SW_WIRED (1 << 7) -#define PTE_DIRTY (1 << 6) /* Virtual page is written */ -#define PTE_REF (1 << 5) /* Virtual page is referenced */ -#define PTE_VALID (1 << 0) /* Virtual page is valid */ -#define PTE_TYPE_S 1 -#define PTE_TYPE_M (0xf << PTE_TYPE_S) -#define PTE_TYPE_PTR 0 -#define PTE_TYPE_PTR_G 1 -#define PTE_TYPE_SROURX 2 /* Supervisor read-only, user read-execute page. */ -#define PTE_TYPE_SRWURWX 3 /* Supervisor read-write, user read-write-execute page. */ -#define PTE_TYPE_SURO 4 /* Supervisor and user read-only page. */ -#define PTE_TYPE_SURW 5 /* Supervisor and user read-write page. */ -#define PTE_TYPE_SURX 6 /* Supervisor and user read-execute page. */ -#define PTE_TYPE_SURWX 7 /* Supervisor and User Read Write Execute */ -#define PTE_TYPE_SRO 8 /* Supervisor read-only page. */ -#define PTE_TYPE_SRW 9 /* Supervisor read-write page. */ -#define PTE_TYPE_SRX 10 /* Supervisor read-execute page. */ -#define PTE_TYPE_SRWX 11 /* Supervisor read-write-execute page. */ -#define PTE_TYPE_SRO_G 12 /* Supervisor read-only page--global mapping. */ -#define PTE_TYPE_SRW_G 13 /* Supervisor read-write page--global mapping. */ -#define PTE_TYPE_SRX_G 14 /* Supervisor read-execute page--global mapping. 
*/ -#define PTE_TYPE_SRWX_G 15 /* Supervisor Read Write Execute Global */ +#define PTE_SW_MANAGED (1 << 9) +#define PTE_SW_WIRED (1 << 8) +#define PTE_D (1 << 7) /* Dirty */ +#define PTE_A (1 << 6) /* Accessed */ +#define PTE_G (1 << 5) /* Global */ +#define PTE_U (1 << 4) /* User */ +#define PTE_X (1 << 3) /* Execute */ +#define PTE_W (1 << 2) /* Write */ +#define PTE_R (1 << 1) /* Read */ +#define PTE_V (1 << 0) /* Valid */ +#define PTE_RWX (PTE_R | PTE_W | PTE_X) +#define PTE_RX (PTE_R | PTE_X) #define PTE_PPN0_S 10 #define PTE_PPN1_S 19 #define PTE_PPN2_S 28 #define PTE_PPN3_S 37 #define PTE_SIZE 8 #endif /* !_MACHINE_PTE_H_ */ /* End of pte.h */ Index: user/alc/PQ_LAUNDRY/sys/riscv/include/riscvreg.h =================================================================== --- user/alc/PQ_LAUNDRY/sys/riscv/include/riscvreg.h (revision 303666) +++ user/alc/PQ_LAUNDRY/sys/riscv/include/riscvreg.h (revision 303667) @@ -1,171 +1,210 @@ /*- * Copyright (c) 2015-2016 Ruslan Bukin * All rights reserved. * * Portions of this software were developed by SRI International and the * University of Cambridge Computer Laboratory under DARPA/AFRL contract * FA8750-10-C-0237 ("CTSRD"), as part of the DARPA CRASH research programme. * * Portions of this software were developed by the University of Cambridge * Computer Laboratory as part of the CTSRD Project, with support from the * UK Higher Education Innovation Fund (HEIF). * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
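The encoded PTE_TYPE_* page types are gone: permissions are now the independent Sv39-style R/W/X bits, a PTE is a leaf whenever any of them is set, and the physical page number starts at PTE_PPN0_S. A sketch composing a supervisor read-write leaf entry under the new layout:

    /* Sketch: a writable kernel leaf PTE (pn is the physical page number). */
    static pt_entry_t
    make_kernel_rw_pte(pn_t pn)
    {
    	return ((pn << PTE_PPN0_S) |
    	    PTE_A | PTE_D | PTE_W | PTE_R | PTE_V);
    }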
* * $FreeBSD$ */ #ifndef _MACHINE_RISCVREG_H_ #define _MACHINE_RISCVREG_H_ /* Machine mode requests */ #define ECALL_MTIMECMP 0x01 -#define ECALL_CLEAR_PENDING 0x02 -#define ECALL_HTIF_CMD 0x03 -#define ECALL_HTIF_GET_ENTRY 0x04 -#define ECALL_MCPUID_GET 0x05 -#define ECALL_MIMPID_GET 0x06 -#define ECALL_SEND_IPI 0x07 -#define ECALL_CLEAR_IPI 0x08 -#define ECALL_HTIF_LOWPUTC 0x09 -#define ECALL_MIE_SET 0x0a -#define ECALL_IO_IRQ_MASK 0x0b +#define ECALL_HTIF_GET_ENTRY 0x02 +#define ECALL_MCPUID_GET 0x03 +#define ECALL_MIMPID_GET 0x04 +#define ECALL_SEND_IPI 0x05 +#define ECALL_CLEAR_IPI 0x06 +#define ECALL_MIE_SET 0x07 +#define ECALL_IO_IRQ_MASK 0x08 +#define ECALL_HTIF_CMD 0x09 +#define ECALL_HTIF_CMD_REQ 0x0a +#define ECALL_HTIF_CMD_RESP 0x0b #define EXCP_SHIFT 0 #define EXCP_MASK (0xf << EXCP_SHIFT) -#define EXCP_INSTR_ADDR_MISALIGNED 0 -#define EXCP_INSTR_ACCESS_FAULT 1 -#define EXCP_INSTR_ILLEGAL 2 -#define EXCP_INSTR_BREAKPOINT 3 -#define EXCP_LOAD_ADDR_MISALIGNED 4 -#define EXCP_LOAD_ACCESS_FAULT 5 -#define EXCP_STORE_ADDR_MISALIGNED 6 -#define EXCP_STORE_ACCESS_FAULT 7 -#define EXCP_UMODE_ENV_CALL 8 -#define EXCP_SMODE_ENV_CALL 9 -#define EXCP_HMODE_ENV_CALL 10 -#define EXCP_MMODE_ENV_CALL 11 -#define EXCP_INTR (1 << 31) +#define EXCP_MISALIGNED_FETCH 0 +#define EXCP_FAULT_FETCH 1 +#define EXCP_ILLEGAL_INSTRUCTION 2 +#define EXCP_BREAKPOINT 3 +#define EXCP_MISALIGNED_LOAD 4 +#define EXCP_FAULT_LOAD 5 +#define EXCP_MISALIGNED_STORE 6 +#define EXCP_FAULT_STORE 7 +#define EXCP_USER_ECALL 8 +#define EXCP_SUPERVISOR_ECALL 9 +#define EXCP_HYPERVISOR_ECALL 10 +#define EXCP_MACHINE_ECALL 11 +#define EXCP_INTR (1ul << 63) #define EXCP_INTR_SOFTWARE 0 #define EXCP_INTR_TIMER 1 #define EXCP_INTR_HTIF 2 -#define SSTATUS_IE (1 << 0) -#define SSTATUS_PIE (1 << 3) -#define SSTATUS_PS (1 << 4) +#define SSTATUS_UIE (1 << 0) +#define SSTATUS_SIE (1 << 1) +#define SSTATUS_UPIE (1 << 4) +#define SSTATUS_SPIE (1 << 5) +#define SSTATUS_SPIE_SHIFT 5 +#define SSTATUS_SPP (1 << 8) +#define SSTATUS_SPP_SHIFT 8 +#define SSTATUS_FS_MASK 0x3 +#define SSTATUS_FS_SHIFT 13 +#define SSTATUS_XS_MASK 0x3 +#define SSTATUS_XS_SHIFT 15 +#define SSTATUS_PUM (1 << 18) +#define SSTATUS32_SD (1 << 63) +#define SSTATUS64_SD (1 << 31) -#define MSTATUS_MPRV (1 << 16) -#define MSTATUS_PRV_SHIFT 1 -#define MSTATUS_PRV1_SHIFT 4 -#define MSTATUS_PRV2_SHIFT 7 -#define MSTATUS_PRV_MASK (0x3 << MSTATUS_PRV_SHIFT) -#define MSTATUS_PRV_U 0 /* user */ -#define MSTATUS_PRV_S 1 /* supervisor */ -#define MSTATUS_PRV_H 2 /* hypervisor */ -#define MSTATUS_PRV_M 3 /* machine */ +#define MSTATUS_UIE (1 << 0) +#define MSTATUS_SIE (1 << 1) +#define MSTATUS_HIE (1 << 2) +#define MSTATUS_MIE (1 << 3) +#define MSTATUS_UPIE (1 << 4) +#define MSTATUS_SPIE (1 << 5) +#define MSTATUS_SPIE_SHIFT 5 +#define MSTATUS_HPIE (1 << 6) +#define MSTATUS_MPIE (1 << 7) +#define MSTATUS_MPIE_SHIFT 7 +#define MSTATUS_SPP (1 << 8) +#define MSTATUS_SPP_SHIFT 8 +#define MSTATUS_HPP_MASK 0x3 +#define MSTATUS_HPP_SHIFT 9 +#define MSTATUS_MPP_MASK 0x3 +#define MSTATUS_MPP_SHIFT 11 +#define MSTATUS_FS_MASK 0x3 +#define MSTATUS_FS_SHIFT 13 +#define MSTATUS_XS_MASK 0x3 +#define MSTATUS_XS_SHIFT 15 +#define MSTATUS_MPRV (1 << 17) +#define MSTATUS_PUM (1 << 18) +#define MSTATUS_VM_MASK 0x1f +#define MSTATUS_VM_SHIFT 24 +#define MSTATUS_VM_MBARE 0 +#define MSTATUS_VM_MBB 1 +#define MSTATUS_VM_MBBID 2 +#define MSTATUS_VM_SV32 8 +#define MSTATUS_VM_SV39 9 +#define MSTATUS_VM_SV48 10 +#define MSTATUS_VM_SV57 11 +#define MSTATUS_VM_SV64 12 +#define MSTATUS32_SD (1 << 63) +#define 
MSTATUS64_SD (1 << 31) -#define MSTATUS_VM_SHIFT 17 -#define MSTATUS_VM_MASK 0x1f -#define MSTATUS_VM_MBARE 0 -#define MSTATUS_VM_MBB 1 -#define MSTATUS_VM_MBBID 2 -#define MSTATUS_VM_SV32 8 -#define MSTATUS_VM_SV39 9 -#define MSTATUS_VM_SV48 10 +#define MSTATUS_PRV_U 0 /* user */ +#define MSTATUS_PRV_S 1 /* supervisor */ +#define MSTATUS_PRV_H 2 /* hypervisor */ +#define MSTATUS_PRV_M 3 /* machine */ +#define MIE_USIE (1 << 0) #define MIE_SSIE (1 << 1) #define MIE_HSIE (1 << 2) #define MIE_MSIE (1 << 3) +#define MIE_UTIE (1 << 4) #define MIE_STIE (1 << 5) #define MIE_HTIE (1 << 6) #define MIE_MTIE (1 << 7) +#define MIP_USIP (1 << 0) #define MIP_SSIP (1 << 1) #define MIP_HSIP (1 << 2) #define MIP_MSIP (1 << 3) +#define MIP_UTIP (1 << 4) #define MIP_STIP (1 << 5) #define MIP_HTIP (1 << 6) #define MIP_MTIP (1 << 7) -#define SR_IE (1 << 0) -#define SR_IE1 (1 << 3) -#define SR_IE2 (1 << 6) -#define SR_IE3 (1 << 9) - +#define SIE_USIE (1 << 0) #define SIE_SSIE (1 << 1) +#define SIE_UTIE (1 << 4) #define SIE_STIE (1 << 5) +#define MIP_SEIP (1 << 9) + /* Note: sip register has no SIP_STIP bit in Spike simulator */ #define SIP_SSIP (1 << 1) #define SIP_STIP (1 << 5) +#if 0 +/* lowRISC TODO */ #define NCSRS 4096 #define CSR_IPI 0x783 #define CSR_IO_IRQ 0x7c0 /* lowRISC only? */ +#endif + #define XLEN 8 #define INSN_SIZE 4 #define RISCV_INSN_NOP 0x00000013 #define RISCV_INSN_BREAK 0x00100073 #define RISCV_INSN_RET 0x00008067 #define CSR_ZIMM(val) \ (__builtin_constant_p(val) && ((u_long)(val) < 32)) #define csr_swap(csr, val) \ ({ if (CSR_ZIMM(val)) \ __asm __volatile("csrrwi %0, " #csr ", %1" \ : "=r" (val) : "i" (val)); \ else \ __asm __volatile("csrrw %0, " #csr ", %1" \ : "=r" (val) : "r" (val)); \ val; \ }) #define csr_write(csr, val) \ ({ if (CSR_ZIMM(val)) \ __asm __volatile("csrwi " #csr ", %0" :: "i" (val)); \ else \ __asm __volatile("csrw " #csr ", %0" :: "r" (val)); \ }) #define csr_set(csr, val) \ ({ if (CSR_ZIMM(val)) \ __asm __volatile("csrsi " #csr ", %0" :: "i" (val)); \ else \ __asm __volatile("csrs " #csr ", %0" :: "r" (val)); \ }) #define csr_clear(csr, val) \ ({ if (CSR_ZIMM(val)) \ __asm __volatile("csrci " #csr ", %0" :: "i" (val)); \ else \ __asm __volatile("csrc " #csr ", %0" :: "r" (val)); \ }) #define csr_read(csr) \ ({ u_long val; \ __asm __volatile("csrr %0, " #csr : "=r" (val)); \ val; \ }) #endif /* !_MACHINE_RISCVREG_H_ */ Index: user/alc/PQ_LAUNDRY/sys/riscv/include/vmparam.h =================================================================== --- user/alc/PQ_LAUNDRY/sys/riscv/include/vmparam.h (revision 303666) +++ user/alc/PQ_LAUNDRY/sys/riscv/include/vmparam.h (revision 303667) @@ -1,244 +1,244 @@ /*- * Copyright (c) 1990 The Regents of the University of California. * All rights reserved. * Copyright (c) 1994 John S. Dyson * All rights reserved. * * This code is derived from software contributed to Berkeley by * William Jolitz. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. 
Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)vmparam.h 5.9 (Berkeley) 5/12/91 * from: FreeBSD: src/sys/i386/include/vmparam.h,v 1.33 2000/03/30 * $FreeBSD$ */ #ifndef _MACHINE_VMPARAM_H_ #define _MACHINE_VMPARAM_H_ /* * Virtual memory related constants, all in bytes */ #ifndef MAXTSIZ #define MAXTSIZ (1*1024*1024*1024) /* max text size */ #endif #ifndef DFLDSIZ #define DFLDSIZ (128*1024*1024) /* initial data size limit */ #endif #ifndef MAXDSIZ #define MAXDSIZ (1*1024*1024*1024) /* max data size */ #endif #ifndef DFLSSIZ #define DFLSSIZ (128*1024*1024) /* initial stack size limit */ #endif #ifndef MAXSSIZ #define MAXSSIZ (1*1024*1024*1024) /* max stack size */ #endif #ifndef SGROWSIZ #define SGROWSIZ (128*1024) /* amount to grow stack */ #endif /* * The physical address space is sparsely populated. */ #define VM_PHYSSEG_SPARSE /* * The number of PHYSSEG entries must be one greater than the number * of phys_avail entries because the phys_avail entry that spans the * largest physical address that is accessible by ISA DMA is split * into two PHYSSEG entries. */ #define VM_PHYSSEG_MAX 64 /* * Create two free page pools: VM_FREEPOOL_DEFAULT is the default pool * from which physical pages are allocated and VM_FREEPOOL_DIRECT is * the pool from which physical pages for small UMA objects are * allocated. */ #define VM_NFREEPOOL 2 #define VM_FREEPOOL_DEFAULT 0 #define VM_FREEPOOL_DIRECT 1 /* * Create two free page lists: VM_FREELIST_DEFAULT is for physical * pages that are above the largest physical address that is * accessible by ISA DMA and VM_FREELIST_ISADMA is for physical pages * that are below that address. */ #define VM_NFREELIST 2 #define VM_FREELIST_DEFAULT 0 #define VM_FREELIST_ISADMA 1 /* * An allocation size of 16MB is supported in order to optimize the * use of the direct map by UMA. Specifically, a cache line contains * at most four TTEs, collectively mapping 16MB of physical memory. * By reducing the number of distinct 16MB "pages" that are used by UMA, * the physical memory allocator reduces the likelihood of both 4MB * page TLB misses and cache misses caused by 4MB page TLB misses. */ #define VM_NFREEORDER 12 /* * Enable superpage reservations: 1 level. */ #ifndef VM_NRESERVLEVEL #define VM_NRESERVLEVEL 1 #endif /* * Level 0 reservations consist of 512 pages. */ #ifndef VM_LEVEL_0_ORDER #define VM_LEVEL_0_ORDER 9 #endif /** * Address space layout. * * RISC-V implements up to a 48 bit virtual address space. The address space is * split into 2 regions at each end of the 64 bit address space, with an * out of range "hole" in the middle. 
* * We limit the size of the two spaces to 39 bits each. * * Upper region: 0xffffffffffffffff * 0xffffff8000000000 * * Hole: 0xffffff7fffffffff * 0x0000008000000000 * * Lower region: 0x0000007fffffffff * 0x0000000000000000 * * We use the upper region for the kernel, and the lower region for userland. * * We define some interesting address constants: * * VM_MIN_ADDRESS and VM_MAX_ADDRESS define the start and end of the entire * 64 bit address space, mostly just for convenience. * * VM_MIN_KERNEL_ADDRESS and VM_MAX_KERNEL_ADDRESS define the start and end of * mappable kernel virtual address space. * * VM_MIN_USER_ADDRESS and VM_MAX_USER_ADDRESS define the start and end of the * user address space. */ #define VM_MIN_ADDRESS (0x0000000000000000UL) #define VM_MAX_ADDRESS (0xffffffffffffffffUL) /* 32 GiB of kernel addresses */ #define VM_MIN_KERNEL_ADDRESS (0xffffffc000000000UL) #define VM_MAX_KERNEL_ADDRESS (0xffffffc800000000UL) /* Direct Map for 128 GiB of PA: 0x0 - 0x1fffffffff */ #define DMAP_MIN_ADDRESS (0xffffffd000000000UL) #define DMAP_MAX_ADDRESS (0xffffffefffffffffUL) #define DMAP_MIN_PHYSADDR (0x0000000000000000UL) #define DMAP_MAX_PHYSADDR (DMAP_MAX_ADDRESS - DMAP_MIN_ADDRESS) /* True if pa is in the dmap range */ #define PHYS_IN_DMAP(pa) ((pa) >= DMAP_MIN_PHYSADDR && \ (pa) <= DMAP_MAX_PHYSADDR) /* True if va is in the dmap range */ #define VIRT_IN_DMAP(va) ((va) >= DMAP_MIN_ADDRESS && \ (va) <= DMAP_MAX_ADDRESS) #define PHYS_TO_DMAP(pa) \ ({ \ KASSERT(PHYS_IN_DMAP(pa), \ ("%s: PA out of range, PA: 0x%lx", __func__, \ (vm_paddr_t)(pa))); \ (pa) | DMAP_MIN_ADDRESS; \ }) #define DMAP_TO_PHYS(va) \ ({ \ KASSERT(VIRT_IN_DMAP(va), \ ("%s: VA out of range, VA: 0x%lx", __func__, \ (vm_offset_t)(va))); \ (va) & ~DMAP_MIN_ADDRESS; \ }) #define VM_MIN_USER_ADDRESS (0x0000000000000000UL) #define VM_MAX_USER_ADDRESS (0x0000004000000000UL) #define VM_MINUSER_ADDRESS (VM_MIN_USER_ADDRESS) #define VM_MAXUSER_ADDRESS (VM_MAX_USER_ADDRESS) #define KERNBASE (VM_MIN_KERNEL_ADDRESS) #define SHAREDPAGE (VM_MAXUSER_ADDRESS - PAGE_SIZE) #define USRSTACK SHAREDPAGE -#define KERNENTRY (0x200) +#define KERNENTRY (0x80000000) /* * How many physical pages per kmem arena virtual page. */ #ifndef VM_KMEM_SIZE_SCALE #define VM_KMEM_SIZE_SCALE (3) #endif /* * Optional floor (in bytes) on the size of the kmem arena. */ #ifndef VM_KMEM_SIZE_MIN #define VM_KMEM_SIZE_MIN (16 * 1024 * 1024) #endif /* * Optional ceiling (in bytes) on the size of the kmem arena: 60% of the * kernel map. */ #ifndef VM_KMEM_SIZE_MAX #define VM_KMEM_SIZE_MAX ((VM_MAX_KERNEL_ADDRESS - \ VM_MIN_KERNEL_ADDRESS + 1) * 3 / 5) #endif /* * Initial pagein size of beginning of executable file. */ #ifndef VM_INITIAL_PAGEIN #define VM_INITIAL_PAGEIN 16 #endif /* * RISCVTODO * #define UMA_MD_SMALL_ALLOC */ extern u_int tsb_kernel_ldd_phys; extern vm_offset_t vm_max_kernel_address; extern vm_offset_t init_pt_va; #define ZERO_REGION_SIZE (64 * 1024) /* 64KB */ #define DEVMAP_MAX_VADDR VM_MAX_KERNEL_ADDRESS #endif /* !_MACHINE_VMPARAM_H_ */ Index: user/alc/PQ_LAUNDRY/sys/riscv/riscv/exception.S =================================================================== --- user/alc/PQ_LAUNDRY/sys/riscv/riscv/exception.S (revision 303666) +++ user/alc/PQ_LAUNDRY/sys/riscv/riscv/exception.S (revision 303667) @@ -1,611 +1,622 @@ /*- * Copyright (c) 2015-2016 Ruslan Bukin * All rights reserved. 
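Because DMAP_MIN_ADDRESS is aligned far above DMAP_MAX_PHYSADDR, the direct-map translation degenerates to bit masking: PHYS_TO_DMAP() is an OR and DMAP_TO_PHYS() an AND, as the macro bodies show. A round-trip sketch:

    /* Usage sketch: DMAP translation is pure masking, no arithmetic. */
    static vm_paddr_t
    dmap_roundtrip(vm_paddr_t pa)
    {
    	vm_offset_t va;

    	va = PHYS_TO_DMAP(pa);		/* pa | DMAP_MIN_ADDRESS */
    	return (DMAP_TO_PHYS(va));	/* va & ~DMAP_MIN_ADDRESS */
    }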
* * Portions of this software were developed by SRI International and the * University of Cambridge Computer Laboratory under DARPA/AFRL contract * FA8750-10-C-0237 ("CTSRD"), as part of the DARPA CRASH research programme. * * Portions of this software were developed by the University of Cambridge * Computer Laboratory as part of the CTSRD Project, with support from the * UK Higher Education Innovation Fund (HEIF). * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "assym.s" #include #include .macro save_registers el addi sp, sp, -(TF_SIZE) sd ra, (TF_RA)(sp) sd tp, (TF_TP)(sp) .if \el == 0 /* We came from userspace. Load our pcpu */ sd gp, (TF_GP)(sp) ld gp, (TF_SIZE)(sp) .endif sd t0, (TF_T + 0 * 8)(sp) sd t1, (TF_T + 1 * 8)(sp) sd t2, (TF_T + 2 * 8)(sp) sd t3, (TF_T + 3 * 8)(sp) sd t4, (TF_T + 4 * 8)(sp) sd t5, (TF_T + 5 * 8)(sp) sd t6, (TF_T + 6 * 8)(sp) sd s0, (TF_S + 0 * 8)(sp) sd s1, (TF_S + 1 * 8)(sp) sd s2, (TF_S + 2 * 8)(sp) sd s3, (TF_S + 3 * 8)(sp) sd s4, (TF_S + 4 * 8)(sp) sd s5, (TF_S + 5 * 8)(sp) sd s6, (TF_S + 6 * 8)(sp) sd s7, (TF_S + 7 * 8)(sp) sd s8, (TF_S + 8 * 8)(sp) sd s9, (TF_S + 9 * 8)(sp) sd s10, (TF_S + 10 * 8)(sp) sd s11, (TF_S + 11 * 8)(sp) sd a0, (TF_A + 0 * 8)(sp) sd a1, (TF_A + 1 * 8)(sp) sd a2, (TF_A + 2 * 8)(sp) sd a3, (TF_A + 3 * 8)(sp) sd a4, (TF_A + 4 * 8)(sp) sd a5, (TF_A + 5 * 8)(sp) sd a6, (TF_A + 6 * 8)(sp) sd a7, (TF_A + 7 * 8)(sp) #if 0 /* XXX: temporary test: spin if stack is not kernel one */ .if \el == 1 /* kernel */ mv t0, sp srli t0, t0, 63 1: beqz t0, 1b .endif #endif .if \el == 1 /* Store kernel sp */ li t1, TF_SIZE add t0, sp, t1 sd t0, (TF_SP)(sp) .else /* Store user sp */ csrr t0, sscratch sd t0, (TF_SP)(sp) .endif li t0, 0 csrw sscratch, t0 csrr t0, sepc sd t0, (TF_SEPC)(sp) csrr t0, sstatus sd t0, (TF_SSTATUS)(sp) csrr t0, sbadaddr sd t0, (TF_SBADADDR)(sp) csrr t0, scause sd t0, (TF_SCAUSE)(sp) .endm .macro load_registers el ld t0, (TF_SSTATUS)(sp) .if \el == 0 /* Ensure user interrupts will be enabled on eret. */ - ori t0, t0, SSTATUS_PIE + li t1, SSTATUS_SPIE + or t0, t0, t1 .else /* * Disable interrupts for supervisor mode exceptions. * For user mode exceptions we have already done this * in do_ast. 
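The convention these macros establish: sscratch holds the kernel exception stack pointer while the CPU runs in user mode and is zeroed once save_registers has switched stacks, which is how the new cpu_exception_handler trampoline below tells the two cases apart (a zero sscratch means the trap came from the kernel). The TF_* offsets come from assym.s; a hypothetical C view of the frame, with the field order assumed rather than taken from frame.h:

    /* Sketch (assumed layout) of the frame save_registers fills in. */
    struct trapframe_sketch {
    	uint64_t tf_ra;
    	uint64_t tf_sp;
    	uint64_t tf_gp;
    	uint64_t tf_tp;
    	uint64_t tf_t[7];	/* t0-t6 */
    	uint64_t tf_s[12];	/* s0-s11 */
    	uint64_t tf_a[8];	/* a0-a7 */
    	uint64_t tf_sepc;
    	uint64_t tf_sstatus;
    	uint64_t tf_sbadaddr;
    	uint64_t tf_scause;
    };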
*/ - li t1, ~SSTATUS_IE + li t1, ~SSTATUS_SIE and t0, t0, t1 .endif csrw sstatus, t0 ld t0, (TF_SEPC)(sp) csrw sepc, t0 .if \el == 0 /* We go to userspace. Load user sp */ ld t0, (TF_SP)(sp) csrw sscratch, t0 /* And store our pcpu */ sd gp, (TF_SIZE)(sp) ld gp, (TF_GP)(sp) .endif ld ra, (TF_RA)(sp) ld tp, (TF_TP)(sp) ld t0, (TF_T + 0 * 8)(sp) ld t1, (TF_T + 1 * 8)(sp) ld t2, (TF_T + 2 * 8)(sp) ld t3, (TF_T + 3 * 8)(sp) ld t4, (TF_T + 4 * 8)(sp) ld t5, (TF_T + 5 * 8)(sp) ld t6, (TF_T + 6 * 8)(sp) ld s0, (TF_S + 0 * 8)(sp) ld s1, (TF_S + 1 * 8)(sp) ld s2, (TF_S + 2 * 8)(sp) ld s3, (TF_S + 3 * 8)(sp) ld s4, (TF_S + 4 * 8)(sp) ld s5, (TF_S + 5 * 8)(sp) ld s6, (TF_S + 6 * 8)(sp) ld s7, (TF_S + 7 * 8)(sp) ld s8, (TF_S + 8 * 8)(sp) ld s9, (TF_S + 9 * 8)(sp) ld s10, (TF_S + 10 * 8)(sp) ld s11, (TF_S + 11 * 8)(sp) ld a0, (TF_A + 0 * 8)(sp) ld a1, (TF_A + 1 * 8)(sp) ld a2, (TF_A + 2 * 8)(sp) ld a3, (TF_A + 3 * 8)(sp) ld a4, (TF_A + 4 * 8)(sp) ld a5, (TF_A + 5 * 8)(sp) ld a6, (TF_A + 6 * 8)(sp) ld a7, (TF_A + 7 * 8)(sp) addi sp, sp, (TF_SIZE) .endm .macro do_ast /* Disable interrupts */ csrr a4, sstatus 1: - csrci sstatus, SSTATUS_IE + csrci sstatus, (SSTATUS_SIE) ld a1, PC_CURTHREAD(gp) lw a2, TD_FLAGS(a1) li a3, (TDF_ASTPENDING|TDF_NEEDRESCHED) and a2, a2, a3 beqz a2, 2f /* Restore interrupts */ - andi a4, a4, SSTATUS_IE + andi a4, a4, (SSTATUS_SIE) csrs sstatus, a4 /* Handle the ast */ mv a0, sp call _C_LABEL(ast) /* Re-check for new ast scheduled */ j 1b 2: .endm +ENTRY(cpu_exception_handler) + csrrw sp, sscratch, sp + beqz sp, 1f + /* User mode detected */ + csrrw sp, sscratch, sp + j cpu_exception_handler_user +1: + /* Supervisor mode detected */ + csrrw sp, sscratch, sp + j cpu_exception_handler_supervisor +END(cpu_exception_handler) + ENTRY(cpu_exception_handler_supervisor) save_registers 1 mv a0, sp call _C_LABEL(do_trap_supervisor) load_registers 1 - eret + sret END(cpu_exception_handler_supervisor) ENTRY(cpu_exception_handler_user) csrrw sp, sscratch, sp save_registers 0 mv a0, sp call _C_LABEL(do_trap_user) do_ast load_registers 0 csrrw sp, sscratch, sp - eret + sret END(cpu_exception_handler_user) /* * Trap handlers */ .text bad_trap: j bad_trap -user_trap: +machine_trap: /* Save state */ csrrw sp, mscratch, sp addi sp, sp, -64 sd t0, (8 * 0)(sp) sd t1, (8 * 1)(sp) sd t2, (8 * 2)(sp) sd t3, (8 * 3)(sp) sd t4, (8 * 4)(sp) sd t5, (8 * 5)(sp) sd a0, (8 * 7)(sp) - la t2, _C_LABEL(cpu_exception_handler_user) - - csrr t0, mcause - bltz t0, machine_interrupt - j exit_mrts - -supervisor_trap: - /* Save state */ - csrrw sp, mscratch, sp - addi sp, sp, -64 - sd t0, (8 * 0)(sp) - sd t1, (8 * 1)(sp) - sd t2, (8 * 2)(sp) - sd t3, (8 * 3)(sp) - sd t4, (8 * 4)(sp) - sd t5, (8 * 5)(sp) - sd a0, (8 * 7)(sp) - - la t2, _C_LABEL(cpu_exception_handler_supervisor) - + csrr t3, mstatus /* Required for debug */ csrr t0, mcause bltz t0, machine_interrupt - li t1, EXCP_SMODE_ENV_CALL + li t1, EXCP_SUPERVISOR_ECALL beq t0, t1, supervisor_call - j exit_mrts +4: + /* NOT REACHED */ + j 4b machine_interrupt: /* Type of interrupt ? 
*/ csrr t0, mcause andi t0, t0, EXCP_MASK - li t1, 0 - beq t1, t0, software_interrupt - li t1, 1 - beq t1, t0, timer_interrupt - li t1, 2 - beq t1, t0, htif_interrupt +#if 0 + /* lowRISC TODO */ li t1, 4 beq t1, t0, io_interrupt /* lowRISC only */ +#endif + li t1, 1 + beq t1, t0, supervisor_software_interrupt + li t1, 3 + beq t1, t0, machine_software_interrupt + li t1, 5 + beq t1, t0, supervisor_timer_interrupt + li t1, 7 + beq t1, t0, machine_timer_interrupt - /* not reached */ + /* NOT REACHED */ 1: j 1b +#if 0 + /* lowRISC TODO */ io_interrupt: /* Disable IO interrupts so we can go to supervisor mode */ csrwi CSR_IO_IRQ, 0 /* Handle the trap in supervisor mode */ j exit_mrts +#endif -software_interrupt: +supervisor_software_interrupt: +1: + /* Nothing here as we are using mideleg feature */ + j 1b + +machine_software_interrupt: + /* Clear IPI */ + li t0, 0x40001000 + csrr t2, mhartid + li t3, 0x1000 + mul t2, t2, t3 + add t0, t0, t2 + li t2, 0 + sd t2, 0(t0) + + /* Clear machine software pending bit */ li t0, MIP_MSIP csrc mip, t0 + + /* Post supervisor software interrupt */ li t0, MIP_SSIP csrs mip, t0 - /* If PRV1 is PRV_U (user) then serve the trap */ - csrr t0, mstatus - li t1, (MSTATUS_PRV_M << MSTATUS_PRV1_SHIFT) - and t0, t0, t1 - beqz t0, 1f - - /* - * If PRV1 is supervisor and interrupts were enabled, - * then serve the trap. - */ - csrr t0, mstatus - li t1, (SR_IE1 | (MSTATUS_PRV_M << MSTATUS_PRV1_SHIFT)) - and t0, t0, t1 - li t1, (SR_IE1 | (MSTATUS_PRV_S << MSTATUS_PRV1_SHIFT)) - beq t0, t1, 1f - j exit +supervisor_timer_interrupt: 1: - /* Handle the trap in supervisor mode */ - j exit_mrts + /* Nothing here as we are using mideleg feature */ + j 1b -timer_interrupt: +machine_timer_interrupt: /* Disable machine timer interrupts */ li t0, MIE_MTIE csrc mie, t0 - /* Clear machine pending */ + /* Clear machine timer interrupt pending */ li t0, MIP_MTIP csrc mip, t0 /* Post supervisor timer interrupt */ li t0, MIP_STIP csrs mip, t0 - /* If PRV1 is PRV_U (user) then serve the trap */ - csrr t0, mstatus - li t1, (MSTATUS_PRV_M << MSTATUS_PRV1_SHIFT) - and t0, t0, t1 - beqz t0, 1f - /* - * If PRV1 is supervisor and interrupts were enabled, - * then serve the trap. + * Check for HTIF interrupts. + * The only interrupt expected here is key press. */ - csrr t0, mstatus - li t1, (SR_IE1 | (MSTATUS_PRV_M << MSTATUS_PRV1_SHIFT)) - and t0, t0, t1 - li t1, (SR_IE1 | (MSTATUS_PRV_S << MSTATUS_PRV1_SHIFT)) - beq t0, t1, 1f + la t0, htif_lock + li t2, 1 + amoswap.d t3, t2, 0(t0) + bnez t3, 5f /* Another operation in progress, give up */ - j exit + /* We have lock */ + la t1, fromhost + ld t5, 0(t1) + beqz t5, 4f -1: - /* Serve a trap in supervisor mode */ - j exit_mrts - -htif_interrupt: -1: - li t5, 0 - csrrw t5, mfromhost, t5 - beqz t5, 3f - - /* Console PUT intr ? */ + /* Console GET intr ? 
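   The top 16 bits of a fromhost doubleword carry (device << 8) | cmd,
   and 0x100 is taken here to be the console-input pair (device 1,
   cmd 0, an assumption consistent with the comparison below). Sketch:

	static int is_console_get(uint64_t fromhost_val) {
		return ((fromhost_val >> 48) == 0x100);
	}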
*/ mv t1, t5 - li t0, 0x101 + li t0, 0x100 srli t1, t1, 48 - bne t1, t0, 2f - /* Yes */ - la t0, console_intr - li t1, 1 - sd t1, 0(t0) - - /* Check if there is any other pending event */ + beq t1, t0, 2f +1: + /* There is no interrupts except keypress */ j 1b 2: /* Save entry */ la t0, htif_ring - csrr t1, mhartid - li t4, (HTIF_RING_SIZE + 16) - mulw t4, t4, t1 - add t0, t0, t4 li t4, (HTIF_RING_SIZE) add t0, t0, t4 /* t0 == htif_ring_cursor */ ld t1, 0(t0) /* load ptr to cursor */ sd t5, 0(t1) /* put entry */ li t4, 1 sd t4, 8(t1) /* mark used */ ld t4, 16(t1) /* take next */ /* Update cursor */ sd t4, 0(t0) /* Post supervisor software interrupt */ li t0, MIP_SSIP csrs mip, t0 - /* Check if there is any other pending event */ - j 1b - 3: + la t1, fromhost + li t5, 0 + sd t5, 0(t1) + +4: + /* Release lock */ + la t0, htif_lock + li t2, 0 + amoswap.d t3, t2, 0(t0) + +5: j exit supervisor_call: csrr t1, mepc addi t1, t1, 4 /* Next instruction in t1 */ li t4, ECALL_HTIF_CMD beq t5, t4, htif_cmd + li t4, ECALL_HTIF_CMD_REQ + beq t5, t4, htif_cmd_req + li t4, ECALL_HTIF_CMD_RESP + beq t5, t4, htif_cmd_resp li t4, ECALL_HTIF_GET_ENTRY beq t5, t4, htif_get_entry li t4, ECALL_MTIMECMP beq t5, t4, set_mtimecmp - li t4, ECALL_CLEAR_PENDING - beq t5, t4, clear_pending li t4, ECALL_MCPUID_GET beq t5, t4, mcpuid_get li t4, ECALL_MIMPID_GET beq t5, t4, mimpid_get li t4, ECALL_SEND_IPI beq t5, t4, send_ipi li t4, ECALL_CLEAR_IPI beq t5, t4, clear_ipi - li t4, ECALL_HTIF_LOWPUTC - beq t5, t4, htif_lowputc li t4, ECALL_MIE_SET beq t5, t4, mie_set +#if 0 + /* lowRISC TODO */ li t4, ECALL_IO_IRQ_MASK beq t5, t4, io_irq_mask +#endif j exit_next_instr +#if 0 + /* lowRISC TODO */ io_irq_mask: csrw CSR_IO_IRQ, t6 j exit_next_instr +#endif mie_set: csrs mie, t6 j exit_next_instr mcpuid_get: - csrr t6, mcpuid + csrr t6, misa j exit_next_instr mimpid_get: csrr t6, mimpid j exit_next_instr send_ipi: - /* CPU mmio base in t6 */ + /* CPU ipi MMIO register in t6 */ mv t0, t6 - li t2, (CSR_IPI * XLEN) - add t0, t0, t2 /* t0 = CSR_IPI */ li t2, 1 sd t2, 0(t0) j exit_next_instr clear_ipi: /* Do only clear if there are no new entries in HTIF ring */ la t0, htif_ring - csrr t2, mhartid - li t4, (HTIF_RING_SIZE + 16) - mulw t4, t4, t2 - add t0, t0, t4 li t4, (HTIF_RING_SIZE) add t0, t0, t4 /* t0 == ptr to htif_ring_cursor */ ld t2, 8(t0) /* load htif_ring_last */ ld t2, 8(t2) /* load used */ bnez t2, 1f /* Clear supervisor software interrupt pending bit */ li t0, MIP_SSIP csrc mip, t0 1: j exit_next_instr htif_get_entry: /* Get a htif_ring for current core */ la t0, htif_ring - csrr t2, mhartid - li t4, (HTIF_RING_SIZE + 16) - mulw t4, t4, t2 - add t0, t0, t4 li t4, (HTIF_RING_SIZE + 8) add t0, t0, t4 /* t0 == htif_ring_last */ /* Check for new entries */ li t6, 0 /* preset return value */ ld t2, 0(t0) /* load ptr to last */ ld t4, 8(t2) /* get used */ beqz t4, 1f /* No new entries. Exit */ /* Get one */ ld t6, 0(t2) /* get entry */ li t4, 0 sd t4, 8(t2) /* mark free */ sd t4, 0(t2) /* free entry, just in case */ ld t4, 16(t2) /* take next */ sd t4, 0(t0) /* update ptr to last */ 1: /* Exit. Result is stored in t6 */ j exit_next_instr -htif_cmd: +htif_cmd_resp: + la t0, htif_lock + li t2, 1 1: - mv t0, t6 - csrrw t0, mtohost, t0 - bnez t0, 1b + amoswap.d t3, t2, 0(t0) + bnez t3, 1b + + /* We have lock. 
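   (Taken with the amoswap.d spin loop above; the same acquire/release
   protocol as a self-contained C sketch, assuming GCC __atomic
   builtins and a stand-in for htif_lock:

	static volatile long lock;
	while (__atomic_exchange_n(&lock, 1, __ATOMIC_ACQUIRE))
		;			(spin until the old value is 0)
	(critical section: touch tohost/fromhost)
	__atomic_store_n(&lock, 0, __ATOMIC_RELEASE);

   the release side is the amoswap.d of 0 further down.)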
Read for data */ + la t4, fromhost + ld t6, 0(t4) + beqz t6, 2f + + /* Clear event */ + li t5, 0 + sd t5, 0(t4) + +2: + /* Release lock */ + la t0, htif_lock + li t2, 0 + amoswap.d t3, t2, 0(t0) + j exit_next_instr -htif_lowputc: +htif_cmd_req: + la t0, htif_lock + li t2, 1 1: - mv t0, t6 - csrrw t0, mtohost, t0 - bnez t0, 1b + amoswap.d t3, t2, 0(t0) + bnez t3, 1b + /* We have lock. Store new request */ + la t4, tohost + sd t6, 0(t4) + + /* Release lock */ + la t0, htif_lock + li t2, 0 + amoswap.d t3, t2, 0(t0) + + j exit_next_instr + +htif_cmd: + la t0, htif_lock + li t2, 1 +1: + amoswap.d t3, t2, 0(t0) + bnez t3, 1b + + mv t3, t6 + + /* We have lock. Store new request */ + la t4, tohost + sd t6, 0(t4) 2: - li t4, 0 - csrrw t5, mfromhost, t4 - beqz t5, 2b + /* Poll for result */ + la t4, fromhost + ld t6, 0(t4) + beqz t6, 2b - /* Console PUT intr ? */ - mv t2, t5 - srli t2, t2, 48 - li t3, 0x0101 - beq t2, t3, 3f + /* Check for unexpected event */ + srli t0, t6, 48 + srli t2, t3, 48 + beq t2, t0, 3f - /* Not a console PUT, so save entry */ + /* + * We have something unexpected (e.g. keyboard keypress) + * Save entry. + */ la t0, htif_ring - csrr t2, mhartid - li t4, (HTIF_RING_SIZE + 16) - mulw t4, t4, t2 - add t0, t0, t4 li t4, (HTIF_RING_SIZE) add t0, t0, t4 /* t0 == htif_ring_cursor */ ld t2, 0(t0) /* load ptr to cursor */ - sd t5, 0(t2) /* put entry */ + sd t6, 0(t2) /* put entry */ li t4, 1 sd t4, 8(t2) /* mark used */ ld t4, 16(t2) /* take next */ /* Update cursor */ sd t4, 0(t0) /* Post supervisor software interrupt */ li t0, MIP_SSIP csrs mip, t0 - /* Wait for console intr again */ + /* Clear and look for response again */ + la t2, fromhost + li t5, 0 + sd t5, 0(t2) j 2b 3: + la t2, fromhost + li t5, 0 + sd t5, 0(t2) + + /* Release lock */ + la t0, htif_lock + li t2, 0 + amoswap.d t3, t2, 0(t0) + j exit_next_instr set_mtimecmp: - csrr t2, stime - add t6, t6, t2 - csrw mtimecmp, t6 - /* Enable interrupts */ li t0, (MIE_MTIE | MIE_STIE) csrs mie, t0 j exit_next_instr -clear_pending: - li t0, MIP_STIP - csrc mip, t0 - j exit_next_instr - /* * Trap exit functions */ exit_next_instr: /* Next instruction is in t1 */ csrw mepc, t1 exit: /* Restore state */ ld t0, (8 * 0)(sp) ld t1, (8 * 1)(sp) ld t2, (8 * 2)(sp) ld t3, (8 * 3)(sp) ld t4, (8 * 4)(sp) ld t5, (8 * 5)(sp) ld a0, (8 * 7)(sp) addi sp, sp, 64 csrrw sp, mscratch, sp - eret + mret -/* - * Redirect to supervisor - */ exit_mrts: - /* Setup exception handler */ - li t1, KERNBASE - add t2, t2, t1 - csrw stvec, t2 - - /* Restore state */ - ld t0, (8 * 0)(sp) - ld t1, (8 * 1)(sp) - ld t2, (8 * 2)(sp) - ld t3, (8 * 3)(sp) - ld t4, (8 * 4)(sp) - ld t5, (8 * 5)(sp) - ld a0, (8 * 7)(sp) - addi sp, sp, 64 - csrrw sp, mscratch, sp - - /* Redirect to supervisor */ - mrts + j exit_mrts Index: user/alc/PQ_LAUNDRY/sys/riscv/riscv/genassym.c =================================================================== --- user/alc/PQ_LAUNDRY/sys/riscv/riscv/genassym.c (revision 303666) +++ user/alc/PQ_LAUNDRY/sys/riscv/riscv/genassym.c (revision 303667) @@ -1,99 +1,100 @@ /*- * Copyright (c) 2015-2016 Ruslan Bukin * All rights reserved. * * Portions of this software were developed by SRI International and the * University of Cambridge Computer Laboratory under DARPA/AFRL contract * FA8750-10-C-0237 ("CTSRD"), as part of the DARPA CRASH research programme. * * Portions of this software were developed by the University of Cambridge * Computer Laboratory as part of the CTSRD Project, with support from the * UK Higher Education Innovation Fund (HEIF). 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include ASSYM(KERNBASE, KERNBASE); +ASSYM(KERNENTRY, KERNENTRY); ASSYM(VM_MAXUSER_ADDRESS, VM_MAXUSER_ADDRESS); ASSYM(VM_MAX_KERNEL_ADDRESS, VM_MAX_KERNEL_ADDRESS); ASSYM(TDF_ASTPENDING, TDF_ASTPENDING); ASSYM(TDF_NEEDRESCHED, TDF_NEEDRESCHED); ASSYM(PCB_ONFAULT, offsetof(struct pcb, pcb_onfault)); ASSYM(PCB_L1ADDR, offsetof(struct pcb, pcb_l1addr)); ASSYM(PCB_SIZE, sizeof(struct pcb)); ASSYM(PCB_RA, offsetof(struct pcb, pcb_ra)); ASSYM(PCB_SP, offsetof(struct pcb, pcb_sp)); ASSYM(PCB_GP, offsetof(struct pcb, pcb_gp)); ASSYM(PCB_TP, offsetof(struct pcb, pcb_tp)); ASSYM(PCB_T, offsetof(struct pcb, pcb_t)); ASSYM(PCB_S, offsetof(struct pcb, pcb_s)); ASSYM(PCB_A, offsetof(struct pcb, pcb_a)); ASSYM(SF_UC, offsetof(struct sigframe, sf_uc)); ASSYM(PC_CURPCB, offsetof(struct pcpu, pc_curpcb)); ASSYM(PC_CURTHREAD, offsetof(struct pcpu, pc_curthread)); ASSYM(TD_PCB, offsetof(struct thread, td_pcb)); ASSYM(TD_FLAGS, offsetof(struct thread, td_flags)); ASSYM(TD_PROC, offsetof(struct thread, td_proc)); ASSYM(TD_FRAME, offsetof(struct thread, td_frame)); ASSYM(TD_MD, offsetof(struct thread, td_md)); ASSYM(TD_LOCK, offsetof(struct thread, td_lock)); ASSYM(TF_SIZE, sizeof(struct trapframe)); ASSYM(TF_RA, offsetof(struct trapframe, tf_ra)); ASSYM(TF_SP, offsetof(struct trapframe, tf_sp)); ASSYM(TF_GP, offsetof(struct trapframe, tf_gp)); ASSYM(TF_TP, offsetof(struct trapframe, tf_tp)); ASSYM(TF_T, offsetof(struct trapframe, tf_t)); ASSYM(TF_S, offsetof(struct trapframe, tf_s)); ASSYM(TF_A, offsetof(struct trapframe, tf_a)); ASSYM(TF_SEPC, offsetof(struct trapframe, tf_sepc)); ASSYM(TF_SBADADDR, offsetof(struct trapframe, tf_sbadaddr)); ASSYM(TF_SCAUSE, offsetof(struct trapframe, tf_scause)); ASSYM(TF_SSTATUS, offsetof(struct trapframe, tf_sstatus)); Index: user/alc/PQ_LAUNDRY/sys/riscv/riscv/identcpu.c =================================================================== --- user/alc/PQ_LAUNDRY/sys/riscv/riscv/identcpu.c (revision 303666) +++ user/alc/PQ_LAUNDRY/sys/riscv/riscv/identcpu.c (revision 303667) @@ -1,149 +1,136 @@ /*- - * Copyright (c) 2015 Ruslan Bukin + * Copyright 
(c) 2015-2016 Ruslan Bukin * All rights reserved. * * Portions of this software were developed by SRI International and the * University of Cambridge Computer Laboratory under DARPA/AFRL contract * FA8750-10-C-0237 ("CTSRD"), as part of the DARPA CRASH research programme. * * Portions of this software were developed by the University of Cambridge * Computer Laboratory as part of the CTSRD Project, with support from the * UK Higher Education Innovation Fund (HEIF). * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include char machine[] = "riscv"; SYSCTL_STRING(_hw, HW_MACHINE, machine, CTLFLAG_RD, machine, 0, "Machine class"); struct cpu_desc { u_int cpu_impl; u_int cpu_part_num; const char *cpu_impl_name; const char *cpu_part_name; }; struct cpu_desc cpu_desc[MAXCPU]; struct cpu_parts { u_int part_id; const char *part_name; }; #define CPU_PART_NONE { -1, "Unknown Processor" } struct cpu_implementers { u_int impl_id; const char *impl_name; - /* - * Part number is implementation defined - * so each vendor will have its own set of values and names. - */ - const struct cpu_parts *cpu_parts; }; -#define CPU_IMPLEMENTER_NONE { 0, "Unknown Implementer", cpu_parts_none } +#define CPU_IMPLEMENTER_NONE { 0, "Unknown Implementer" } /* - * Per-implementer table of (PartNum, CPU Name) pairs. + * CPU base */ -/* UC Berkeley */ -static const struct cpu_parts cpu_parts_ucb[] = { - { CPU_PART_RV32I, "RV32I" }, - { CPU_PART_RV32E, "RV32E" }, - { CPU_PART_RV64I, "RV64I" }, - { CPU_PART_RV128I, "RV128I" }, +static const struct cpu_parts cpu_parts_std[] = { + { CPU_PART_RV32, "RV32" }, + { CPU_PART_RV64, "RV64" }, + { CPU_PART_RV128, "RV128" }, CPU_PART_NONE, }; -/* Unknown */ -static const struct cpu_parts cpu_parts_none[] = { - CPU_PART_NONE, -}; - /* * Implementers table. 
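 *
 * identify_cpu() below scans this table until the implementer id
 * taken from mimpid matches, or the terminating impl_id == 0 entry is
 * reached; roughly:
 *
 *	for (i = 0; i < nitems(cpu_implementers); i++)
 *		if (impl_id == cpu_implementers[i].impl_id ||
 *		    cpu_implementers[i].impl_id == 0)
 *			break;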
*/ const struct cpu_implementers cpu_implementers[] = { - { CPU_IMPL_UCB_ROCKET, "UC Berkeley Rocket", cpu_parts_ucb }, + { CPU_IMPL_UCB_ROCKET, "UC Berkeley Rocket" }, CPU_IMPLEMENTER_NONE, }; void identify_cpu(void) { const struct cpu_parts *cpu_partsp; uint32_t part_id; uint32_t impl_id; uint64_t mimpid; - uint64_t mcpuid; + uint64_t misa; u_int cpu; size_t i; cpu_partsp = NULL; mimpid = machine_command(ECALL_MIMPID_GET, 0); - mcpuid = machine_command(ECALL_MCPUID_GET, 0); + misa = machine_command(ECALL_MCPUID_GET, 0); - /* SMPTODO: use mhartid ? */ cpu = PCPU_GET(cpuid); impl_id = CPU_IMPL(mimpid); for (i = 0; i < nitems(cpu_implementers); i++) { if (impl_id == cpu_implementers[i].impl_id || cpu_implementers[i].impl_id == 0) { cpu_desc[cpu].cpu_impl = impl_id; cpu_desc[cpu].cpu_impl_name = cpu_implementers[i].impl_name; - cpu_partsp = cpu_implementers[i].cpu_parts; + cpu_partsp = cpu_parts_std; break; } } - part_id = CPU_PART(mcpuid); + part_id = CPU_PART(misa); for (i = 0; &cpu_partsp[i] != NULL; i++) { if (part_id == cpu_partsp[i].part_id || cpu_partsp[i].part_id == -1) { cpu_desc[cpu].cpu_part_num = part_id; cpu_desc[cpu].cpu_part_name = cpu_partsp[i].part_name; break; } } /* Print details for boot CPU or if we want verbose output */ if (cpu == 0 || bootverbose) { printf("CPU(%d): %s %s\n", cpu, cpu_desc[cpu].cpu_impl_name, cpu_desc[cpu].cpu_part_name); } } Index: user/alc/PQ_LAUNDRY/sys/riscv/riscv/intr_machdep.c =================================================================== --- user/alc/PQ_LAUNDRY/sys/riscv/riscv/intr_machdep.c (revision 303666) +++ user/alc/PQ_LAUNDRY/sys/riscv/riscv/intr_machdep.c (revision 303667) @@ -1,304 +1,315 @@ /*- * Copyright (c) 2015-2016 Ruslan Bukin * All rights reserved. * * Portions of this software were developed by SRI International and the * University of Cambridge Computer Laboratory under DARPA/AFRL contract * FA8750-10-C-0237 ("CTSRD"), as part of the DARPA CRASH research programme. * * Portions of this software were developed by the University of Cambridge * Computer Laboratory as part of the CTSRD Project, with support from the * UK Higher Education Innovation Fund (HEIF). * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #ifdef SMP #include #endif u_long intrcnt[NIRQS]; size_t sintrcnt = sizeof(intrcnt); char intrnames[NIRQS * (MAXCOMLEN + 1) * 2]; size_t sintrnames = sizeof(intrnames); static struct intr_event *intr_events[NIRQS]; static riscv_intrcnt_t riscv_intr_counters[NIRQS]; static int intrcnt_index; riscv_intrcnt_t riscv_intrcnt_create(const char* name) { riscv_intrcnt_t counter; counter = &intrcnt[intrcnt_index++]; riscv_intrcnt_setname(counter, name); return (counter); } void riscv_intrcnt_setname(riscv_intrcnt_t counter, const char *name) { int i; i = (counter - intrcnt); KASSERT(counter != NULL, ("riscv_intrcnt_setname: NULL counter")); snprintf(intrnames + (MAXCOMLEN + 1) * i, MAXCOMLEN + 1, "%-*s", MAXCOMLEN, name); } static void riscv_mask_irq(void *source) { uintptr_t irq; irq = (uintptr_t)source; switch (irq) { - case IRQ_TIMER: + case IRQ_TIMER_SUPERVISOR: csr_clear(sie, SIE_STIE); break; - case IRQ_SOFTWARE: + case IRQ_SOFTWARE_USER: + csr_clear(sie, SIE_USIE); + case IRQ_SOFTWARE_SUPERVISOR: csr_clear(sie, SIE_SSIE); break; +#if 0 + /* lowRISC TODO */ case IRQ_UART: machine_command(ECALL_IO_IRQ_MASK, 0); break; +#endif default: panic("Unknown irq %d\n", irq); } } static void riscv_unmask_irq(void *source) { uintptr_t irq; irq = (uintptr_t)source; switch (irq) { - case IRQ_TIMER: + case IRQ_TIMER_SUPERVISOR: csr_set(sie, SIE_STIE); break; - case IRQ_SOFTWARE: + case IRQ_SOFTWARE_USER: + csr_set(sie, SIE_USIE); + break; + case IRQ_SOFTWARE_SUPERVISOR: csr_set(sie, SIE_SSIE); break; +#if 0 + /* lowRISC TODO */ case IRQ_UART: machine_command(ECALL_IO_IRQ_MASK, 1); break; +#endif default: panic("Unknown irq %d\n", irq); } } void riscv_init_interrupts(void) { char name[MAXCOMLEN + 1]; int i; for (i = 0; i < NIRQS; i++) { snprintf(name, MAXCOMLEN + 1, "int%d:", i); riscv_intr_counters[i] = riscv_intrcnt_create(name); } } int riscv_setup_intr(const char *name, driver_filter_t *filt, void (*handler)(void*), void *arg, int irq, int flags, void **cookiep) { struct intr_event *event; int error; if (irq < 0 || irq >= NIRQS) panic("%s: unknown intr %d", __func__, irq); event = intr_events[irq]; if (event == NULL) { error = intr_event_create(&event, (void *)(uintptr_t)irq, 0, irq, riscv_mask_irq, riscv_unmask_irq, NULL, NULL, "int%d", irq); if (error) return (error); intr_events[irq] = event; riscv_unmask_irq((void*)(uintptr_t)irq); } error = intr_event_add_handler(event, name, filt, handler, arg, intr_priority(flags), flags, cookiep); if (error) { printf("Failed to setup intr: %d\n", irq); return (error); } riscv_intrcnt_setname(riscv_intr_counters[irq], event->ie_fullname); return (0); } int riscv_teardown_intr(void *ih) { /* TODO */ return (0); } int riscv_config_intr(u_int irq, enum intr_trigger trig, enum intr_polarity pol) { /* There is no configuration for interrupts */ return (0); } void riscv_cpu_intr(struct trapframe *frame) { struct intr_event *event; int active_irq; critical_enter(); KASSERT(frame->tf_scause & EXCP_INTR, ("riscv_cpu_intr: wrong frame passed")); active_irq = (frame->tf_scause & EXCP_MASK); switch (active_irq) { +#if 0 + /* lowRISC TODO */ case IRQ_UART: - case IRQ_SOFTWARE: - case IRQ_TIMER: +#endif + case IRQ_SOFTWARE_USER: + case IRQ_SOFTWARE_SUPERVISOR: + case IRQ_TIMER_SUPERVISOR: event = intr_events[active_irq]; /* Update counters */ atomic_add_long(riscv_intr_counters[active_irq], 1); PCPU_INC(cnt.v_intr); break; - case IRQ_HTIF: - /* 
HTIF interrupts are only handled in machine mode */ - panic("%s: HTIF interrupt", __func__); - break; default: event = NULL; } if (!event || TAILQ_EMPTY(&event->ie_handlers) || (intr_event_handle(event, frame) != 0)) printf("stray interrupt %d\n", active_irq); critical_exit(); } #ifdef SMP void riscv_setup_ipihandler(driver_filter_t *filt) { - riscv_setup_intr("ipi", filt, NULL, NULL, IRQ_SOFTWARE, + riscv_setup_intr("ipi", filt, NULL, NULL, IRQ_SOFTWARE_SUPERVISOR, INTR_TYPE_MISC, NULL); } void riscv_unmask_ipi(void) { csr_set(sie, SIE_SSIE); } /* Sending IPI */ static void ipi_send(struct pcpu *pc, int ipi) { CTR3(KTR_SMP, "%s: cpu=%d, ipi=%x", __func__, pc->pc_cpuid, ipi); atomic_set_32(&pc->pc_pending_ipis, ipi); machine_command(ECALL_SEND_IPI, pc->pc_reg); CTR1(KTR_SMP, "%s: sent", __func__); } void ipi_all_but_self(u_int ipi) { cpuset_t other_cpus; other_cpus = all_cpus; CPU_CLR(PCPU_GET(cpuid), &other_cpus); CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi); ipi_selected(other_cpus, ipi); } void ipi_cpu(int cpu, u_int ipi) { cpuset_t cpus; CPU_ZERO(&cpus); CPU_SET(cpu, &cpus); CTR3(KTR_SMP, "%s: cpu: %d, ipi: %x\n", __func__, cpu, ipi); ipi_send(cpuid_to_pcpu[cpu], ipi); } void ipi_selected(cpuset_t cpus, u_int ipi) { struct pcpu *pc; CTR1(KTR_SMP, "ipi_selected: ipi: %x", ipi); STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) { if (CPU_ISSET(pc->pc_cpuid, &cpus)) { CTR3(KTR_SMP, "%s: pc: %p, ipi: %x\n", __func__, pc, ipi); ipi_send(pc, ipi); } } } #endif Index: user/alc/PQ_LAUNDRY/sys/riscv/riscv/locore.S =================================================================== --- user/alc/PQ_LAUNDRY/sys/riscv/riscv/locore.S (revision 303666) +++ user/alc/PQ_LAUNDRY/sys/riscv/riscv/locore.S (revision 303667) @@ -1,382 +1,392 @@ /*- * Copyright (c) 2015-2016 Ruslan Bukin * All rights reserved. * * Portions of this software were developed by SRI International and the * University of Cambridge Computer Laboratory under DARPA/AFRL contract * FA8750-10-C-0237 ("CTSRD"), as part of the DARPA CRASH research programme. * * Portions of this software were developed by the University of Cambridge * Computer Laboratory as part of the CTSRD Project, with support from the * UK Higher Education Innovation Fund (HEIF). * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * $FreeBSD$ */ #include "assym.s" #include #include #include #include #include #include #define HTIF_RING_NENTRIES (512) #define HTIF_RING_ENTRY_SZ (24) #define HTIF_RING_SIZE (HTIF_RING_ENTRY_SZ * HTIF_RING_NENTRIES) #define HW_STACK_SIZE (96) /* - * Event queue for each CPU core: + * Event queue: * * struct htif_ring { * uint64_t data; * uint64_t used; * uint64_t next; * } htif_ring[HTIF_RING_NENTRIES]; * uint64_t htif_ring_cursor; * uint64_t htif_ring_last; */ .macro build_ring la t0, htif_ring -#ifdef SMP - csrr a0, mhartid - li s0, (HTIF_RING_SIZE + 16) - mulw s0, a0, s0 - add t0, t0, s0 -#endif li t1, 0 sd t1, 0(t0) /* zero data */ sd t1, 8(t0) /* zero used */ mv t2, t0 mv t3, t0 li t5, (HTIF_RING_SIZE) li t6, 0 add t4, t0, t5 1: addi t3, t3, HTIF_RING_ENTRY_SZ /* pointer to next */ beq t3, t4, 2f /* finish */ sd t3, 16(t2) /* store pointer */ addi t2, t2, HTIF_RING_ENTRY_SZ /* next entry */ addi t6, t6, 1 /* counter */ j 1b 2: addi t3, t3, -HTIF_RING_ENTRY_SZ sd t0, 16(t3) /* last -> first */ li t2, (HTIF_RING_SIZE) add s0, t0, t2 sd t0, 0(s0) /* cursor */ sd t0, 8(s0) /* last */ /* finish building ring */ .endm .globl kernbase .set kernbase, KERNBASE /* Trap entries */ .text mentry: - /* User mode entry point (mtvec + 0x000) */ - .align 6 - j user_trap + /* Vectors */ + j _start /* reset */ + j bad_trap /* NMI (non-maskable interrupt) */ + j machine_trap - /* Supervisor mode entry point (mtvec + 0x040) */ - .align 6 - j supervisor_trap - - /* Hypervisor mode entry point (mtvec + 0x080) */ - .align 6 - j bad_trap - - /* Machine mode entry point (mtvec + 0x0C0) */ - .align 6 - j bad_trap - /* Reset vector */ .text - .align 8 .globl _start _start: + /* Setup machine trap vector */ + la t0, machine_trap + csrw mtvec, t0 + + /* Delegate interrupts to supervisor mode */ + li t0, (MIP_SSIP | MIP_STIP | MIP_SEIP) + csrw mideleg, t0 + + /* Delegate exceptions to supervisor mode */ + li t0, (1 << EXCP_MISALIGNED_FETCH) | \ + (1 << EXCP_FAULT_FETCH) | \ + (1 << EXCP_ILLEGAL_INSTRUCTION) | \ + (1 << EXCP_FAULT_LOAD) | \ + (1 << EXCP_FAULT_STORE) | \ + (1 << EXCP_BREAKPOINT) | \ + (1 << EXCP_USER_ECALL) + csrw medeleg, t0 + + la t0, cpu_exception_handler + li t1, KERNBASE + add t0, t0, t1 + csrw stvec, t0 + /* Direct secondary cores to mpentry */ csrr a0, mhartid bnez a0, mpentry + li t1, 0 + la t0, tohost + sd t1, 0(t0) + la t0, fromhost + sd t1, 0(t0) + /* Build event queue for current core */ build_ring /* Setup machine-mode stack for CPU 0 */ la t0, hardstack_end csrw mscratch, t0 li t0, 0 csrw sscratch, t0 li s10, PAGE_SIZE li s9, (PAGE_SIZE * KSTACK_PAGES) /* Page tables */ /* Create an L1 page for early devmap */ la s1, pagetable_l1 la s2, pagetable_l2_devmap /* Link to next level PN */ srli s2, s2, PAGE_SHIFT li a5, (VM_MAX_KERNEL_ADDRESS - L2_SIZE) srli a5, a5, L1_SHIFT /* >> L1_SHIFT */ andi a5, a5, 0x1ff /* & 0x1ff */ - li t4, (PTE_VALID | (PTE_TYPE_PTR << PTE_TYPE_S)) + li t4, PTE_V slli t5, s2, PTE_PPN0_S /* (s2 << PTE_PPN0_S) */ or t6, t4, t5 /* Store single level1 PTE entry to position */ li a6, PTE_SIZE mulw a5, a5, a6 add t0, s1, a5 sd t6, (t0) /* Add single Level 1 entry for kernel */ la s1, pagetable_l1 la s2, pagetable_l2 /* Link to next level PN */ srli s2, s2, PAGE_SHIFT - li a5, KERNBASE + li a5, (KERNBASE + KERNENTRY) srli a5, a5, L1_SHIFT /* >> L1_SHIFT */ andi a5, a5, 0x1ff /* & 0x1ff */ - li t4, (PTE_VALID | (PTE_TYPE_PTR << PTE_TYPE_S)) + li t4, PTE_V slli t5, s2, PTE_PPN0_S /* (s2 << PTE_PPN0_S) */ or t6, t4, t5 /* Store single level1 PTE entry to position */ li 
a6, PTE_SIZE mulw a5, a5, a6 add t0, s1, a5 sd t6, (t0) /* Level 2 superpages (512 x 2MiB) */ la s1, pagetable_l2 - li t3, 512 /* Build 512 entries */ - li t4, 0 /* Counter */ + li t4, KERNENTRY + srli t4, t4, 21 /* Div by 2 MiB */ + li t2, 512 /* Build 512 entries */ + add t3, t4, t2 li t5, 0 2: - li t0, (PTE_VALID | (PTE_TYPE_SRWX << PTE_TYPE_S)) + li t0, (PTE_V | PTE_RWX) slli t2, t4, PTE_PPN1_S /* << PTE_PPN1_S */ or t5, t0, t2 sd t5, (s1) /* Store PTE entry to position */ addi s1, s1, PTE_SIZE addi t4, t4, 1 bltu t4, t3, 2b /* Set page tables base register */ la s1, pagetable_l1 + srli s1, s1, PAGE_SHIFT csrw sptbr, s1 /* Page tables END */ /* Enter supervisor mode */ li s0, ((MSTATUS_VM_SV39 << MSTATUS_VM_SHIFT) | \ - (MSTATUS_PRV_M << MSTATUS_PRV_SHIFT) | \ - (MSTATUS_PRV_S << MSTATUS_PRV1_SHIFT) | \ - (MSTATUS_PRV_U << MSTATUS_PRV2_SHIFT)); + (MSTATUS_PRV_S << MSTATUS_MPP_SHIFT)); csrw mstatus, s0 /* * Enable machine-mode software interrupts * so we can deliver IPI to this core. */ li t0, MIE_MSIE csrs mie, t0 /* Exit from machine mode */ la t0, .Lmmu_on li s11, KERNBASE add t0, t0, s11 csrw mepc, t0 - eret + mret .Lmmu_on: /* Initialize stack pointer */ la s3, initstack_end mv sp, s3 addi sp, sp, -PCB_SIZE /* Clear BSS */ la a0, _C_LABEL(__bss_start) la s1, _C_LABEL(_end) 1: sd zero, 0(a0) addi a0, a0, 8 bltu a0, s1, 1b /* Fill riscv_bootparams */ addi sp, sp, -16 la t0, pagetable_l1 sd t0, 0(sp) /* kern_l1pt */ la t0, initstack_end sd t0, 8(sp) /* kern_stack */ mv a0, sp call _C_LABEL(initriscv) /* Off we go */ call _C_LABEL(mi_startup) .align 4 initstack: .space (PAGE_SIZE * KSTACK_PAGES) initstack_end: hardstack: .space (HW_STACK_SIZE * MAXCPU) hardstack_end: .globl htif_ring htif_ring: - .space ((HTIF_RING_SIZE + 16) * MAXCPU) - - .globl console_intr -console_intr: + .space (HTIF_RING_SIZE + 16) +htif_lock: .space (8) +tohost: + .space (8) +fromhost: + .space (8) ENTRY(sigcode) mv a0, sp addi a0, a0, SF_UC 1: li t0, SYS_sigreturn ecall /* sigreturn failed, exit */ li t0, SYS_exit ecall j 1b END(sigcode) /* This may be copied to the stack, keep it 16-byte aligned */ .align 3 esigcode: .data .align 3 .global szsigcode szsigcode: .quad esigcode - sigcode .align 12 pagetable_l1: .space PAGE_SIZE pagetable_l2: .space PAGE_SIZE pagetable_l2_devmap: .space PAGE_SIZE .globl init_pt_va init_pt_va: .quad pagetable_l2 /* XXX: Keep page tables VA */ #ifndef SMP ENTRY(mpentry) 1: wfi j 1b END(mpentry) #else /* * mpentry(unsigned long) * * Called by a core when it is being brought online. * The data in x0 is passed straight to init_secondary. */ ENTRY(mpentry) /* * Calculate the offset to __riscv_boot_ap * for current core, cpuid in a0. */ li t1, 4 mulw t1, t1, a0 /* Get pointer */ la t0, __riscv_boot_ap add t0, t0, t1 1: /* Wait the kernel to be ready */ lw t1, 0(t0) beqz t1, 1b - /* Build event queue ring for this core */ - build_ring - /* Set page tables base register */ la t0, pagetable_l1 + srli t0, t0, PAGE_SHIFT csrw sptbr, t0 /* Configure mstatus */ li s0, ((MSTATUS_VM_SV39 << MSTATUS_VM_SHIFT) | \ - (MSTATUS_PRV_M << MSTATUS_PRV_SHIFT) | \ - (MSTATUS_PRV_S << MSTATUS_PRV1_SHIFT) | \ - (MSTATUS_PRV_U << MSTATUS_PRV2_SHIFT)); + (MSTATUS_PRV_S << MSTATUS_MPP_SHIFT)); csrw mstatus, s0 /* Setup stack for machine mode exceptions */ la t0, hardstack_end li t1, HW_STACK_SIZE mulw t1, t1, a0 sub t0, t0, t1 csrw mscratch, t0 li t0, 0 csrw sscratch, t0 /* * Enable machine-mode software interrupts * so we can deliver IPI to this core. 
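 *
 * Delivery is a store of 1 to the target core's IPI register, as
 * send_ipi in exception.S does with the address passed via
 * machine_command(ECALL_SEND_IPI, pc->pc_reg). A hypothetical C view,
 * assuming pc_reg follows the per-hart layout cleared in
 * machine_software_interrupt (base 0x40001000, stride 0x1000):
 *
 *	volatile uint64_t *ipi =
 *	    (volatile uint64_t *)(0x40001000UL + hartid * 0x1000UL);
 *	*ipi = 1;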
*/ li t0, MIE_MSIE csrs mie, t0 /* * Exit from machine mode and go to * the virtual address space. */ la t0, mp_virtdone li s11, KERNBASE add t0, t0, s11 csrw mepc, t0 - eret + mret mp_virtdone: /* We are now in virtual address space */ /* Setup stack pointer */ la t0, secondary_stacks li t1, (PAGE_SIZE * KSTACK_PAGES) mulw t1, t1, a0 add sp, t0, t1 call init_secondary END(mpentry) #endif #include "exception.S" Index: user/alc/PQ_LAUNDRY/sys/riscv/riscv/machdep.c =================================================================== --- user/alc/PQ_LAUNDRY/sys/riscv/riscv/machdep.c (revision 303666) +++ user/alc/PQ_LAUNDRY/sys/riscv/riscv/machdep.c (revision 303667) @@ -1,797 +1,798 @@ /*- * Copyright (c) 2014 Andrew Turner * Copyright (c) 2015-2016 Ruslan Bukin * All rights reserved. * * Portions of this software were developed by SRI International and the * University of Cambridge Computer Laboratory under DARPA/AFRL contract * FA8750-10-C-0237 ("CTSRD"), as part of the DARPA CRASH research programme. * * Portions of this software were developed by the University of Cambridge * Computer Laboratory as part of the CTSRD Project, with support from the * UK Higher Education Innovation Fund (HEIF). * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include "opt_platform.h" #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef VFP #include #endif #ifdef FDT #include #include #endif struct pcpu __pcpu[MAXCPU]; static struct trapframe proc0_tf; vm_paddr_t phys_avail[PHYS_AVAIL_SIZE + 2]; vm_paddr_t dump_avail[PHYS_AVAIL_SIZE + 2]; int early_boot = 1; int cold = 1; long realmem = 0; long Maxmem = 0; #define PHYSMAP_SIZE (2 * (VM_PHYSSEG_MAX - 1)) vm_paddr_t physmap[PHYSMAP_SIZE]; u_int physmap_idx; struct kva_md_info kmi; int64_t dcache_line_size; /* The minimum D cache line size */ int64_t icache_line_size; /* The minimum I cache line size */ int64_t idcache_line_size; /* The minimum cache line size */ extern int *end; extern int *initstack_end; struct pcpu *pcpup; uintptr_t mcall_trap(uintptr_t mcause, uintptr_t* regs); uintptr_t mcall_trap(uintptr_t mcause, uintptr_t* regs) { return (0); } static void cpu_startup(void *dummy) { identify_cpu(); vm_ksubmap_init(&kmi); bufinit(); vm_pager_bufferinit(); } SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL); int cpu_idle_wakeup(int cpu) { return (0); } void bzero(void *buf, size_t len) { uint8_t *p; p = buf; while(len-- > 0) *p++ = 0; } int fill_regs(struct thread *td, struct reg *regs) { struct trapframe *frame; frame = td->td_frame; regs->sepc = frame->tf_sepc; regs->sstatus = frame->tf_sstatus; regs->ra = frame->tf_ra; regs->sp = frame->tf_sp; regs->gp = frame->tf_gp; regs->tp = frame->tf_tp; memcpy(regs->t, frame->tf_t, sizeof(regs->t)); memcpy(regs->s, frame->tf_s, sizeof(regs->s)); memcpy(regs->a, frame->tf_a, sizeof(regs->a)); return (0); } int set_regs(struct thread *td, struct reg *regs) { struct trapframe *frame; frame = td->td_frame; frame->tf_sepc = regs->sepc; frame->tf_sstatus = regs->sstatus; frame->tf_ra = regs->ra; frame->tf_sp = regs->sp; frame->tf_gp = regs->gp; frame->tf_tp = regs->tp; memcpy(frame->tf_t, regs->t, sizeof(frame->tf_t)); memcpy(frame->tf_s, regs->s, sizeof(frame->tf_s)); memcpy(frame->tf_a, regs->a, sizeof(frame->tf_a)); return (0); } int fill_fpregs(struct thread *td, struct fpreg *regs) { /* TODO */ bzero(regs, sizeof(*regs)); return (0); } int set_fpregs(struct thread *td, struct fpreg *regs) { /* TODO */ return (0); } int fill_dbregs(struct thread *td, struct dbreg *regs) { panic("fill_dbregs"); } int set_dbregs(struct thread *td, struct dbreg *regs) { panic("set_dbregs"); } int ptrace_set_pc(struct thread *td, u_long addr) { panic("ptrace_set_pc"); return (0); } int ptrace_single_step(struct thread *td) { /* TODO; */ return (0); } int ptrace_clear_single_step(struct thread *td) { /* TODO; */ return (0); } void exec_setregs(struct thread *td, struct image_params *imgp, u_long stack) { struct trapframe *tf; tf = td->td_frame; memset(tf, 0, sizeof(struct trapframe)); /* * We need to set a0 for init as it doesn't call * cpu_set_syscall_retval to copy the value. We also * need to set td_retval for the cases where we do. 
*/ tf->tf_a[0] = td->td_retval[0] = stack; tf->tf_sp = STACKALIGN(stack); tf->tf_ra = imgp->entry_addr; tf->tf_sepc = imgp->entry_addr; } /* Sanity check these are the same size, they will be memcpy'd to and fro */ CTASSERT(sizeof(((struct trapframe *)0)->tf_a) == sizeof((struct gpregs *)0)->gp_a); CTASSERT(sizeof(((struct trapframe *)0)->tf_s) == sizeof((struct gpregs *)0)->gp_s); CTASSERT(sizeof(((struct trapframe *)0)->tf_t) == sizeof((struct gpregs *)0)->gp_t); CTASSERT(sizeof(((struct trapframe *)0)->tf_a) == sizeof((struct reg *)0)->a); CTASSERT(sizeof(((struct trapframe *)0)->tf_s) == sizeof((struct reg *)0)->s); CTASSERT(sizeof(((struct trapframe *)0)->tf_t) == sizeof((struct reg *)0)->t); int get_mcontext(struct thread *td, mcontext_t *mcp, int clear_ret) { struct trapframe *tf = td->td_frame; memcpy(mcp->mc_gpregs.gp_t, tf->tf_t, sizeof(mcp->mc_gpregs.gp_t)); memcpy(mcp->mc_gpregs.gp_s, tf->tf_s, sizeof(mcp->mc_gpregs.gp_s)); memcpy(mcp->mc_gpregs.gp_a, tf->tf_a, sizeof(mcp->mc_gpregs.gp_a)); if (clear_ret & GET_MC_CLEAR_RET) { mcp->mc_gpregs.gp_a[0] = 0; mcp->mc_gpregs.gp_t[0] = 0; /* clear syscall error */ } mcp->mc_gpregs.gp_ra = tf->tf_ra; mcp->mc_gpregs.gp_sp = tf->tf_sp; mcp->mc_gpregs.gp_gp = tf->tf_gp; mcp->mc_gpregs.gp_tp = tf->tf_tp; mcp->mc_gpregs.gp_sepc = tf->tf_sepc; mcp->mc_gpregs.gp_sstatus = tf->tf_sstatus; return (0); } int set_mcontext(struct thread *td, mcontext_t *mcp) { struct trapframe *tf; tf = td->td_frame; memcpy(tf->tf_t, mcp->mc_gpregs.gp_t, sizeof(tf->tf_t)); memcpy(tf->tf_s, mcp->mc_gpregs.gp_s, sizeof(tf->tf_s)); memcpy(tf->tf_a, mcp->mc_gpregs.gp_a, sizeof(tf->tf_a)); tf->tf_ra = mcp->mc_gpregs.gp_ra; tf->tf_sp = mcp->mc_gpregs.gp_sp; tf->tf_gp = mcp->mc_gpregs.gp_gp; tf->tf_tp = mcp->mc_gpregs.gp_tp; tf->tf_sepc = mcp->mc_gpregs.gp_sepc; tf->tf_sstatus = mcp->mc_gpregs.gp_sstatus; return (0); } static void get_fpcontext(struct thread *td, mcontext_t *mcp) { /* TODO */ } static void set_fpcontext(struct thread *td, mcontext_t *mcp) { /* TODO */ } void cpu_idle(int busy) { spinlock_enter(); if (!busy) cpu_idleclock(); if (!sched_runnable()) __asm __volatile( "fence \n" "wfi \n"); if (!busy) cpu_activeclock(); spinlock_exit(); } void cpu_halt(void) { panic("cpu_halt"); } /* * Flush the D-cache for non-DMA I/O so that the I-cache can * be made coherent later. */ void cpu_flush_dcache(void *ptr, size_t len) { /* TBD */ } /* Get current clock frequency for the given CPU ID. */ int cpu_est_clockrate(int cpu_id, uint64_t *rate) { panic("cpu_est_clockrate"); } void cpu_pcpu_init(struct pcpu *pcpu, int cpuid, size_t size) { } void spinlock_enter(void) { struct thread *td; td = curthread; if (td->td_md.md_spinlock_count == 0) { td->td_md.md_spinlock_count = 1; td->td_md.md_saved_sstatus_ie = intr_disable(); } else td->td_md.md_spinlock_count++; critical_enter(); } void spinlock_exit(void) { struct thread *td; register_t sstatus_ie; td = curthread; critical_exit(); sstatus_ie = td->td_md.md_saved_sstatus_ie; td->td_md.md_spinlock_count--; if (td->td_md.md_spinlock_count == 0) intr_restore(sstatus_ie); } #ifndef _SYS_SYSPROTO_H_ struct sigreturn_args { ucontext_t *ucp; }; #endif int sys_sigreturn(struct thread *td, struct sigreturn_args *uap) { uint64_t sstatus; ucontext_t uc; int error; if (uap == NULL) return (EFAULT); if (copyin(uap->sigcntxp, &uc, sizeof(uc))) return (EFAULT); /* * Make sure the processor mode has not been tampered with and * interrupts have not been disabled. + * Supervisor interrupts in user mode are always enabled. 
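 *
 * SSTATUS_SPP is the previous-privilege bit: zero means the saved
 * context was taken in user mode, while a nonzero value would let
 * sret resume in supervisor mode, which is why such contexts are
 * rejected below.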
*/ sstatus = uc.uc_mcontext.mc_gpregs.gp_sstatus; - if ((sstatus & SSTATUS_PS) != 0 || - (sstatus & SSTATUS_PIE) == 0) + if ((sstatus & SSTATUS_SPP) != 0) return (EINVAL); error = set_mcontext(td, &uc.uc_mcontext); if (error != 0) return (error); set_fpcontext(td, &uc.uc_mcontext); /* Restore signal mask. */ kern_sigprocmask(td, SIG_SETMASK, &uc.uc_sigmask, NULL, 0); return (EJUSTRETURN); } /* * Construct a PCB from a trapframe. This is called from kdb_trap() where * we want to start a backtrace from the function that caused us to enter * the debugger. We have the context in the trapframe, but base the trace * on the PCB. The PCB doesn't have to be perfect, as long as it contains * enough for a backtrace. */ void makectx(struct trapframe *tf, struct pcb *pcb) { memcpy(pcb->pcb_t, tf->tf_t, sizeof(tf->tf_t)); memcpy(pcb->pcb_s, tf->tf_s, sizeof(tf->tf_s)); memcpy(pcb->pcb_a, tf->tf_a, sizeof(tf->tf_a)); pcb->pcb_ra = tf->tf_ra; pcb->pcb_sp = tf->tf_sp; pcb->pcb_gp = tf->tf_gp; pcb->pcb_tp = tf->tf_tp; pcb->pcb_sepc = tf->tf_sepc; } void sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) { struct sigframe *fp, frame; struct sysentvec *sysent; struct trapframe *tf; struct sigacts *psp; struct thread *td; struct proc *p; int onstack; int code; int sig; td = curthread; p = td->td_proc; PROC_LOCK_ASSERT(p, MA_OWNED); sig = ksi->ksi_signo; code = ksi->ksi_code; psp = p->p_sigacts; mtx_assert(&psp->ps_mtx, MA_OWNED); tf = td->td_frame; onstack = sigonstack(tf->tf_sp); CTR4(KTR_SIG, "sendsig: td=%p (%s) catcher=%p sig=%d", td, p->p_comm, catcher, sig); /* Allocate and validate space for the signal handler context. */ if ((td->td_pflags & TDP_ALTSTACK) != 0 && !onstack && SIGISMEMBER(psp->ps_sigonstack, sig)) { fp = (struct sigframe *)((uintptr_t)td->td_sigstk.ss_sp + td->td_sigstk.ss_size); } else { fp = (struct sigframe *)td->td_frame->tf_sp; } /* Make room, keeping the stack aligned */ fp--; fp = (struct sigframe *)STACKALIGN(fp); /* Fill in the frame to copy out */ get_mcontext(td, &frame.sf_uc.uc_mcontext, 0); get_fpcontext(td, &frame.sf_uc.uc_mcontext); frame.sf_si = ksi->ksi_info; frame.sf_uc.uc_sigmask = *mask; frame.sf_uc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK) ? ((onstack) ? SS_ONSTACK : 0) : SS_DISABLE; frame.sf_uc.uc_stack = td->td_sigstk; mtx_unlock(&psp->ps_mtx); PROC_UNLOCK(td->td_proc); /* Copy the sigframe out to the user's stack. */ if (copyout(&frame, fp, sizeof(*fp)) != 0) { /* Process has trashed its stack. Kill it. */ CTR2(KTR_SIG, "sendsig: sigexit td=%p fp=%p", td, fp); PROC_LOCK(p); sigexit(td, SIGILL); } tf->tf_a[0] = sig; tf->tf_a[1] = (register_t)&fp->sf_si; tf->tf_a[2] = (register_t)&fp->sf_uc; tf->tf_sepc = (register_t)catcher; tf->tf_sp = (register_t)fp; sysent = p->p_sysent; if (sysent->sv_sigcode_base != 0) tf->tf_ra = (register_t)sysent->sv_sigcode_base; else tf->tf_ra = (register_t)(sysent->sv_psstrings - *(sysent->sv_szsigcode)); CTR3(KTR_SIG, "sendsig: return td=%p pc=%#x sp=%#x", td, tf->tf_sepc, tf->tf_sp); PROC_LOCK(p); mtx_lock(&psp->ps_mtx); } static void init_proc0(vm_offset_t kstack) { pcpup = &__pcpu[0]; proc_linkup0(&proc0, &thread0); thread0.td_kstack = kstack; thread0.td_pcb = (struct pcb *)(thread0.td_kstack) - 1; thread0.td_frame = &proc0_tf; pcpup->pc_curpcb = thread0.td_pcb; } static int add_physmap_entry(uint64_t base, uint64_t length, vm_paddr_t *physmap, u_int *physmap_idxp) { u_int i, insert_idx, _physmap_idx; _physmap_idx = *physmap_idxp; if (length == 0) return (1); /* * Find insertion point while checking for overlap. 
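 * (physmap[] holds ordered pairs: physmap[i] is a region base and
 * physmap[i + 1] its end, so a single 128MB bank at 0x80000000 ends
 * up as physmap[0] = 0x80000000, physmap[1] = 0x88000000.)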
Start off by * assuming the new entry will be added to the end. */ insert_idx = _physmap_idx; for (i = 0; i <= _physmap_idx; i += 2) { if (base < physmap[i + 1]) { if (base + length <= physmap[i]) { insert_idx = i; break; } if (boothowto & RB_VERBOSE) printf( "Overlapping memory regions, ignoring second region\n"); return (1); } } /* See if we can prepend to the next entry. */ if (insert_idx <= _physmap_idx && base + length == physmap[insert_idx]) { physmap[insert_idx] = base; return (1); } /* See if we can append to the previous entry. */ if (insert_idx > 0 && base == physmap[insert_idx - 1]) { physmap[insert_idx - 1] += length; return (1); } _physmap_idx += 2; *physmap_idxp = _physmap_idx; if (_physmap_idx == PHYSMAP_SIZE) { printf( "Too many segments in the physical address map, giving up\n"); return (0); } /* * Move the last 'N' entries down to make room for the new * entry if needed. */ for (i = _physmap_idx; i > insert_idx; i -= 2) { physmap[i] = physmap[i - 2]; physmap[i + 1] = physmap[i - 1]; } /* Insert the new entry. */ physmap[insert_idx] = base; physmap[insert_idx + 1] = base + length; printf("physmap[%d] = 0x%016lx\n", insert_idx, base); printf("physmap[%d] = 0x%016lx\n", insert_idx + 1, base + length); return (1); } #ifdef FDT static void try_load_dtb(caddr_t kmdp) { vm_offset_t dtbp; dtbp = (vm_offset_t)&fdt_static_dtb; if (dtbp == (vm_offset_t)NULL) { printf("ERROR loading DTB\n"); return; } if (OF_install(OFW_FDT, 0) == FALSE) panic("Cannot install FDT"); if (OF_init((void *)dtbp) != 0) panic("OF_init failed with the found device tree"); } #endif static void cache_setup(void) { /* TODO */ } /* * Fake up a boot descriptor table. * RISCVTODO: This needs to be done via loader (when it's available). */ vm_offset_t fake_preload_metadata(struct riscv_bootparams *rvbp __unused) { #ifdef DDB vm_offset_t zstart = 0, zend = 0; #endif vm_offset_t lastaddr; int i = 0; static uint32_t fake_preload[35]; fake_preload[i++] = MODINFO_NAME; fake_preload[i++] = strlen("kernel") + 1; strcpy((char*)&fake_preload[i++], "kernel"); i += 1; fake_preload[i++] = MODINFO_TYPE; fake_preload[i++] = strlen("elf64 kernel") + 1; strcpy((char*)&fake_preload[i++], "elf64 kernel"); i += 3; fake_preload[i++] = MODINFO_ADDR; fake_preload[i++] = sizeof(vm_offset_t); fake_preload[i++] = (uint64_t)(KERNBASE + KERNENTRY); i += 1; fake_preload[i++] = MODINFO_SIZE; fake_preload[i++] = sizeof(uint64_t); printf("end is 0x%016lx\n", (uint64_t)&end); fake_preload[i++] = (uint64_t)&end - (uint64_t)(KERNBASE + KERNENTRY); i += 1; #ifdef DDB #if 0 /* RISCVTODO */ if (*(uint32_t *)KERNVIRTADDR == MAGIC_TRAMP_NUMBER) { fake_preload[i++] = MODINFO_METADATA|MODINFOMD_SSYM; fake_preload[i++] = sizeof(vm_offset_t); fake_preload[i++] = *(uint32_t *)(KERNVIRTADDR + 4); fake_preload[i++] = MODINFO_METADATA|MODINFOMD_ESYM; fake_preload[i++] = sizeof(vm_offset_t); fake_preload[i++] = *(uint32_t *)(KERNVIRTADDR + 8); lastaddr = *(uint32_t *)(KERNVIRTADDR + 8); zend = lastaddr; zstart = *(uint32_t *)(KERNVIRTADDR + 4); db_fetch_ksymtab(zstart, zend); } else #endif #endif lastaddr = (vm_offset_t)&end; fake_preload[i++] = 0; fake_preload[i] = 0; preload_metadata = (void *)fake_preload; return (lastaddr); } void initriscv(struct riscv_bootparams *rvbp) { struct mem_region mem_regions[FDT_MEM_REGIONS]; vm_offset_t lastaddr; int mem_regions_sz; vm_size_t kernlen; caddr_t kmdp; int i; /* Set the module data location */ lastaddr = fake_preload_metadata(rvbp); /* Find the kernel address */ kmdp = preload_search_by_type("elf kernel"); if (kmdp 
== NULL) kmdp = preload_search_by_type("elf64 kernel"); - boothowto = 0; + boothowto = RB_VERBOSE | RB_SINGLE; + boothowto = RB_VERBOSE; kern_envp = NULL; #ifdef FDT try_load_dtb(kmdp); #endif /* Load the physical memory ranges */ physmap_idx = 0; /* Grab physical memory regions information from device tree. */ if (fdt_get_mem_regions(mem_regions, &mem_regions_sz, NULL) != 0) panic("Cannot get physical memory regions"); for (i = 0; i < mem_regions_sz; i++) add_physmap_entry(mem_regions[i].mr_start, mem_regions[i].mr_size, physmap, &physmap_idx); /* Set the pcpu data, this is needed by pmap_bootstrap */ pcpup = &__pcpu[0]; pcpu_init(pcpup, 0, sizeof(struct pcpu)); /* Set the pcpu pointer */ __asm __volatile("mv gp, %0" :: "r"(pcpup)); PCPU_SET(curthread, &thread0); /* Do basic tuning, hz etc */ init_param1(); cache_setup(); - /* Bootstrap enough of pmap to enter the kernel proper */ + /* Bootstrap enough of pmap to enter the kernel proper */ kernlen = (lastaddr - KERNBASE); pmap_bootstrap(rvbp->kern_l1pt, KERNENTRY, kernlen); cninit(); init_proc0(rvbp->kern_stack); /* set page table base register for thread0 */ thread0.td_pcb->pcb_l1addr = (rvbp->kern_l1pt - KERNBASE); msgbufinit(msgbufp, msgbufsize); mutex_init(); init_param2(physmem); kdb_init(); riscv_init_interrupts(); early_boot = 0; } Index: user/alc/PQ_LAUNDRY/sys/riscv/riscv/pmap.c =================================================================== --- user/alc/PQ_LAUNDRY/sys/riscv/riscv/pmap.c (revision 303666) +++ user/alc/PQ_LAUNDRY/sys/riscv/riscv/pmap.c (revision 303667) @@ -1,3278 +1,3282 @@ /*- * Copyright (c) 1991 Regents of the University of California. * All rights reserved. * Copyright (c) 1994 John S. Dyson * All rights reserved. * Copyright (c) 1994 David Greenman * All rights reserved. * Copyright (c) 2003 Peter Wemm * All rights reserved. * Copyright (c) 2005-2010 Alan L. Cox * All rights reserved. * Copyright (c) 2014 Andrew Turner * All rights reserved. * Copyright (c) 2014 The FreeBSD Foundation * All rights reserved. * Copyright (c) 2015-2016 Ruslan Bukin * All rights reserved. * * This code is derived from software contributed to Berkeley by * the Systems Programming Group of the University of Utah Computer * Science Department and William Jolitz of UUNET Technologies Inc. * * Portions of this software were developed by Andrew Turner under * sponsorship from The FreeBSD Foundation. * * Portions of this software were developed by SRI International and the * University of Cambridge Computer Laboratory under DARPA/AFRL contract * FA8750-10-C-0237 ("CTSRD"), as part of the DARPA CRASH research programme. * * Portions of this software were developed by the University of Cambridge * Computer Laboratory as part of the CTSRD Project, with support from the * UK Higher Education Innovation Fund (HEIF). * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. 
Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)pmap.c 7.7 (Berkeley) 5/12/91 */ /*- * Copyright (c) 2003 Networks Associates Technology, Inc. * All rights reserved. * * This software was developed for the FreeBSD Project by Jake Burkholder, * Safeport Network Services, and Network Associates Laboratories, the * Security Research Division of Network Associates, Inc. under * DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA * CHATS research program. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); /* * Manages physical address maps. * * Since the information managed by this module is * also stored by the logical address mapping module, * this module may throw away valid virtual-to-physical * mappings at almost any time. However, invalidations * of virtual-to-physical mappings must be done as * requested. * * In order to cope with hardware architectures which * make virtual-to-physical map invalidates expensive, * this module may delay invalidate or reduced protection * operations until such time as they are actually * necessary. This module is given full information as * to which processors are currently using which maps, * and to when physical maps must be made correct. 
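 *
 * A note on the PTE format used after this change: a valid entry with
 * neither of the R/X permission bits set is a pointer to the next
 * level table, and a leaf otherwise. The table walkers below test
 * exactly that, e.g.:
 *
 *	if ((pmap_load(l1) & PTE_V) == 0)
 *		return (NULL);		(invalid)
 *	if ((pmap_load(l1) & PTE_RX) != 0)
 *		return (NULL);		(leaf, not a pointer)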
*/ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define NPDEPG (PAGE_SIZE/(sizeof (pd_entry_t))) #define NUPDE (NPDEPG * NPDEPG) #define NUSERPGTBLS (NUPDE + NPDEPG) #if !defined(DIAGNOSTIC) #ifdef __GNUC_GNU_INLINE__ #define PMAP_INLINE __attribute__((__gnu_inline__)) inline #else #define PMAP_INLINE extern inline #endif #else #define PMAP_INLINE #endif #ifdef PV_STATS #define PV_STAT(x) do { x ; } while (0) #else #define PV_STAT(x) do { } while (0) #endif #define pmap_l2_pindex(v) ((v) >> L2_SHIFT) #define NPV_LIST_LOCKS MAXCPU #define PHYS_TO_PV_LIST_LOCK(pa) \ (&pv_list_locks[pa_index(pa) % NPV_LIST_LOCKS]) #define CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, pa) do { \ struct rwlock **_lockp = (lockp); \ struct rwlock *_new_lock; \ \ _new_lock = PHYS_TO_PV_LIST_LOCK(pa); \ if (_new_lock != *_lockp) { \ if (*_lockp != NULL) \ rw_wunlock(*_lockp); \ *_lockp = _new_lock; \ rw_wlock(*_lockp); \ } \ } while (0) #define CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m) \ CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, VM_PAGE_TO_PHYS(m)) #define RELEASE_PV_LIST_LOCK(lockp) do { \ struct rwlock **_lockp = (lockp); \ \ if (*_lockp != NULL) { \ rw_wunlock(*_lockp); \ *_lockp = NULL; \ } \ } while (0) #define VM_PAGE_TO_PV_LIST_LOCK(m) \ PHYS_TO_PV_LIST_LOCK(VM_PAGE_TO_PHYS(m)) /* The list of all the user pmaps */ LIST_HEAD(pmaplist, pmap); static struct pmaplist allpmaps; static MALLOC_DEFINE(M_VMPMAP, "pmap", "PMAP L1"); struct pmap kernel_pmap_store; vm_offset_t virtual_avail; /* VA of first avail page (after kernel bss) */ vm_offset_t virtual_end; /* VA of last avail page (end of kernel AS) */ vm_offset_t kernel_vm_end = 0; struct msgbuf *msgbufp = NULL; static struct rwlock_padalign pvh_global_lock; /* * Data for the pv entry allocation mechanism */ static TAILQ_HEAD(pch, pv_chunk) pv_chunks = TAILQ_HEAD_INITIALIZER(pv_chunks); static struct mtx pv_chunks_mutex; static struct rwlock pv_list_locks[NPV_LIST_LOCKS]; static void free_pv_chunk(struct pv_chunk *pc); static void free_pv_entry(pmap_t pmap, pv_entry_t pv); static pv_entry_t get_pv_entry(pmap_t pmap, struct rwlock **lockp); static vm_page_t reclaim_pv_chunk(pmap_t locked_pmap, struct rwlock **lockp); static void pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va); static pv_entry_t pmap_pvh_remove(struct md_page *pvh, pmap_t pmap, vm_offset_t va); static vm_page_t pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, vm_page_t mpte, struct rwlock **lockp); static int pmap_remove_l3(pmap_t pmap, pt_entry_t *l3, vm_offset_t sva, pd_entry_t ptepde, struct spglist *free, struct rwlock **lockp); static boolean_t pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va, vm_page_t m, struct rwlock **lockp); static vm_page_t _pmap_alloc_l3(pmap_t pmap, vm_pindex_t ptepindex, struct rwlock **lockp); static void _pmap_unwire_l3(pmap_t pmap, vm_offset_t va, vm_page_t m, struct spglist *free); static int pmap_unuse_l3(pmap_t, vm_offset_t, pd_entry_t, struct spglist *); /* * These load the old table data and store the new value. * They need to be atomic as the System MMU may write to the table at * the same time as the CPU. 
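 * The RISC-V hardware table walker may set PTE_A or PTE_D in a live entry, so a plain read-modify-write could silently lose those updates. A minimal usage sketch (hypothetical l3, new_l3 and m, in the style of pmap_enter() below); the swap returns the old entry, so a concurrently-set dirty bit is still observed:
 *
 *	old_l3 = pmap_load_store(l3, new_l3);
 *	if ((old_l3 & PTE_D) != 0)
 *		vm_page_dirty(m);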
*/ #define pmap_load_store(table, entry) atomic_swap_64(table, entry) #define pmap_set(table, mask) atomic_set_64(table, mask) #define pmap_load_clear(table) atomic_swap_64(table, 0) #define pmap_load(table) (*table) /********************/ /* Inline functions */ /********************/ static __inline void pagecopy(void *s, void *d) { memcpy(d, s, PAGE_SIZE); } static __inline void pagezero(void *p) { bzero(p, PAGE_SIZE); } #define pmap_l1_index(va) (((va) >> L1_SHIFT) & Ln_ADDR_MASK) #define pmap_l2_index(va) (((va) >> L2_SHIFT) & Ln_ADDR_MASK) #define pmap_l3_index(va) (((va) >> L3_SHIFT) & Ln_ADDR_MASK) #define PTE_TO_PHYS(pte) ((pte >> PTE_PPN0_S) * PAGE_SIZE) static __inline pd_entry_t * pmap_l1(pmap_t pmap, vm_offset_t va) { return (&pmap->pm_l1[pmap_l1_index(va)]); } static __inline pd_entry_t * pmap_l1_to_l2(pd_entry_t *l1, vm_offset_t va) { vm_paddr_t phys; pd_entry_t *l2; phys = PTE_TO_PHYS(pmap_load(l1)); l2 = (pd_entry_t *)PHYS_TO_DMAP(phys); return (&l2[pmap_l2_index(va)]); } static __inline pd_entry_t * pmap_l2(pmap_t pmap, vm_offset_t va) { pd_entry_t *l1; l1 = pmap_l1(pmap, va); if (l1 == NULL) return (NULL); - if ((pmap_load(l1) & PTE_VALID) == 0) + if ((pmap_load(l1) & PTE_V) == 0) return (NULL); - if ((pmap_load(l1) & PTE_TYPE_M) != (PTE_TYPE_PTR << PTE_TYPE_S)) + if ((pmap_load(l1) & PTE_RX) != 0) return (NULL); return (pmap_l1_to_l2(l1, va)); } static __inline pt_entry_t * pmap_l2_to_l3(pd_entry_t *l2, vm_offset_t va) { vm_paddr_t phys; pt_entry_t *l3; phys = PTE_TO_PHYS(pmap_load(l2)); l3 = (pd_entry_t *)PHYS_TO_DMAP(phys); return (&l3[pmap_l3_index(va)]); } static __inline pt_entry_t * pmap_l3(pmap_t pmap, vm_offset_t va) { pd_entry_t *l2; l2 = pmap_l2(pmap, va); if (l2 == NULL) return (NULL); - if ((pmap_load(l2) & PTE_VALID) == 0) + if ((pmap_load(l2) & PTE_V) == 0) return (NULL); - if ((pmap_load(l2) & PTE_TYPE_M) != (PTE_TYPE_PTR << PTE_TYPE_S)) + if ((pmap_load(l2) & PTE_RX) != 0) return (NULL); return (pmap_l2_to_l3(l2, va)); } static __inline int pmap_is_write(pt_entry_t entry) { - if (entry & (1 << PTE_TYPE_S)) - return (1); - - return (0); + return (entry & PTE_W); } static __inline int pmap_is_current(pmap_t pmap) { return ((pmap == pmap_kernel()) || (pmap == curthread->td_proc->p_vmspace->vm_map.pmap)); } static __inline int pmap_l3_valid(pt_entry_t l3) { - return (l3 & PTE_VALID); + return (l3 & PTE_V); } static __inline int pmap_l3_valid_cacheable(pt_entry_t l3) { /* TODO */ return (0); } #define PTE_SYNC(pte) cpu_dcache_wb_range((vm_offset_t)pte, sizeof(*pte)) /* Checks if the page is dirty. 
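 * On RISC-V the translation hardware sets PTE_D on the first write through a mapping, so a set bit means the backing page has been modified. A sketch of a typical caller (hypothetical l3 and m):
 *
 *	if (pmap_page_dirty(pmap_load(l3)))
 *		vm_page_dirty(m);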
*/ static inline int pmap_page_dirty(pt_entry_t pte) { - return (pte & PTE_DIRTY); + return (pte & PTE_D); } static __inline void pmap_resident_count_inc(pmap_t pmap, int count) { PMAP_LOCK_ASSERT(pmap, MA_OWNED); pmap->pm_stats.resident_count += count; } static __inline void pmap_resident_count_dec(pmap_t pmap, int count) { PMAP_LOCK_ASSERT(pmap, MA_OWNED); KASSERT(pmap->pm_stats.resident_count >= count, ("pmap %p resident count underflow %ld %d", pmap, pmap->pm_stats.resident_count, count)); pmap->pm_stats.resident_count -= count; } static void pmap_distribute_l1(struct pmap *pmap, vm_pindex_t l1index, pt_entry_t entry) { struct pmap *user_pmap; pd_entry_t *l1; /* Distribute new kernel L1 entry to all the user pmaps */ if (pmap != kernel_pmap) return; LIST_FOREACH(user_pmap, &allpmaps, pm_list) { l1 = &user_pmap->pm_l1[l1index]; if (entry) pmap_load_store(l1, entry); else pmap_load_clear(l1); } } static pt_entry_t * pmap_early_page_idx(vm_offset_t l1pt, vm_offset_t va, u_int *l1_slot, u_int *l2_slot) { pt_entry_t *l2; pd_entry_t *l1; l1 = (pd_entry_t *)l1pt; *l1_slot = (va >> L1_SHIFT) & Ln_ADDR_MASK; /* Check that locore used an L1 table mapping */ - KASSERT((l1[*l1_slot] & PTE_TYPE_M) == (PTE_TYPE_PTR << PTE_TYPE_S), + KASSERT((l1[*l1_slot] & PTE_RX) == 0, ("Invalid bootstrap L1 table")); /* Find the address of the L2 table */ l2 = (pt_entry_t *)init_pt_va; *l2_slot = pmap_l2_index(va); return (l2); } static vm_paddr_t pmap_early_vtophys(vm_offset_t l1pt, vm_offset_t va) { u_int l1_slot, l2_slot; pt_entry_t *l2; vm_paddr_t ret; l2 = pmap_early_page_idx(l1pt, va, &l1_slot, &l2_slot); /* L2 is superpages */ ret = (l2[l2_slot] >> PTE_PPN1_S) << L2_SHIFT; ret += (va & L2_OFFSET); return (ret); } static void pmap_bootstrap_dmap(vm_offset_t l1pt, vm_paddr_t kernstart) { vm_offset_t va; vm_paddr_t pa; pd_entry_t *l1; u_int l1_slot; pt_entry_t entry; pn_t pn; + /* + * Initialize DMAP starting from zero physical address. + * TODO: remove this once the machine-mode code is split out.
+ */ + kernstart = 0; + printf("%s: l1pt 0x%016lx kernstart 0x%016lx\n", __func__, l1pt, kernstart); + pa = kernstart & ~L1_OFFSET; va = DMAP_MIN_ADDRESS; l1 = (pd_entry_t *)l1pt; l1_slot = pmap_l1_index(DMAP_MIN_ADDRESS); for (; va < DMAP_MAX_ADDRESS; pa += L1_SIZE, va += L1_SIZE, l1_slot++) { KASSERT(l1_slot < Ln_ENTRIES, ("Invalid L1 index")); /* superpages */ pn = (pa / PAGE_SIZE); - entry = (PTE_VALID | (PTE_TYPE_SRWX << PTE_TYPE_S)); + entry = (PTE_V | PTE_RWX); entry |= (pn << PTE_PPN0_S); pmap_load_store(&l1[l1_slot], entry); } cpu_dcache_wb_range((vm_offset_t)l1, PAGE_SIZE); cpu_tlb_flushID(); } static vm_offset_t pmap_bootstrap_l3(vm_offset_t l1pt, vm_offset_t va, vm_offset_t l3_start) { vm_offset_t l2pt, l3pt; pt_entry_t entry; pd_entry_t *l2; vm_paddr_t pa; u_int l2_slot; pn_t pn; KASSERT((va & L2_OFFSET) == 0, ("Invalid virtual address")); l2 = pmap_l2(kernel_pmap, va); l2 = (pd_entry_t *)((uintptr_t)l2 & ~(PAGE_SIZE - 1)); l2pt = (vm_offset_t)l2; l2_slot = pmap_l2_index(va); l3pt = l3_start; for (; va < VM_MAX_KERNEL_ADDRESS; l2_slot++, va += L2_SIZE) { KASSERT(l2_slot < Ln_ENTRIES, ("Invalid L2 index")); pa = pmap_early_vtophys(l1pt, l3pt); pn = (pa / PAGE_SIZE); - entry = (PTE_VALID | (PTE_TYPE_PTR << PTE_TYPE_S)); + entry = (PTE_V); entry |= (pn << PTE_PPN0_S); pmap_load_store(&l2[l2_slot], entry); l3pt += PAGE_SIZE; } + /* Zero the newly allocated L3 page tables */ memset((void *)l3_start, 0, l3pt - l3_start); cpu_dcache_wb_range(l3_start, l3pt - l3_start); cpu_dcache_wb_range((vm_offset_t)l2, PAGE_SIZE); - return l3pt; + return (l3pt); } /* * Bootstrap the system enough to run with virtual memory. */ void pmap_bootstrap(vm_offset_t l1pt, vm_paddr_t kernstart, vm_size_t kernlen) { u_int l1_slot, l2_slot, avail_slot, map_slot, used_map_slot; uint64_t kern_delta; pt_entry_t *l2; vm_offset_t va, freemempos; vm_offset_t dpcpu, msgbufpv; vm_paddr_t pa, min_pa; int i; kern_delta = KERNBASE - kernstart; physmem = 0; printf("pmap_bootstrap %lx %lx %lx\n", l1pt, kernstart, kernlen); printf("%lx\n", l1pt); printf("%lx\n", (KERNBASE >> L1_SHIFT) & Ln_ADDR_MASK); /* Set this early so we can use the pagetable walking functions */ kernel_pmap_store.pm_l1 = (pd_entry_t *)l1pt; PMAP_LOCK_INIT(kernel_pmap); /* * Initialize the global pv list lock. */ rw_init(&pvh_global_lock, "pmap pv global"); LIST_INIT(&allpmaps); /* Assume the address we were loaded to is a valid physical address */ min_pa = KERNBASE - kern_delta; /* * Find the minimum physical address. physmap is sorted, * but may contain empty ranges. */ for (i = 0; i < (physmap_idx * 2); i += 2) { if (physmap[i] == physmap[i + 1]) continue; if (physmap[i] <= min_pa) min_pa = physmap[i]; break; } /* Create a direct map region early so we can use it for pa -> va */ pmap_bootstrap_dmap(l1pt, min_pa); va = KERNBASE; pa = KERNBASE - kern_delta; /* * Start to initialize phys_avail by copying from physmap * up to the physical address KERNBASE points at.
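 * For example (illustrative values only), with physmap = { 0x40000000, 0x48000000, 0x80000000, 0x88000000 } and the kernel loaded at pa 0x80200000, the first range is copied into phys_avail verbatim and the loop below stops at the second range, which contains the kernel.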
*/ map_slot = avail_slot = 0; for (; map_slot < (physmap_idx * 2); map_slot += 2) { if (physmap[map_slot] == physmap[map_slot + 1]) continue; + if (physmap[map_slot] <= pa && + physmap[map_slot + 1] > pa) + break; + phys_avail[avail_slot] = physmap[map_slot]; phys_avail[avail_slot + 1] = physmap[map_slot + 1]; physmem += (phys_avail[avail_slot + 1] - phys_avail[avail_slot]) >> PAGE_SHIFT; avail_slot += 2; } /* Add the memory before the kernel */ if (physmap[avail_slot] < pa) { phys_avail[avail_slot] = physmap[map_slot]; phys_avail[avail_slot + 1] = pa; physmem += (phys_avail[avail_slot + 1] - phys_avail[avail_slot]) >> PAGE_SHIFT; avail_slot += 2; } used_map_slot = map_slot; /* * Read the page table to find out what is already mapped. * This assumes we have mapped a block of memory from KERNBASE * using a single L1 entry. */ l2 = pmap_early_page_idx(l1pt, KERNBASE, &l1_slot, &l2_slot); /* Sanity check the index, KERNBASE should be the first VA */ KASSERT(l2_slot == 0, ("The L2 index is non-zero")); /* Find how many pages we have mapped */ for (; l2_slot < Ln_ENTRIES; l2_slot++) { - if ((l2[l2_slot] & PTE_VALID) == 0) + if ((l2[l2_slot] & PTE_V) == 0) break; /* Check locore used L2 superpages */ - KASSERT((l2[l2_slot] & PTE_TYPE_M) != (PTE_TYPE_PTR << PTE_TYPE_S), + KASSERT((l2[l2_slot] & PTE_RX) != 0, ("Invalid bootstrap L2 table")); va += L2_SIZE; pa += L2_SIZE; } va = roundup2(va, L2_SIZE); freemempos = KERNBASE + kernlen; freemempos = roundup2(freemempos, PAGE_SIZE); /* Create the l3 tables for the early devmap */ freemempos = pmap_bootstrap_l3(l1pt, VM_MAX_KERNEL_ADDRESS - L2_SIZE, freemempos); cpu_tlb_flushID(); #define alloc_pages(var, np) \ (var) = freemempos; \ freemempos += (np * PAGE_SIZE); \ memset((char *)(var), 0, ((np) * PAGE_SIZE)); /* Allocate dynamic per-cpu area. */ alloc_pages(dpcpu, DPCPU_SIZE / PAGE_SIZE); dpcpu_init((void *)dpcpu, 0); /* Allocate memory for the msgbuf, e.g. for /sbin/dmesg */ alloc_pages(msgbufpv, round_page(msgbufsize) / PAGE_SIZE); msgbufp = (void *)msgbufpv; virtual_avail = roundup2(freemempos, L2_SIZE); virtual_end = VM_MAX_KERNEL_ADDRESS - L2_SIZE; kernel_vm_end = virtual_avail; pa = pmap_early_vtophys(l1pt, freemempos); /* Finish initialising physmap */ map_slot = used_map_slot; for (; avail_slot < (PHYS_AVAIL_SIZE - 2) && map_slot < (physmap_idx * 2); map_slot += 2) { - if (physmap[map_slot] == physmap[map_slot + 1]) + if (physmap[map_slot] == physmap[map_slot + 1]) { continue; + } /* Have we used the current range? */ - if (physmap[map_slot + 1] <= pa) + if (physmap[map_slot + 1] <= pa) { continue; + } /* Do we need to split the entry? */ if (physmap[map_slot] < pa) { phys_avail[avail_slot] = pa; phys_avail[avail_slot + 1] = physmap[map_slot + 1]; } else { phys_avail[avail_slot] = physmap[map_slot]; phys_avail[avail_slot + 1] = physmap[map_slot + 1]; } physmem += (phys_avail[avail_slot + 1] - phys_avail[avail_slot]) >> PAGE_SHIFT; avail_slot += 2; } phys_avail[avail_slot] = 0; phys_avail[avail_slot + 1] = 0; /* * Maxmem isn't the "maximum memory", it's one larger than the * highest page of the physical address space. It should be * called something like "Maxphyspage". */ Maxmem = atop(phys_avail[avail_slot - 1]); cpu_tlb_flushID(); } /* * Initialize a vm_page's machine-dependent fields. */ void pmap_page_init(vm_page_t m) { TAILQ_INIT(&m->md.pv_list); m->md.pv_memattr = VM_MEMATTR_WRITE_BACK; } /* * Initialize the pmap module. * Called by vm_init, to initialize any structures that the pmap * system needs to map virtual memory. 
*/ void pmap_init(void) { int i; /* * Initialize the pv chunk list mutex. */ mtx_init(&pv_chunks_mutex, "pmap pv chunk list", NULL, MTX_DEF); /* * Initialize the pool of pv list locks. */ for (i = 0; i < NPV_LIST_LOCKS; i++) rw_init(&pv_list_locks[i], "pmap pv list"); } /* * Normal, non-SMP, invalidation functions. * We inline these within pmap.c for speed. */ PMAP_INLINE void pmap_invalidate_page(pmap_t pmap, vm_offset_t va) { /* TODO */ sched_pin(); __asm __volatile("sfence.vm"); sched_unpin(); } PMAP_INLINE void pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) { /* TODO */ sched_pin(); __asm __volatile("sfence.vm"); sched_unpin(); } PMAP_INLINE void pmap_invalidate_all(pmap_t pmap) { /* TODO */ sched_pin(); __asm __volatile("sfence.vm"); sched_unpin(); } /* * Routine: pmap_extract * Function: * Extract the physical page address associated * with the given map/virtual_address pair. */ vm_paddr_t pmap_extract(pmap_t pmap, vm_offset_t va) { pd_entry_t *l2p, l2; pt_entry_t *l3p, l3; vm_paddr_t pa; pa = 0; PMAP_LOCK(pmap); /* * Start with the l2 table. We are unable to allocate * pages in the l1 table. */ l2p = pmap_l2(pmap, va); if (l2p != NULL) { l2 = pmap_load(l2p); - if ((l2 & PTE_TYPE_M) == (PTE_TYPE_PTR << PTE_TYPE_S)) { + if ((l2 & PTE_RX) == 0) { l3p = pmap_l2_to_l3(l2p, va); if (l3p != NULL) { l3 = pmap_load(l3p); pa = PTE_TO_PHYS(l3); pa |= (va & L3_OFFSET); } } else { /* L2 is superpages */ pa = (l2 >> PTE_PPN1_S) << L2_SHIFT; pa |= (va & L2_OFFSET); } } PMAP_UNLOCK(pmap); return (pa); } /* * Routine: pmap_extract_and_hold * Function: * Atomically extract and hold the physical page * with the given pmap and virtual address pair * if that mapping permits the given protection. */ vm_page_t pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot) { pt_entry_t *l3p, l3; vm_paddr_t phys; vm_paddr_t pa; vm_page_t m; pa = 0; m = NULL; PMAP_LOCK(pmap); retry: l3p = pmap_l3(pmap, va); if (l3p != NULL && (l3 = pmap_load(l3p)) != 0) { if ((pmap_is_write(l3)) || ((prot & VM_PROT_WRITE) == 0)) { phys = PTE_TO_PHYS(l3); if (vm_page_pa_tryrelock(pmap, phys, &pa)) goto retry; m = PHYS_TO_VM_PAGE(phys); vm_page_hold(m); } } PA_UNLOCK_COND(pa); PMAP_UNLOCK(pmap); return (m); } vm_paddr_t pmap_kextract(vm_offset_t va) { pd_entry_t *l2; pt_entry_t *l3; vm_paddr_t pa; if (va >= DMAP_MIN_ADDRESS && va < DMAP_MAX_ADDRESS) { pa = DMAP_TO_PHYS(va); } else { l2 = pmap_l2(kernel_pmap, va); if (l2 == NULL) panic("pmap_kextract: No l2"); - if ((pmap_load(l2) & PTE_TYPE_M) != (PTE_TYPE_PTR << PTE_TYPE_S)) { + if ((pmap_load(l2) & PTE_RX) != 0) { /* superpages */ pa = (pmap_load(l2) >> PTE_PPN1_S) << L2_SHIFT; pa |= (va & L2_OFFSET); return (pa); } l3 = pmap_l2_to_l3(l2, va); if (l3 == NULL) panic("pmap_kextract: No l3..."); pa = PTE_TO_PHYS(pmap_load(l3)); pa |= (va & PAGE_MASK); } return (pa); } /*************************************************** * Low level mapping routines.....
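 *
 * A typical caller maps a device register window at a page-aligned kernel va and later tears it down; the values here are hypothetical:
 *
 *	pmap_kenter_device(va, 0x1000, 0x40001000);
 *	...
 *	pmap_kremove_device(va, 0x1000);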
***************************************************/ void pmap_kenter_device(vm_offset_t sva, vm_size_t size, vm_paddr_t pa) { pt_entry_t entry; pt_entry_t *l3; vm_offset_t va; pn_t pn; KASSERT((pa & L3_OFFSET) == 0, ("pmap_kenter_device: Invalid physical address")); KASSERT((sva & L3_OFFSET) == 0, ("pmap_kenter_device: Invalid virtual address")); KASSERT((size & PAGE_MASK) == 0, ("pmap_kenter_device: Mapping is not page-sized")); va = sva; while (size != 0) { l3 = pmap_l3(kernel_pmap, va); KASSERT(l3 != NULL, ("Invalid page table, va: 0x%lx", va)); pn = (pa / PAGE_SIZE); - entry = (PTE_VALID | (PTE_TYPE_SRWX << PTE_TYPE_S)); + entry = (PTE_V | PTE_RWX); entry |= (pn << PTE_PPN0_S); pmap_load_store(l3, entry); PTE_SYNC(l3); va += PAGE_SIZE; pa += PAGE_SIZE; size -= PAGE_SIZE; } pmap_invalidate_range(kernel_pmap, sva, va); } /* * Remove a page from the kernel pagetables. * Note: not SMP coherent. */ PMAP_INLINE void pmap_kremove(vm_offset_t va) { pt_entry_t *l3; l3 = pmap_l3(kernel_pmap, va); KASSERT(l3 != NULL, ("pmap_kremove: Invalid address")); if (pmap_l3_valid_cacheable(pmap_load(l3))) cpu_dcache_wb_range(va, L3_SIZE); pmap_load_clear(l3); PTE_SYNC(l3); pmap_invalidate_page(kernel_pmap, va); } void pmap_kremove_device(vm_offset_t sva, vm_size_t size) { pt_entry_t *l3; vm_offset_t va; KASSERT((sva & L3_OFFSET) == 0, ("pmap_kremove_device: Invalid virtual address")); KASSERT((size & PAGE_MASK) == 0, ("pmap_kremove_device: Mapping is not page-sized")); va = sva; while (size != 0) { l3 = pmap_l3(kernel_pmap, va); KASSERT(l3 != NULL, ("Invalid page table, va: 0x%lx", va)); pmap_load_clear(l3); PTE_SYNC(l3); va += PAGE_SIZE; size -= PAGE_SIZE; } pmap_invalidate_range(kernel_pmap, sva, va); } /* * Used to map a range of physical addresses into kernel * virtual address space. * * The value passed in '*virt' is a suggested virtual address for * the mapping. Architectures which can support a direct-mapped * physical to virtual region can return the appropriate address * within that region, leaving '*virt' unchanged. Other * architectures should map the pages starting at '*virt' and * update '*virt' with the first usable address after the mapped * region. */ vm_offset_t pmap_map(vm_offset_t *virt, vm_paddr_t start, vm_paddr_t end, int prot) { return PHYS_TO_DMAP(start); } /* * Add a list of wired pages to the kva * this routine is only used for temporary * kernel mappings that do not need to have * page modification or references recorded. * Note that old mappings are simply written * over. The page *must* be wired. * Note: SMP coherent. Uses a ranged shootdown IPI. */ void pmap_qenter(vm_offset_t sva, vm_page_t *ma, int count) { pt_entry_t *l3, pa; vm_offset_t va; vm_page_t m; pt_entry_t entry; pn_t pn; int i; va = sva; for (i = 0; i < count; i++) { m = ma[i]; pa = VM_PAGE_TO_PHYS(m); pn = (pa / PAGE_SIZE); l3 = pmap_l3(kernel_pmap, va); - entry = (PTE_VALID | (PTE_TYPE_SRWX << PTE_TYPE_S)); + entry = (PTE_V | PTE_RWX); entry |= (pn << PTE_PPN0_S); pmap_load_store(l3, entry); PTE_SYNC(l3); va += L3_SIZE; } pmap_invalidate_range(kernel_pmap, sva, va); } /* * This routine tears out page mappings from the * kernel -- it is meant only for temporary mappings. * Note: SMP coherent. Uses a ranged shootdown IPI. 
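 * It is the inverse of pmap_qenter() above and is called with the same base address and page count (hypothetical va, ma and npages):
 *
 *	pmap_qenter(va, ma, npages);
 *	... use the temporary mapping ...
 *	pmap_qremove(va, npages);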
*/ void pmap_qremove(vm_offset_t sva, int count) { pt_entry_t *l3; vm_offset_t va; KASSERT(sva >= VM_MIN_KERNEL_ADDRESS, ("usermode va %lx", sva)); va = sva; while (count-- > 0) { l3 = pmap_l3(kernel_pmap, va); KASSERT(l3 != NULL, ("pmap_qremove: Invalid address")); if (pmap_l3_valid_cacheable(pmap_load(l3))) cpu_dcache_wb_range(va, L3_SIZE); pmap_load_clear(l3); PTE_SYNC(l3); va += PAGE_SIZE; } pmap_invalidate_range(kernel_pmap, sva, va); } /*************************************************** * Page table page management routines..... ***************************************************/ static __inline void pmap_free_zero_pages(struct spglist *free) { vm_page_t m; while ((m = SLIST_FIRST(free)) != NULL) { SLIST_REMOVE_HEAD(free, plinks.s.ss); /* Preserve the page's PG_ZERO setting. */ vm_page_free_toq(m); } } /* * Schedule the specified unused page table page to be freed. Specifically, * add the page to the specified list of pages that will be released to the * physical memory manager after the TLB has been updated. */ static __inline void pmap_add_delayed_free_list(vm_page_t m, struct spglist *free, boolean_t set_PG_ZERO) { if (set_PG_ZERO) m->flags |= PG_ZERO; else m->flags &= ~PG_ZERO; SLIST_INSERT_HEAD(free, m, plinks.s.ss); } /* * Decrements a page table page's wire count, which is used to record the * number of valid page table entries within the page. If the wire count * drops to zero, then the page table page is unmapped. Returns TRUE if the * page table page was unmapped and FALSE otherwise. */ static inline boolean_t pmap_unwire_l3(pmap_t pmap, vm_offset_t va, vm_page_t m, struct spglist *free) { --m->wire_count; if (m->wire_count == 0) { _pmap_unwire_l3(pmap, va, m, free); return (TRUE); } else { return (FALSE); } } static void _pmap_unwire_l3(pmap_t pmap, vm_offset_t va, vm_page_t m, struct spglist *free) { vm_paddr_t phys; PMAP_LOCK_ASSERT(pmap, MA_OWNED); /* * unmap the page table page */ if (m->pindex >= NUPDE) { /* PD page */ pd_entry_t *l1; l1 = pmap_l1(pmap, va); pmap_load_clear(l1); pmap_distribute_l1(pmap, pmap_l1_index(va), 0); PTE_SYNC(l1); } else { /* PTE page */ pd_entry_t *l2; l2 = pmap_l2(pmap, va); pmap_load_clear(l2); PTE_SYNC(l2); } pmap_resident_count_dec(pmap, 1); if (m->pindex < NUPDE) { pd_entry_t *l1; /* We just released a PT, unhold the matching PD */ vm_page_t pdpg; l1 = pmap_l1(pmap, va); phys = PTE_TO_PHYS(pmap_load(l1)); pdpg = PHYS_TO_VM_PAGE(phys); pmap_unwire_l3(pmap, va, pdpg, free); } pmap_invalidate_page(pmap, va); /* * This is a release store so that the ordinary store unmapping * the page table page is globally performed before TLB shootdown * is begun. */ atomic_subtract_rel_int(&vm_cnt.v_wire_count, 1); /* * Put page on a list so that it is released after * *ALL* TLB shootdown is done */ pmap_add_delayed_free_list(m, free, TRUE); } /* * After removing an l3 entry, this routine is used to * conditionally free the page, and manage the hold/wire counts.
*/ static int pmap_unuse_l3(pmap_t pmap, vm_offset_t va, pd_entry_t ptepde, struct spglist *free) { vm_paddr_t phys; vm_page_t mpte; if (va >= VM_MAXUSER_ADDRESS) return (0); KASSERT(ptepde != 0, ("pmap_unuse_pt: ptepde != 0")); phys = PTE_TO_PHYS(ptepde); mpte = PHYS_TO_VM_PAGE(phys); return (pmap_unwire_l3(pmap, va, mpte, free)); } void pmap_pinit0(pmap_t pmap) { PMAP_LOCK_INIT(pmap); bzero(&pmap->pm_stats, sizeof(pmap->pm_stats)); pmap->pm_l1 = kernel_pmap->pm_l1; } int pmap_pinit(pmap_t pmap) { vm_paddr_t l1phys; vm_page_t l1pt; /* * allocate the l1 page */ while ((l1pt = vm_page_alloc(NULL, 0xdeadbeef, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL) VM_WAIT; l1phys = VM_PAGE_TO_PHYS(l1pt); pmap->pm_l1 = (pd_entry_t *)PHYS_TO_DMAP(l1phys); if ((l1pt->flags & PG_ZERO) == 0) pagezero(pmap->pm_l1); bzero(&pmap->pm_stats, sizeof(pmap->pm_stats)); /* Install kernel pagetables */ memcpy(pmap->pm_l1, kernel_pmap->pm_l1, PAGE_SIZE); /* Add to the list of all user pmaps */ LIST_INSERT_HEAD(&allpmaps, pmap, pm_list); return (1); } /* * This routine is called if the desired page table page does not exist. * * If page table page allocation fails, this routine may sleep before * returning NULL. It sleeps only if a lock pointer was given. * * Note: If a page allocation fails at page table level two or three, * one or two pages may be held during the wait, only to be released * afterwards. This conservative approach is easily argued to avoid * race conditions. */ static vm_page_t _pmap_alloc_l3(pmap_t pmap, vm_pindex_t ptepindex, struct rwlock **lockp) { vm_page_t m, /*pdppg, */pdpg; pt_entry_t entry; vm_paddr_t phys; pn_t pn; PMAP_LOCK_ASSERT(pmap, MA_OWNED); /* * Allocate a page table page. */ if ((m = vm_page_alloc(NULL, ptepindex, VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL) { if (lockp != NULL) { RELEASE_PV_LIST_LOCK(lockp); PMAP_UNLOCK(pmap); rw_runlock(&pvh_global_lock); VM_WAIT; rw_rlock(&pvh_global_lock); PMAP_LOCK(pmap); } /* * Indicate the need to retry. While waiting, the page table * page may have been allocated. */ return (NULL); } if ((m->flags & PG_ZERO) == 0) pmap_zero_page(m); /* * Map the pagetable page into the process address space, if * it isn't already there. 
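 * The pindex space encodes the level: assuming 4KB pages and 512-entry tables, indices below NUPDE (512 * 512) name L3 pages hooked into an L2 entry, while NUPDE and above name L2 pages hooked directly into the L1 table; e.g. ptepindex == NUPDE + 1 installs an L2 page in L1 slot 1.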
*/ if (ptepindex >= NUPDE) { pd_entry_t *l1; vm_pindex_t l1index; l1index = ptepindex - NUPDE; l1 = &pmap->pm_l1[l1index]; pn = (VM_PAGE_TO_PHYS(m) / PAGE_SIZE); - entry = (PTE_VALID | (PTE_TYPE_PTR << PTE_TYPE_S)); + entry = (PTE_V); entry |= (pn << PTE_PPN0_S); pmap_load_store(l1, entry); pmap_distribute_l1(pmap, l1index, entry); PTE_SYNC(l1); } else { vm_pindex_t l1index; pd_entry_t *l1, *l2; l1index = ptepindex >> (L1_SHIFT - L2_SHIFT); l1 = &pmap->pm_l1[l1index]; if (pmap_load(l1) == 0) { /* recurse for allocating page dir */ if (_pmap_alloc_l3(pmap, NUPDE + l1index, lockp) == NULL) { --m->wire_count; atomic_subtract_int(&vm_cnt.v_wire_count, 1); vm_page_free_zero(m); return (NULL); } } else { phys = PTE_TO_PHYS(pmap_load(l1)); pdpg = PHYS_TO_VM_PAGE(phys); pdpg->wire_count++; } phys = PTE_TO_PHYS(pmap_load(l1)); l2 = (pd_entry_t *)PHYS_TO_DMAP(phys); l2 = &l2[ptepindex & Ln_ADDR_MASK]; pn = (VM_PAGE_TO_PHYS(m) / PAGE_SIZE); - entry = (PTE_VALID | (PTE_TYPE_PTR << PTE_TYPE_S)); + entry = (PTE_V); entry |= (pn << PTE_PPN0_S); pmap_load_store(l2, entry); PTE_SYNC(l2); } pmap_resident_count_inc(pmap, 1); return (m); } static vm_page_t pmap_alloc_l3(pmap_t pmap, vm_offset_t va, struct rwlock **lockp) { vm_pindex_t ptepindex; pd_entry_t *l2; vm_paddr_t phys; vm_page_t m; /* * Calculate pagetable page index */ ptepindex = pmap_l2_pindex(va); retry: /* * Get the page directory entry */ l2 = pmap_l2(pmap, va); /* * If the page table page is mapped, we just increment the * hold count, and activate it. */ if (l2 != NULL && pmap_load(l2) != 0) { phys = PTE_TO_PHYS(pmap_load(l2)); m = PHYS_TO_VM_PAGE(phys); m->wire_count++; } else { /* * Here if the pte page isn't mapped, or if it has been * deallocated. */ m = _pmap_alloc_l3(pmap, ptepindex, lockp); if (m == NULL && lockp != NULL) goto retry; } return (m); } /*************************************************** * Pmap allocation/deallocation routines. ***************************************************/ /* * Release any resources held by the given physical map. * Called when a pmap initialized by pmap_pinit is being released. * Should only be called if the map contains no valid mappings. 
*/ void pmap_release(pmap_t pmap) { vm_page_t m; KASSERT(pmap->pm_stats.resident_count == 0, ("pmap_release: pmap resident count %ld != 0", pmap->pm_stats.resident_count)); m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pmap->pm_l1)); m->wire_count--; atomic_subtract_int(&vm_cnt.v_wire_count, 1); vm_page_free_zero(m); /* Remove pmap from the allpmaps list */ LIST_REMOVE(pmap, pm_list); /* Remove kernel pagetables */ bzero(pmap->pm_l1, PAGE_SIZE); } #if 0 static int kvm_size(SYSCTL_HANDLER_ARGS) { unsigned long ksize = VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS; return sysctl_handle_long(oidp, &ksize, 0, req); } SYSCTL_PROC(_vm, OID_AUTO, kvm_size, CTLTYPE_LONG|CTLFLAG_RD, 0, 0, kvm_size, "LU", "Size of KVM"); static int kvm_free(SYSCTL_HANDLER_ARGS) { unsigned long kfree = VM_MAX_KERNEL_ADDRESS - kernel_vm_end; return sysctl_handle_long(oidp, &kfree, 0, req); } SYSCTL_PROC(_vm, OID_AUTO, kvm_free, CTLTYPE_LONG|CTLFLAG_RD, 0, 0, kvm_free, "LU", "Amount of KVM free"); #endif /* 0 */ /* * grow the number of kernel page table entries, if needed */ void pmap_growkernel(vm_offset_t addr) { vm_paddr_t paddr; vm_page_t nkpg; pd_entry_t *l1, *l2; pt_entry_t entry; pn_t pn; mtx_assert(&kernel_map->system_mtx, MA_OWNED); addr = roundup2(addr, L2_SIZE); if (addr - 1 >= kernel_map->max_offset) addr = kernel_map->max_offset; while (kernel_vm_end < addr) { l1 = pmap_l1(kernel_pmap, kernel_vm_end); if (pmap_load(l1) == 0) { /* We need a new PDP entry */ nkpg = vm_page_alloc(NULL, kernel_vm_end >> L1_SHIFT, VM_ALLOC_INTERRUPT | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO); if (nkpg == NULL) panic("pmap_growkernel: no memory to grow kernel"); if ((nkpg->flags & PG_ZERO) == 0) pmap_zero_page(nkpg); paddr = VM_PAGE_TO_PHYS(nkpg); pn = (paddr / PAGE_SIZE); - entry = (PTE_VALID | (PTE_TYPE_PTR << PTE_TYPE_S)); + entry = (PTE_V); entry |= (pn << PTE_PPN0_S); pmap_load_store(l1, entry); pmap_distribute_l1(kernel_pmap, pmap_l1_index(kernel_vm_end), entry); PTE_SYNC(l1); continue; /* try again */ } l2 = pmap_l1_to_l2(l1, kernel_vm_end); - if ((pmap_load(l2) & PTE_REF) != 0) { + if ((pmap_load(l2) & PTE_A) != 0) { kernel_vm_end = (kernel_vm_end + L2_SIZE) & ~L2_OFFSET; if (kernel_vm_end - 1 >= kernel_map->max_offset) { kernel_vm_end = kernel_map->max_offset; break; } continue; } nkpg = vm_page_alloc(NULL, kernel_vm_end >> L2_SHIFT, VM_ALLOC_INTERRUPT | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO); if (nkpg == NULL) panic("pmap_growkernel: no memory to grow kernel"); - if ((nkpg->flags & PG_ZERO) == 0) + if ((nkpg->flags & PG_ZERO) == 0) { pmap_zero_page(nkpg); + } paddr = VM_PAGE_TO_PHYS(nkpg); pn = (paddr / PAGE_SIZE); - entry = (PTE_VALID | (PTE_TYPE_PTR << PTE_TYPE_S)); + entry = (PTE_V); entry |= (pn << PTE_PPN0_S); pmap_load_store(l2, entry); PTE_SYNC(l2); pmap_invalidate_page(kernel_pmap, kernel_vm_end); kernel_vm_end = (kernel_vm_end + L2_SIZE) & ~L2_OFFSET; if (kernel_vm_end - 1 >= kernel_map->max_offset) { kernel_vm_end = kernel_map->max_offset; break; } } } /*************************************************** * page management routines. 
***************************************************/ CTASSERT(sizeof(struct pv_chunk) == PAGE_SIZE); CTASSERT(_NPCM == 3); CTASSERT(_NPCPV == 168); static __inline struct pv_chunk * pv_to_chunk(pv_entry_t pv) { return ((struct pv_chunk *)((uintptr_t)pv & ~(uintptr_t)PAGE_MASK)); } #define PV_PMAP(pv) (pv_to_chunk(pv)->pc_pmap) #define PC_FREE0 0xfffffffffffffffful #define PC_FREE1 0xfffffffffffffffful #define PC_FREE2 0x000000fffffffffful static const uint64_t pc_freemask[_NPCM] = { PC_FREE0, PC_FREE1, PC_FREE2 }; #if 0 #ifdef PV_STATS static int pc_chunk_count, pc_chunk_allocs, pc_chunk_frees, pc_chunk_tryfail; SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_count, CTLFLAG_RD, &pc_chunk_count, 0, "Current number of pv entry chunks"); SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_allocs, CTLFLAG_RD, &pc_chunk_allocs, 0, "Current number of pv entry chunks allocated"); SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_frees, CTLFLAG_RD, &pc_chunk_frees, 0, "Current number of pv entry chunks frees"); SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_tryfail, CTLFLAG_RD, &pc_chunk_tryfail, 0, "Number of times tried to get a chunk page but failed."); static long pv_entry_frees, pv_entry_allocs, pv_entry_count; static int pv_entry_spare; SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_frees, CTLFLAG_RD, &pv_entry_frees, 0, "Current number of pv entry frees"); SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_allocs, CTLFLAG_RD, &pv_entry_allocs, 0, "Current number of pv entry allocs"); SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_count, CTLFLAG_RD, &pv_entry_count, 0, "Current number of pv entries"); SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_spare, CTLFLAG_RD, &pv_entry_spare, 0, "Current number of spare pv entries"); #endif #endif /* 0 */ /* * We are in a serious low memory condition. Resort to * drastic measures to free some pages so we can allocate * another pv entry chunk. * * Returns NULL if PV entries were reclaimed from the specified pmap. * * We do not, however, unmap 2mpages because subsequent accesses will * allocate per-page pv entries until repromotion occurs, thereby * exacerbating the shortage of free pv entries. */ static vm_page_t reclaim_pv_chunk(pmap_t locked_pmap, struct rwlock **lockp) { panic("RISCVTODO: reclaim_pv_chunk"); } /* * free the pv_entry back to the free list */ static void free_pv_entry(pmap_t pmap, pv_entry_t pv) { struct pv_chunk *pc; int idx, field, bit; rw_assert(&pvh_global_lock, RA_LOCKED); PMAP_LOCK_ASSERT(pmap, MA_OWNED); PV_STAT(atomic_add_long(&pv_entry_frees, 1)); PV_STAT(atomic_add_int(&pv_entry_spare, 1)); PV_STAT(atomic_subtract_long(&pv_entry_count, 1)); pc = pv_to_chunk(pv); idx = pv - &pc->pc_pventry[0]; field = idx / 64; bit = idx % 64; pc->pc_map[field] |= 1ul << bit; if (pc->pc_map[0] != PC_FREE0 || pc->pc_map[1] != PC_FREE1 || pc->pc_map[2] != PC_FREE2) { /* 98% of the time, pc is already at the head of the list. 
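 * The entry being freed was most likely allocated from that chunk, so the TAILQ move below is rarely taken. The index arithmetic above packs _NPCPV (168) entries into three 64-bit maps; e.g. idx 100 lands in field 1, bit 36.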
*/ if (__predict_false(pc != TAILQ_FIRST(&pmap->pm_pvchunk))) { TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list); } return; } TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); free_pv_chunk(pc); } static void free_pv_chunk(struct pv_chunk *pc) { vm_page_t m; mtx_lock(&pv_chunks_mutex); TAILQ_REMOVE(&pv_chunks, pc, pc_lru); mtx_unlock(&pv_chunks_mutex); PV_STAT(atomic_subtract_int(&pv_entry_spare, _NPCPV)); PV_STAT(atomic_subtract_int(&pc_chunk_count, 1)); PV_STAT(atomic_add_int(&pc_chunk_frees, 1)); /* entire chunk is free, return it */ m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pc)); #if 0 /* TODO: For minidump */ dump_drop_page(m->phys_addr); #endif vm_page_unwire(m, PQ_INACTIVE); vm_page_free(m); } /* * Returns a new PV entry, allocating a new PV chunk from the system when * needed. If this PV chunk allocation fails and a PV list lock pointer was * given, a PV chunk is reclaimed from an arbitrary pmap. Otherwise, NULL is * returned. * * The given PV list lock may be released. */ static pv_entry_t get_pv_entry(pmap_t pmap, struct rwlock **lockp) { int bit, field; pv_entry_t pv; struct pv_chunk *pc; vm_page_t m; rw_assert(&pvh_global_lock, RA_LOCKED); PMAP_LOCK_ASSERT(pmap, MA_OWNED); PV_STAT(atomic_add_long(&pv_entry_allocs, 1)); retry: pc = TAILQ_FIRST(&pmap->pm_pvchunk); if (pc != NULL) { for (field = 0; field < _NPCM; field++) { if (pc->pc_map[field]) { bit = ffsl(pc->pc_map[field]) - 1; break; } } if (field < _NPCM) { pv = &pc->pc_pventry[field * 64 + bit]; pc->pc_map[field] &= ~(1ul << bit); /* If this was the last item, move it to tail */ if (pc->pc_map[0] == 0 && pc->pc_map[1] == 0 && pc->pc_map[2] == 0) { TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc, pc_list); } PV_STAT(atomic_add_long(&pv_entry_count, 1)); PV_STAT(atomic_subtract_int(&pv_entry_spare, 1)); return (pv); } } /* No free items, allocate another chunk */ m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED); if (m == NULL) { if (lockp == NULL) { PV_STAT(pc_chunk_tryfail++); return (NULL); } m = reclaim_pv_chunk(pmap, lockp); if (m == NULL) goto retry; } PV_STAT(atomic_add_int(&pc_chunk_count, 1)); PV_STAT(atomic_add_int(&pc_chunk_allocs, 1)); #if 0 /* TODO: This is for minidump */ dump_add_page(m->phys_addr); #endif pc = (void *)PHYS_TO_DMAP(m->phys_addr); pc->pc_pmap = pmap; pc->pc_map[0] = PC_FREE0 & ~1ul; /* preallocated bit 0 */ pc->pc_map[1] = PC_FREE1; pc->pc_map[2] = PC_FREE2; mtx_lock(&pv_chunks_mutex); TAILQ_INSERT_TAIL(&pv_chunks, pc, pc_lru); mtx_unlock(&pv_chunks_mutex); pv = &pc->pc_pventry[0]; TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list); PV_STAT(atomic_add_long(&pv_entry_count, 1)); PV_STAT(atomic_add_int(&pv_entry_spare, _NPCPV - 1)); return (pv); } /* * First find and then remove the pv entry for the specified pmap and virtual * address from the specified pv list. Returns the pv entry if found and NULL * otherwise. This operation can be performed on pv lists for either 4KB or * 2MB page mappings. */ static __inline pv_entry_t pmap_pvh_remove(struct md_page *pvh, pmap_t pmap, vm_offset_t va) { pv_entry_t pv; rw_assert(&pvh_global_lock, RA_LOCKED); TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) { if (pmap == PV_PMAP(pv) && va == pv->pv_va) { TAILQ_REMOVE(&pvh->pv_list, pv, pv_next); pvh->pv_gen++; break; } } return (pv); } /* * First find and then destroy the pv entry for the specified pmap and virtual * address. This operation can be performed on pv lists for either 4KB or 2MB * page mappings. 
*/ static void pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va) { pv_entry_t pv; pv = pmap_pvh_remove(pvh, pmap, va); KASSERT(pv != NULL, ("pmap_pvh_free: pv not found")); free_pv_entry(pmap, pv); } /* * Conditionally create the PV entry for a 4KB page mapping if the required * memory can be allocated without resorting to reclamation. */ static boolean_t pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va, vm_page_t m, struct rwlock **lockp) { pv_entry_t pv; rw_assert(&pvh_global_lock, RA_LOCKED); PMAP_LOCK_ASSERT(pmap, MA_OWNED); /* Pass NULL instead of the lock pointer to disable reclamation. */ if ((pv = get_pv_entry(pmap, NULL)) != NULL) { pv->pv_va = va; CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m); TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); m->md.pv_gen++; return (TRUE); } else return (FALSE); } /* * pmap_remove_l3: do the things to unmap a page in a process */ static int pmap_remove_l3(pmap_t pmap, pt_entry_t *l3, vm_offset_t va, pd_entry_t l2e, struct spglist *free, struct rwlock **lockp) { pt_entry_t old_l3; vm_paddr_t phys; vm_page_t m; PMAP_LOCK_ASSERT(pmap, MA_OWNED); if (pmap_is_current(pmap) && pmap_l3_valid_cacheable(pmap_load(l3))) cpu_dcache_wb_range(va, L3_SIZE); old_l3 = pmap_load_clear(l3); PTE_SYNC(l3); pmap_invalidate_page(pmap, va); if (old_l3 & PTE_SW_WIRED) pmap->pm_stats.wired_count -= 1; pmap_resident_count_dec(pmap, 1); if (old_l3 & PTE_SW_MANAGED) { phys = PTE_TO_PHYS(old_l3); m = PHYS_TO_VM_PAGE(phys); if (pmap_page_dirty(old_l3)) vm_page_dirty(m); - if (old_l3 & PTE_REF) + if (old_l3 & PTE_A) vm_page_aflag_set(m, PGA_REFERENCED); CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m); pmap_pvh_free(&m->md, pmap, va); } return (pmap_unuse_l3(pmap, va, l2e, free)); } /* * Remove the given range of addresses from the specified map. * * It is assumed that the start and end are properly * rounded to the page size. */ void pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) { struct rwlock *lock; vm_offset_t va, va_next; pd_entry_t *l1, *l2; pt_entry_t l3_pte, *l3; struct spglist free; int anyvalid; /* * Perform an unsynchronized read. This is, however, safe. */ if (pmap->pm_stats.resident_count == 0) return; anyvalid = 0; SLIST_INIT(&free); rw_rlock(&pvh_global_lock); PMAP_LOCK(pmap); lock = NULL; for (; sva < eva; sva = va_next) { if (pmap->pm_stats.resident_count == 0) break; l1 = pmap_l1(pmap, sva); if (pmap_load(l1) == 0) { va_next = (sva + L1_SIZE) & ~L1_OFFSET; if (va_next < sva) va_next = eva; continue; } /* * Calculate index for next page table. */ va_next = (sva + L2_SIZE) & ~L2_OFFSET; if (va_next < sva) va_next = eva; l2 = pmap_l1_to_l2(l1, sva); if (l2 == NULL) continue; l3_pte = pmap_load(l2); /* * Weed out invalid mappings. */ if (l3_pte == 0) continue; - if ((pmap_load(l2) & PTE_TYPE_M) != (PTE_TYPE_PTR << PTE_TYPE_S)) + if ((pmap_load(l2) & PTE_RX) != 0) continue; /* * Limit our scan to either the end of the va represented * by the current page table page, or to the end of the * range being removed. 
*/ if (va_next > eva) va_next = eva; va = va_next; for (l3 = pmap_l2_to_l3(l2, sva); sva != va_next; l3++, sva += L3_SIZE) { if (l3 == NULL) panic("l3 == NULL"); if (pmap_load(l3) == 0) { if (va != va_next) { pmap_invalidate_range(pmap, va, sva); va = va_next; } continue; } if (va == va_next) va = sva; if (pmap_remove_l3(pmap, l3, sva, l3_pte, &free, &lock)) { sva += L3_SIZE; break; } } if (va != va_next) pmap_invalidate_range(pmap, va, sva); } if (lock != NULL) rw_wunlock(lock); if (anyvalid) pmap_invalidate_all(pmap); rw_runlock(&pvh_global_lock); PMAP_UNLOCK(pmap); pmap_free_zero_pages(&free); } /* * Routine: pmap_remove_all * Function: * Removes this physical page from * all physical maps in which it resides. * Reflects back modify bits to the pager. * * Notes: * Original versions of this routine were very * inefficient because they iteratively called * pmap_remove (slow...) */ void pmap_remove_all(vm_page_t m) { pv_entry_t pv; pmap_t pmap; pt_entry_t *l3, tl3; pd_entry_t *l2, tl2; struct spglist free; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_remove_all: page %p is not managed", m)); SLIST_INIT(&free); rw_wlock(&pvh_global_lock); while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) { pmap = PV_PMAP(pv); PMAP_LOCK(pmap); pmap_resident_count_dec(pmap, 1); l2 = pmap_l2(pmap, pv->pv_va); KASSERT(l2 != NULL, ("pmap_remove_all: no l2 table found")); tl2 = pmap_load(l2); - KASSERT((tl2 & PTE_TYPE_M) == (PTE_TYPE_PTR << PTE_TYPE_S), + KASSERT((tl2 & PTE_RX) == 0, ("pmap_remove_all: found a table when expecting " "a block in %p's pv list", m)); l3 = pmap_l2_to_l3(l2, pv->pv_va); if (pmap_is_current(pmap) && pmap_l3_valid_cacheable(pmap_load(l3))) cpu_dcache_wb_range(pv->pv_va, L3_SIZE); tl3 = pmap_load_clear(l3); PTE_SYNC(l3); pmap_invalidate_page(pmap, pv->pv_va); if (tl3 & PTE_SW_WIRED) pmap->pm_stats.wired_count--; - if ((tl3 & PTE_REF) != 0) + if ((tl3 & PTE_A) != 0) vm_page_aflag_set(m, PGA_REFERENCED); /* * Update the vm_page_t clean and reference bits. */ if (pmap_page_dirty(tl3)) vm_page_dirty(m); pmap_unuse_l3(pmap, pv->pv_va, pmap_load(l2), &free); TAILQ_REMOVE(&m->md.pv_list, pv, pv_next); m->md.pv_gen++; free_pv_entry(pmap, pv); PMAP_UNLOCK(pmap); } vm_page_aflag_clear(m, PGA_WRITEABLE); rw_wunlock(&pvh_global_lock); pmap_free_zero_pages(&free); } /* * Set the physical protection on the * specified range of this map as requested. 
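 * Under the new PTE encoding, revoking write access amounts to clearing PTE_W in each valid L3 entry; e.g. mprotect(addr, len, PROT_READ) on a writable region ends up here and the loop below strips PTE_W from every mapping in the range.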
*/ void pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot) { vm_offset_t va, va_next; pd_entry_t *l1, *l2; pt_entry_t *l3p, l3; pt_entry_t entry; if ((prot & VM_PROT_READ) == VM_PROT_NONE) { pmap_remove(pmap, sva, eva); return; } if ((prot & VM_PROT_WRITE) == VM_PROT_WRITE) return; PMAP_LOCK(pmap); for (; sva < eva; sva = va_next) { l1 = pmap_l1(pmap, sva); if (pmap_load(l1) == 0) { va_next = (sva + L1_SIZE) & ~L1_OFFSET; if (va_next < sva) va_next = eva; continue; } va_next = (sva + L2_SIZE) & ~L2_OFFSET; if (va_next < sva) va_next = eva; l2 = pmap_l1_to_l2(l1, sva); if (l2 == NULL) continue; - if ((pmap_load(l2) & PTE_TYPE_M) != (PTE_TYPE_PTR << PTE_TYPE_S)) + if ((pmap_load(l2) & PTE_RX) != 0) continue; if (va_next > eva) va_next = eva; va = va_next; for (l3p = pmap_l2_to_l3(l2, sva); sva != va_next; l3p++, sva += L3_SIZE) { l3 = pmap_load(l3p); if (pmap_l3_valid(l3)) { entry = pmap_load(l3p); - entry &= ~(1 << PTE_TYPE_S); + entry &= ~(PTE_W); pmap_load_store(l3p, entry); PTE_SYNC(l3p); /* XXX: Use pmap_invalidate_range */ pmap_invalidate_page(pmap, va); } } } PMAP_UNLOCK(pmap); /* TODO: Only invalidate entries we are touching */ pmap_invalidate_all(pmap); } /* * Insert the given physical page (p) at * the specified virtual address (v) in the * target physical map with the protection requested. * * If specified, the page will be wired down, meaning * that the related pte can not be reclaimed. * * NB: This is the only routine which MAY NOT lazy-evaluate * or lose information. That is, this routine must actually * insert this page into the given map NOW. */ int pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, u_int flags, int8_t psind __unused) { struct rwlock *lock; pd_entry_t *l1, *l2; pt_entry_t new_l3, orig_l3; pt_entry_t *l3; pv_entry_t pv; vm_paddr_t opa, pa, l2_pa, l3_pa; vm_page_t mpte, om, l2_m, l3_m; boolean_t nosleep; pt_entry_t entry; pn_t l2_pn; pn_t l3_pn; pn_t pn; va = trunc_page(va); if ((m->oflags & VPO_UNMANAGED) == 0 && !vm_page_xbusied(m)) VM_OBJECT_ASSERT_LOCKED(m->object); pa = VM_PAGE_TO_PHYS(m); pn = (pa / PAGE_SIZE); - new_l3 = PTE_VALID; + new_l3 = PTE_V | PTE_R | PTE_X; + if (prot & VM_PROT_WRITE) + new_l3 |= PTE_W; + if ((va >> 63) == 0) + new_l3 |= PTE_U; - if ((prot & VM_PROT_WRITE) == 0) { /* Read-only */ - if ((va >> 63) == 0) /* USER */ - new_l3 |= (PTE_TYPE_SURX << PTE_TYPE_S); - else /* KERNEL */ - new_l3 |= (PTE_TYPE_SRX << PTE_TYPE_S); - } else { - if ((va >> 63) == 0) /* USER */ - new_l3 |= (PTE_TYPE_SURWX << PTE_TYPE_S); - else /* KERNEL */ - new_l3 |= (PTE_TYPE_SRWX << PTE_TYPE_S); - } - new_l3 |= (pn << PTE_PPN0_S); if ((flags & PMAP_ENTER_WIRED) != 0) new_l3 |= PTE_SW_WIRED; CTR2(KTR_PMAP, "pmap_enter: %.16lx -> %.16lx", va, pa); mpte = NULL; lock = NULL; rw_rlock(&pvh_global_lock); PMAP_LOCK(pmap); if (va < VM_MAXUSER_ADDRESS) { nosleep = (flags & PMAP_ENTER_NOSLEEP) != 0; mpte = pmap_alloc_l3(pmap, va, nosleep ? 
NULL : &lock); if (mpte == NULL && nosleep) { CTR0(KTR_PMAP, "pmap_enter: mpte == NULL"); if (lock != NULL) rw_wunlock(lock); rw_runlock(&pvh_global_lock); PMAP_UNLOCK(pmap); return (KERN_RESOURCE_SHORTAGE); } l3 = pmap_l3(pmap, va); } else { l3 = pmap_l3(pmap, va); /* TODO: This is not optimal, but should mostly work */ if (l3 == NULL) { l2 = pmap_l2(pmap, va); if (l2 == NULL) { l2_m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO); if (l2_m == NULL) panic("pmap_enter: l2 pte_m == NULL"); if ((l2_m->flags & PG_ZERO) == 0) pmap_zero_page(l2_m); l2_pa = VM_PAGE_TO_PHYS(l2_m); l2_pn = (l2_pa / PAGE_SIZE); l1 = pmap_l1(pmap, va); - entry = (PTE_VALID | (PTE_TYPE_PTR << PTE_TYPE_S)); + entry = (PTE_V); entry |= (l2_pn << PTE_PPN0_S); pmap_load_store(l1, entry); pmap_distribute_l1(pmap, pmap_l1_index(va), entry); PTE_SYNC(l1); l2 = pmap_l1_to_l2(l1, va); } KASSERT(l2 != NULL, ("No l2 table after allocating one")); l3_m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO); if (l3_m == NULL) panic("pmap_enter: l3 pte_m == NULL"); if ((l3_m->flags & PG_ZERO) == 0) pmap_zero_page(l3_m); l3_pa = VM_PAGE_TO_PHYS(l3_m); l3_pn = (l3_pa / PAGE_SIZE); - entry = (PTE_VALID | (PTE_TYPE_PTR << PTE_TYPE_S)); + entry = (PTE_V); entry |= (l3_pn << PTE_PPN0_S); pmap_load_store(l2, entry); PTE_SYNC(l2); l3 = pmap_l2_to_l3(l2, va); } pmap_invalidate_page(pmap, va); } om = NULL; orig_l3 = pmap_load(l3); opa = PTE_TO_PHYS(orig_l3); /* * Is the specified virtual address already mapped? */ if (pmap_l3_valid(orig_l3)) { /* * Wiring change, just update stats. We don't worry about * wiring PT pages as they remain resident as long as there * are valid mappings in them. Hence, if a user page is wired, * the PT page will be also. */ if ((flags & PMAP_ENTER_WIRED) != 0 && (orig_l3 & PTE_SW_WIRED) == 0) pmap->pm_stats.wired_count++; else if ((flags & PMAP_ENTER_WIRED) == 0 && (orig_l3 & PTE_SW_WIRED) != 0) pmap->pm_stats.wired_count--; /* * Remove the extra PT page reference. */ if (mpte != NULL) { mpte->wire_count--; KASSERT(mpte->wire_count > 0, ("pmap_enter: missing reference to page table page," " va: 0x%lx", va)); } /* * Has the physical page changed? */ if (opa == pa) { /* * No, might be a protection or wiring change. */ if ((orig_l3 & PTE_SW_MANAGED) != 0) { new_l3 |= PTE_SW_MANAGED; if (pmap_is_write(new_l3)) vm_page_aflag_set(m, PGA_WRITEABLE); } goto validate; } /* Flush the cache, there might be uncommitted data in it */ if (pmap_is_current(pmap) && pmap_l3_valid_cacheable(orig_l3)) cpu_dcache_wb_range(va, L3_SIZE); } else { /* * Increment the counters. */ if ((new_l3 & PTE_SW_WIRED) != 0) pmap->pm_stats.wired_count++; pmap_resident_count_inc(pmap, 1); } /* * Enter on the PV list if part of our managed memory. */ if ((m->oflags & VPO_UNMANAGED) == 0) { new_l3 |= PTE_SW_MANAGED; pv = get_pv_entry(pmap, &lock); pv->pv_va = va; CHANGE_PV_LIST_LOCK_TO_PHYS(&lock, pa); TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); m->md.pv_gen++; if (pmap_is_write(new_l3)) vm_page_aflag_set(m, PGA_WRITEABLE); } /* * Update the L3 entry. 
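 * The swap below returns the previous entry, so reference and dirty bits that the hardware set while the old mapping was live are folded into the vm_page before the entry is replaced.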
*/ if (orig_l3 != 0) { validate: orig_l3 = pmap_load_store(l3, new_l3); PTE_SYNC(l3); opa = PTE_TO_PHYS(orig_l3); if (opa != pa) { if ((orig_l3 & PTE_SW_MANAGED) != 0) { om = PHYS_TO_VM_PAGE(opa); if (pmap_page_dirty(orig_l3)) vm_page_dirty(om); - if ((orig_l3 & PTE_REF) != 0) + if ((orig_l3 & PTE_A) != 0) vm_page_aflag_set(om, PGA_REFERENCED); CHANGE_PV_LIST_LOCK_TO_PHYS(&lock, opa); pmap_pvh_free(&om->md, pmap, va); } } else if (pmap_page_dirty(orig_l3)) { if ((orig_l3 & PTE_SW_MANAGED) != 0) vm_page_dirty(m); } } else { pmap_load_store(l3, new_l3); PTE_SYNC(l3); } pmap_invalidate_page(pmap, va); if ((pmap != pmap_kernel()) && (pmap == &curproc->p_vmspace->vm_pmap)) cpu_icache_sync_range(va, PAGE_SIZE); if (lock != NULL) rw_wunlock(lock); rw_runlock(&pvh_global_lock); PMAP_UNLOCK(pmap); return (KERN_SUCCESS); } /* * Maps a sequence of resident pages belonging to the same object. * The sequence begins with the given page m_start. This page is * mapped at the given virtual address start. Each subsequent page is * mapped at a virtual address that is offset from start by the same * amount as the page is offset from m_start within the object. The * last page in the sequence is the page with the largest offset from * m_start that can be mapped at a virtual address less than the given * virtual address end. Not every virtual page between start and end * is mapped; only those for which a resident page exists with the * corresponding offset from m_start are mapped. */ void pmap_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end, vm_page_t m_start, vm_prot_t prot) { struct rwlock *lock; vm_offset_t va; vm_page_t m, mpte; vm_pindex_t diff, psize; VM_OBJECT_ASSERT_LOCKED(m_start->object); psize = atop(end - start); mpte = NULL; m = m_start; lock = NULL; rw_rlock(&pvh_global_lock); PMAP_LOCK(pmap); while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) { va = start + ptoa(diff); mpte = pmap_enter_quick_locked(pmap, va, m, prot, mpte, &lock); m = TAILQ_NEXT(m, listq); } if (lock != NULL) rw_wunlock(lock); rw_runlock(&pvh_global_lock); PMAP_UNLOCK(pmap); } /* * this code makes some *MAJOR* assumptions: * 1. Current pmap & pmap exists. * 2. Not wired. * 3. Read access. * 4. No page table pages. * but is *MUCH* faster than pmap_enter... */ void pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot) { struct rwlock *lock; lock = NULL; rw_rlock(&pvh_global_lock); PMAP_LOCK(pmap); (void)pmap_enter_quick_locked(pmap, va, m, prot, NULL, &lock); if (lock != NULL) rw_wunlock(lock); rw_runlock(&pvh_global_lock); PMAP_UNLOCK(pmap); } static vm_page_t pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, vm_page_t mpte, struct rwlock **lockp) { struct spglist free; vm_paddr_t phys; pd_entry_t *l2; pt_entry_t *l3; vm_paddr_t pa; pt_entry_t entry; pn_t pn; KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva || (m->oflags & VPO_UNMANAGED) != 0, ("pmap_enter_quick_locked: managed mapping within the clean submap")); rw_assert(&pvh_global_lock, RA_LOCKED); PMAP_LOCK_ASSERT(pmap, MA_OWNED); CTR2(KTR_PMAP, "pmap_enter_quick_locked: %p %lx", pmap, va); /* * In the case that a page table page is not * resident, we are creating it here. 
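 * The mpte argument caches the page table page across consecutive calls from pmap_enter_object(), so a run of mappings within the same 2MB region avoids re-walking the page table for each page.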
*/ if (va < VM_MAXUSER_ADDRESS) { vm_pindex_t l2pindex; /* * Calculate pagetable page index */ l2pindex = pmap_l2_pindex(va); if (mpte && (mpte->pindex == l2pindex)) { mpte->wire_count++; } else { /* * Get the l2 entry */ l2 = pmap_l2(pmap, va); /* * If the page table page is mapped, we just increment * the hold count, and activate it. Otherwise, we * attempt to allocate a page table page. If this * attempt fails, we don't retry. Instead, we give up. */ if (l2 != NULL && pmap_load(l2) != 0) { phys = PTE_TO_PHYS(pmap_load(l2)); mpte = PHYS_TO_VM_PAGE(phys); mpte->wire_count++; } else { /* * Pass NULL instead of the PV list lock * pointer, because we don't intend to sleep. */ mpte = _pmap_alloc_l3(pmap, l2pindex, NULL); if (mpte == NULL) return (mpte); } } l3 = (pt_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(mpte)); l3 = &l3[pmap_l3_index(va)]; } else { mpte = NULL; l3 = pmap_l3(kernel_pmap, va); } if (l3 == NULL) panic("pmap_enter_quick_locked: No l3"); if (pmap_load(l3) != 0) { if (mpte != NULL) { mpte->wire_count--; mpte = NULL; } return (mpte); } /* * Enter on the PV list if part of our managed memory. */ if ((m->oflags & VPO_UNMANAGED) == 0 && !pmap_try_insert_pv_entry(pmap, va, m, lockp)) { if (mpte != NULL) { SLIST_INIT(&free); if (pmap_unwire_l3(pmap, va, mpte, &free)) { pmap_invalidate_page(pmap, va); pmap_free_zero_pages(&free); } mpte = NULL; } return (mpte); } /* * Increment counters */ pmap_resident_count_inc(pmap, 1); pa = VM_PAGE_TO_PHYS(m); pn = (pa / PAGE_SIZE); /* RISCVTODO: check permissions */ - entry = (PTE_VALID | (PTE_TYPE_SRWX << PTE_TYPE_S)); + entry = (PTE_V | PTE_RWX); entry |= (pn << PTE_PPN0_S); /* * Now validate mapping with RO protection */ if ((m->oflags & VPO_UNMANAGED) == 0) entry |= PTE_SW_MANAGED; pmap_load_store(l3, entry); PTE_SYNC(l3); pmap_invalidate_page(pmap, va); return (mpte); } /* * This code maps large physical mmap regions into the * processor address space. Note that some shortcuts * are taken, but the code works. */ void pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, vm_object_t object, vm_pindex_t pindex, vm_size_t size) { VM_OBJECT_ASSERT_WLOCKED(object); KASSERT(object->type == OBJT_DEVICE || object->type == OBJT_SG, ("pmap_object_init_pt: non-device object")); } /* * Clear the wired attribute from the mappings for the specified range of * addresses in the given pmap. Every valid mapping within that range * must have the wired attribute set. In contrast, invalid mappings * cannot have the wired attribute set, so they are ignored. * * The wired attribute of the page table entry is not a hardware feature, * so there is no need to invalidate any TLB entries. */ void pmap_unwire(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) { vm_offset_t va_next; pd_entry_t *l1, *l2; pt_entry_t *l3; boolean_t pv_lists_locked; pv_lists_locked = FALSE; PMAP_LOCK(pmap); for (; sva < eva; sva = va_next) { l1 = pmap_l1(pmap, sva); if (pmap_load(l1) == 0) { va_next = (sva + L1_SIZE) & ~L1_OFFSET; if (va_next < sva) va_next = eva; continue; } va_next = (sva + L2_SIZE) & ~L2_OFFSET; if (va_next < sva) va_next = eva; l2 = pmap_l1_to_l2(l1, sva); if (pmap_load(l2) == 0) continue; if (va_next > eva) va_next = eva; for (l3 = pmap_l2_to_l3(l2, sva); sva != va_next; l3++, sva += L3_SIZE) { if (pmap_load(l3) == 0) continue; if ((pmap_load(l3) & PTE_SW_WIRED) == 0) panic("pmap_unwire: l3 %#jx is missing " "PTE_SW_WIRED", (uintmax_t)*l3); /* * PG_W must be cleared atomically. 
Although the pmap * lock synchronizes access to PG_W, another processor * could be setting PG_M and/or PG_A concurrently. */ atomic_clear_long(l3, PTE_SW_WIRED); pmap->pm_stats.wired_count--; } } if (pv_lists_locked) rw_runlock(&pvh_global_lock); PMAP_UNLOCK(pmap); } /* * Copy the range specified by src_addr/len * from the source map to the range dst_addr/len * in the destination map. * * This routine is only advisory and need not do anything. */ void pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len, vm_offset_t src_addr) { } /* * pmap_zero_page zeros the specified hardware page by mapping * the page into KVM and using bzero to clear its contents. */ void pmap_zero_page(vm_page_t m) { vm_offset_t va = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)); pagezero((void *)va); } /* * pmap_zero_page_area zeros the specified hardware page by mapping * the page into KVM and using bzero to clear its contents. * * off and size may not cover an area beyond a single hardware page. */ void pmap_zero_page_area(vm_page_t m, int off, int size) { vm_offset_t va = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)); if (off == 0 && size == PAGE_SIZE) pagezero((void *)va); else bzero((char *)va + off, size); } /* * pmap_zero_page_idle zeros the specified hardware page by mapping * the page into KVM and using bzero to clear its contents. This * is intended to be called from the vm_pagezero process only and * outside of Giant. */ void pmap_zero_page_idle(vm_page_t m) { vm_offset_t va = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)); pagezero((void *)va); } /* * pmap_copy_page copies the specified (machine independent) * page by mapping the page into virtual memory and using * bcopy to copy the page, one machine dependent page at a * time. */ void pmap_copy_page(vm_page_t msrc, vm_page_t mdst) { vm_offset_t src = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(msrc)); vm_offset_t dst = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(mdst)); pagecopy((void *)src, (void *)dst); } int unmapped_buf_allowed = 1; void pmap_copy_pages(vm_page_t ma[], vm_offset_t a_offset, vm_page_t mb[], vm_offset_t b_offset, int xfersize) { void *a_cp, *b_cp; vm_page_t m_a, m_b; vm_paddr_t p_a, p_b; vm_offset_t a_pg_offset, b_pg_offset; int cnt; while (xfersize > 0) { a_pg_offset = a_offset & PAGE_MASK; m_a = ma[a_offset >> PAGE_SHIFT]; p_a = m_a->phys_addr; b_pg_offset = b_offset & PAGE_MASK; m_b = mb[b_offset >> PAGE_SHIFT]; p_b = m_b->phys_addr; cnt = min(xfersize, PAGE_SIZE - a_pg_offset); cnt = min(cnt, PAGE_SIZE - b_pg_offset); if (__predict_false(!PHYS_IN_DMAP(p_a))) { panic("!DMAP a %lx", p_a); } else { a_cp = (char *)PHYS_TO_DMAP(p_a) + a_pg_offset; } if (__predict_false(!PHYS_IN_DMAP(p_b))) { panic("!DMAP b %lx", p_b); } else { b_cp = (char *)PHYS_TO_DMAP(p_b) + b_pg_offset; } bcopy(a_cp, b_cp, cnt); a_offset += cnt; b_offset += cnt; xfersize -= cnt; } } vm_offset_t pmap_quick_enter_page(vm_page_t m) { return (PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m))); } void pmap_quick_remove_page(vm_offset_t addr) { } /* * Returns true if the pmap's pv is one of the first * 16 pvs linked to from this page. This count may * be changed upwards or downwards in the future; it * is only necessary that true be returned for a small * subset of pmaps for proper page aging. 
*/ boolean_t pmap_page_exists_quick(pmap_t pmap, vm_page_t m) { struct rwlock *lock; pv_entry_t pv; int loops = 0; boolean_t rv; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_page_exists_quick: page %p is not managed", m)); rv = FALSE; rw_rlock(&pvh_global_lock); lock = VM_PAGE_TO_PV_LIST_LOCK(m); rw_rlock(lock); TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { if (PV_PMAP(pv) == pmap) { rv = TRUE; break; } loops++; if (loops >= 16) break; } rw_runlock(lock); rw_runlock(&pvh_global_lock); return (rv); } /* * pmap_page_wired_mappings: * * Return the number of managed mappings to the given physical page * that are wired. */ int pmap_page_wired_mappings(vm_page_t m) { struct rwlock *lock; pmap_t pmap; pt_entry_t *l3; pv_entry_t pv; int count, md_gen; if ((m->oflags & VPO_UNMANAGED) != 0) return (0); rw_rlock(&pvh_global_lock); lock = VM_PAGE_TO_PV_LIST_LOCK(m); rw_rlock(lock); restart: count = 0; TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { pmap = PV_PMAP(pv); if (!PMAP_TRYLOCK(pmap)) { md_gen = m->md.pv_gen; rw_runlock(lock); PMAP_LOCK(pmap); rw_rlock(lock); if (md_gen != m->md.pv_gen) { PMAP_UNLOCK(pmap); goto restart; } } l3 = pmap_l3(pmap, pv->pv_va); if (l3 != NULL && (pmap_load(l3) & PTE_SW_WIRED) != 0) count++; PMAP_UNLOCK(pmap); } rw_runlock(lock); rw_runlock(&pvh_global_lock); return (count); } /* * Destroy all managed, non-wired mappings in the given user-space * pmap. This pmap cannot be active on any processor besides the * caller. * * This function cannot be applied to the kernel pmap. Moreover, it * is not intended for general use. It is only to be used during * process termination. Consequently, it can be implemented in ways * that make it faster than pmap_remove(). First, it can more quickly * destroy mappings by iterating over the pmap's collection of PV * entries, rather than searching the page table. Second, it doesn't * have to test and clear the page table entries atomically, because * no processor is currently accessing the user address space. In * particular, a page table entry's dirty bit won't change state once * this function starts. */ void pmap_remove_pages(pmap_t pmap) { pd_entry_t ptepde, *l2; pt_entry_t *l3, tl3; struct spglist free; vm_page_t m; pv_entry_t pv; struct pv_chunk *pc, *npc; struct rwlock *lock; int64_t bit; uint64_t inuse, bitmask; int allfree, field, freed, idx; vm_paddr_t pa; lock = NULL; SLIST_INIT(&free); rw_rlock(&pvh_global_lock); PMAP_LOCK(pmap); TAILQ_FOREACH_SAFE(pc, &pmap->pm_pvchunk, pc_list, npc) { allfree = 1; freed = 0; for (field = 0; field < _NPCM; field++) { inuse = ~pc->pc_map[field] & pc_freemask[field]; while (inuse != 0) { bit = ffsl(inuse) - 1; bitmask = 1UL << bit; idx = field * 64 + bit; pv = &pc->pc_pventry[idx]; inuse &= ~bitmask; l2 = pmap_l2(pmap, pv->pv_va); ptepde = pmap_load(l2); l3 = pmap_l2_to_l3(l2, pv->pv_va); tl3 = pmap_load(l3); /* * We cannot remove wired pages from a process' mapping at this time */ if (tl3 & PTE_SW_WIRED) { allfree = 0; continue; } pa = PTE_TO_PHYS(tl3); m = PHYS_TO_VM_PAGE(pa); KASSERT(m->phys_addr == pa, ("vm_page_t %p phys_addr mismatch %016jx %016jx", m, (uintmax_t)m->phys_addr, (uintmax_t)tl3)); KASSERT((m->flags & PG_FICTITIOUS) != 0 || m < &vm_page_array[vm_page_array_size], ("pmap_remove_pages: bad l3 %#jx", (uintmax_t)tl3)); if (pmap_is_current(pmap) && pmap_l3_valid_cacheable(pmap_load(l3))) cpu_dcache_wb_range(pv->pv_va, L3_SIZE); pmap_load_clear(l3); PTE_SYNC(l3); pmap_invalidate_page(pmap, pv->pv_va); /* * Update the vm_page_t clean/reference bits. 
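pmap_remove_pages() above iterates the pmap's PV chunks instead of the page tables: pc_map has a bit set for every free slot, so ~pc_map[field] & pc_freemask[field] enumerates the occupied slots and ffsl() peels them off lowest bit first. A self-contained sketch of that bitmap idiom; the names mirror the kernel's, but the values are made up.

#include <stdio.h>
#include <stdint.h>

#define _NPCM 3
/* Bits that exist in each map word; the last word is partial. */
static const uint64_t freemask[_NPCM] = { ~0UL, ~0UL, 0x000000fffffffffful };

int
main(void)
{
	/* A set bit in map[] means "slot free"; clear means "in use". */
	uint64_t map[_NPCM] = { ~0UL ^ 0x5UL, ~0UL,
	    0x000000fffffffffful ^ (1UL << 9) };
	uint64_t inuse, bitmask;
	int bit, field, idx;

	for (field = 0; field < _NPCM; field++) {
		inuse = ~map[field] & freemask[field];
		while (inuse != 0) {
			bit = __builtin_ffsll(inuse) - 1; /* lowest set bit */
			bitmask = 1UL << bit;
			idx = field * 64 + bit;           /* slot index in chunk */
			printf("slot %d is in use\n", idx);
			inuse &= ~bitmask;                /* consume it */
		}
	}
	return (0);
}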
*/ if (pmap_page_dirty(tl3)) vm_page_dirty(m); CHANGE_PV_LIST_LOCK_TO_VM_PAGE(&lock, m); /* Mark free */ pc->pc_map[field] |= bitmask; pmap_resident_count_dec(pmap, 1); TAILQ_REMOVE(&m->md.pv_list, pv, pv_next); m->md.pv_gen++; pmap_unuse_l3(pmap, pv->pv_va, ptepde, &free); freed++; } } PV_STAT(atomic_add_long(&pv_entry_frees, freed)); PV_STAT(atomic_add_int(&pv_entry_spare, freed)); PV_STAT(atomic_subtract_long(&pv_entry_count, freed)); if (allfree) { TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); free_pv_chunk(pc); } } pmap_invalidate_all(pmap); if (lock != NULL) rw_wunlock(lock); rw_runlock(&pvh_global_lock); PMAP_UNLOCK(pmap); pmap_free_zero_pages(&free); } /* * This is used to check if a page has been accessed or modified. As we * don't have a bit to see if it has been modified we have to assume it * has been if the page is read/write. */ static boolean_t pmap_page_test_mappings(vm_page_t m, boolean_t accessed, boolean_t modified) { struct rwlock *lock; pv_entry_t pv; pt_entry_t *l3, mask, value; pmap_t pmap; int md_gen; boolean_t rv; rv = FALSE; rw_rlock(&pvh_global_lock); lock = VM_PAGE_TO_PV_LIST_LOCK(m); rw_rlock(lock); restart: TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { pmap = PV_PMAP(pv); if (!PMAP_TRYLOCK(pmap)) { md_gen = m->md.pv_gen; rw_runlock(lock); PMAP_LOCK(pmap); rw_rlock(lock); if (md_gen != m->md.pv_gen) { PMAP_UNLOCK(pmap); goto restart; } } l3 = pmap_l3(pmap, pv->pv_va); mask = 0; value = 0; if (modified) { - mask |= PTE_DIRTY; - value |= PTE_DIRTY; + mask |= PTE_D; + value |= PTE_D; } if (accessed) { - mask |= PTE_REF; - value |= PTE_REF; + mask |= PTE_A; + value |= PTE_A; } #if 0 if (modified) { mask |= ATTR_AP_RW_BIT; value |= ATTR_AP(ATTR_AP_RW); } if (accessed) { mask |= ATTR_AF | ATTR_DESCR_MASK; value |= ATTR_AF | L3_PAGE; } #endif rv = (pmap_load(l3) & mask) == value; PMAP_UNLOCK(pmap); if (rv) goto out; } out: rw_runlock(lock); rw_runlock(&pvh_global_lock); return (rv); } /* * pmap_is_modified: * * Return whether or not the specified physical page was modified * in any physical maps. */ boolean_t pmap_is_modified(vm_page_t m) { KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_is_modified: page %p is not managed", m)); /* * If the page is not exclusive busied, then PGA_WRITEABLE cannot be * concurrently set while the object is locked. Thus, if PGA_WRITEABLE * is clear, no PTEs can have PG_M set. */ VM_OBJECT_ASSERT_WLOCKED(m->object); if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0) return (FALSE); return (pmap_page_test_mappings(m, FALSE, TRUE)); } /* * pmap_is_prefaultable: * * Return whether or not the specified virtual address is eligible * for prefault. */ boolean_t pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr) { pt_entry_t *l3; boolean_t rv; rv = FALSE; PMAP_LOCK(pmap); l3 = pmap_l3(pmap, addr); if (l3 != NULL && pmap_load(l3) != 0) { rv = TRUE; } PMAP_UNLOCK(pmap); return (rv); } /* * pmap_is_referenced: * * Return whether or not the specified physical page was referenced * in any physical maps. */ boolean_t pmap_is_referenced(vm_page_t m) { KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_is_referenced: page %p is not managed", m)); return (pmap_page_test_mappings(m, TRUE, FALSE)); } /* * Clear the write and modified bits in each of the given page's mappings. 
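pmap_page_test_mappings() above (like the PTE_V | PTE_RWX construction in pmap_enter_quick_locked() earlier) now uses the bit names from the RISC-V privileged specification, and a single load of the PTE answers "accessed?", "modified?", or both via a mask/value pair. A hedged sketch of that test: the bit positions (A=6, D=7 in the Sv39 PTE) follow the spec but are written as local defines here, not taken from the kernel headers.

#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>

/* Sv39 PTE bits (local, illustrative defines). */
#define PTE_V (1UL << 0)	/* valid */
#define PTE_A (1UL << 6)	/* accessed */
#define PTE_D (1UL << 7)	/* dirty */

static bool
pte_test(uint64_t pte, bool accessed, bool modified)
{
	uint64_t mask = 0, value = 0;

	if (modified) { mask |= PTE_D; value |= PTE_D; }
	if (accessed) { mask |= PTE_A; value |= PTE_A; }
	/* Every requested bit must be set for the test to pass. */
	return ((pte & mask) == value);
}

int
main(void)
{
	uint64_t pte = PTE_V | PTE_A;	/* referenced but clean */

	printf("accessed: %d\n", pte_test(pte, true, false));	/* 1 */
	printf("modified: %d\n", pte_test(pte, false, true));	/* 0 */
	return (0);
}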
*/ void pmap_remove_write(vm_page_t m) { pmap_t pmap; struct rwlock *lock; pv_entry_t pv; pt_entry_t *l3, oldl3; pt_entry_t newl3; int md_gen; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_remove_write: page %p is not managed", m)); /* * If the page is not exclusive busied, then PGA_WRITEABLE cannot be * set by another thread while the object is locked. Thus, * if PGA_WRITEABLE is clear, no page table entries need updating. */ VM_OBJECT_ASSERT_WLOCKED(m->object); if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0) return; rw_rlock(&pvh_global_lock); lock = VM_PAGE_TO_PV_LIST_LOCK(m); retry_pv_loop: rw_wlock(lock); TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { pmap = PV_PMAP(pv); if (!PMAP_TRYLOCK(pmap)) { md_gen = m->md.pv_gen; rw_wunlock(lock); PMAP_LOCK(pmap); rw_wlock(lock); if (md_gen != m->md.pv_gen) { PMAP_UNLOCK(pmap); rw_wunlock(lock); goto retry_pv_loop; } } l3 = pmap_l3(pmap, pv->pv_va); retry: oldl3 = pmap_load(l3); if (pmap_is_write(oldl3)) { - newl3 = oldl3 & ~(1 << PTE_TYPE_S); + newl3 = oldl3 & ~(PTE_W); if (!atomic_cmpset_long(l3, oldl3, newl3)) goto retry; /* TODO: use pmap_page_dirty(oldl3) ? */ - if ((oldl3 & PTE_REF) != 0) + if ((oldl3 & PTE_A) != 0) vm_page_dirty(m); pmap_invalidate_page(pmap, pv->pv_va); } PMAP_UNLOCK(pmap); } rw_wunlock(lock); vm_page_aflag_clear(m, PGA_WRITEABLE); rw_runlock(&pvh_global_lock); } static __inline boolean_t safe_to_clear_referenced(pmap_t pmap, pt_entry_t pte) { return (FALSE); } #define PMAP_TS_REFERENCED_MAX 5 /* * pmap_ts_referenced: * * Return a count of reference bits for a page, clearing those bits. * It is not necessary for every reference bit to be cleared, but it * is necessary that 0 only be returned when there are truly no * reference bits set. * * XXX: The exact number of bits to check and clear is a matter that * should be tested and standardized at some point in the future for * optimal aging of shared pages. */ int pmap_ts_referenced(vm_page_t m) { pv_entry_t pv, pvf; pmap_t pmap; struct rwlock *lock; pd_entry_t *l2; pt_entry_t *l3; vm_paddr_t pa; int cleared, md_gen, not_cleared; struct spglist free; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_ts_referenced: page %p is not managed", m)); SLIST_INIT(&free); cleared = 0; pa = VM_PAGE_TO_PHYS(m); lock = PHYS_TO_PV_LIST_LOCK(pa); rw_rlock(&pvh_global_lock); rw_wlock(lock); retry: not_cleared = 0; if ((pvf = TAILQ_FIRST(&m->md.pv_list)) == NULL) goto out; pv = pvf; do { if (pvf == NULL) pvf = pv; pmap = PV_PMAP(pv); if (!PMAP_TRYLOCK(pmap)) { md_gen = m->md.pv_gen; rw_wunlock(lock); PMAP_LOCK(pmap); rw_wlock(lock); if (md_gen != m->md.pv_gen) { PMAP_UNLOCK(pmap); goto retry; } } l2 = pmap_l2(pmap, pv->pv_va); - KASSERT((pmap_load(l2) & PTE_TYPE_M) == (PTE_TYPE_PTR << PTE_TYPE_S), + KASSERT((pmap_load(l2) & PTE_RX) == 0, ("pmap_ts_referenced: found an invalid l2 table")); l3 = pmap_l2_to_l3(l2, pv->pv_va); - if ((pmap_load(l3) & PTE_REF) != 0) { + if ((pmap_load(l3) & PTE_A) != 0) { if (safe_to_clear_referenced(pmap, pmap_load(l3))) { /* * TODO: We don't handle the access flag * at all. We need to be able to set it in * the exception handler. */ panic("RISCVTODO: safe_to_clear_referenced\n"); } else if ((pmap_load(l3) & PTE_SW_WIRED) == 0) { /* * Wired pages cannot be paged out so * doing accessed bit emulation for * them is wasted effort. We do the * hard work for unwired pages only. 
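pmap_ts_referenced() caps its work at PMAP_TS_REFERENCED_MAX cleared bits and, after visiting a mapping, rotates the PV list so the next call starts somewhere else (the rotation appears just below). A small userspace sketch of that rotation idiom using <sys/queue.h>; the entry type and values are invented for illustration.

#include <stdio.h>
#include <sys/queue.h>

struct ent {
	int id;
	TAILQ_ENTRY(ent) link;
};
TAILQ_HEAD(entlist, ent);

int
main(void)
{
	struct entlist list = TAILQ_HEAD_INITIALIZER(list);
	struct ent e[3] = { { .id = 1 }, { .id = 2 }, { .id = 3 } }, *p;
	int i;

	for (i = 0; i < 3; i++)
		TAILQ_INSERT_TAIL(&list, &e[i], link);

	/* Rotate: move the head entry to the tail, as the PV scan does
	 * after visiting a mapping, so later scans resume elsewhere. */
	p = TAILQ_FIRST(&list);
	TAILQ_REMOVE(&list, p, link);
	TAILQ_INSERT_TAIL(&list, p, link);

	TAILQ_FOREACH(p, &list, link)
		printf("%d ", p->id);	/* prints: 2 3 1 */
	printf("\n");
	return (0);
}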
*/ pmap_remove_l3(pmap, l3, pv->pv_va, pmap_load(l2), &free, &lock); pmap_invalidate_page(pmap, pv->pv_va); cleared++; if (pvf == pv) pvf = NULL; pv = NULL; KASSERT(lock == VM_PAGE_TO_PV_LIST_LOCK(m), ("inconsistent pv lock %p %p for page %p", lock, VM_PAGE_TO_PV_LIST_LOCK(m), m)); } else not_cleared++; } PMAP_UNLOCK(pmap); /* Rotate the PV list if it has more than one entry. */ if (pv != NULL && TAILQ_NEXT(pv, pv_next) != NULL) { TAILQ_REMOVE(&m->md.pv_list, pv, pv_next); TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); m->md.pv_gen++; } } while ((pv = TAILQ_FIRST(&m->md.pv_list)) != pvf && cleared + not_cleared < PMAP_TS_REFERENCED_MAX); out: rw_wunlock(lock); rw_runlock(&pvh_global_lock); pmap_free_zero_pages(&free); return (cleared + not_cleared); } /* * Apply the given advice to the specified range of addresses within the * given pmap. Depending on the advice, clear the referenced and/or * modified flags in each mapping and set the mapped page's dirty field. */ void pmap_advise(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, int advice) { } /* * Clear the modify bits on the specified physical page. */ void pmap_clear_modify(vm_page_t m) { KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_clear_modify: page %p is not managed", m)); VM_OBJECT_ASSERT_WLOCKED(m->object); KASSERT(!vm_page_xbusied(m), ("pmap_clear_modify: page %p is exclusive busied", m)); /* * If the page is not PGA_WRITEABLE, then no PTEs can have PG_M set. * If the object containing the page is locked and the page is not * exclusive busied, then PGA_WRITEABLE cannot be concurrently set. */ if ((m->aflags & PGA_WRITEABLE) == 0) return; /* RISCVTODO: We lack support for tracking if a page is modified */ } void * pmap_mapbios(vm_paddr_t pa, vm_size_t size) { return ((void *)PHYS_TO_DMAP(pa)); } void pmap_unmapbios(vm_paddr_t pa, vm_size_t size) { } /* * Sets the memory attribute for the specified page. */ void pmap_page_set_memattr(vm_page_t m, vm_memattr_t ma) { m->md.pv_memattr = ma; /* * RISCVTODO: Implement the below (from the amd64 pmap) * If "m" is a normal page, update its direct mapping. This update * can be relied upon to perform any cache operations that are * required for data coherence. */ if ((m->flags & PG_FICTITIOUS) == 0 && PHYS_IN_DMAP(VM_PAGE_TO_PHYS(m))) panic("RISCVTODO: pmap_page_set_memattr"); } /* * perform the pmap work for mincore */ int pmap_mincore(pmap_t pmap, vm_offset_t addr, vm_paddr_t *locked_pa) { panic("RISCVTODO: pmap_mincore"); } void pmap_activate(struct thread *td) { pmap_t pmap; critical_enter(); pmap = vmspace_pmap(td->td_proc->p_vmspace); td->td_pcb->pcb_l1addr = vtophys(pmap->pm_l1); - __asm __volatile("csrw sptbr, %0" :: "r"(td->td_pcb->pcb_l1addr)); + __asm __volatile("csrw sptbr, %0" :: "r"(td->td_pcb->pcb_l1addr >> PAGE_SHIFT)); pmap_invalidate_all(pmap); critical_exit(); } void pmap_sync_icache(pmap_t pm, vm_offset_t va, vm_size_t sz) { panic("RISCVTODO: pmap_sync_icache"); } /* * Increase the starting virtual address of the given mapping if a * different alignment might result in more superpage mappings. */ void pmap_align_superpage(vm_object_t object, vm_ooffset_t offset, vm_offset_t *addr, vm_size_t size) { } /** * Get the kernel virtual address of a set of physical pages. If there are * physical addresses not covered by the DMAP perform a transient mapping * that will be removed when calling pmap_unmap_io_transient. * * \param page The pages the caller wishes to obtain the virtual * address on the kernel memory map. 
* \param vaddr On return contains the kernel virtual memory address * of the pages passed in the page parameter. * \param count Number of pages passed in. * \param can_fault TRUE if the thread using the mapped pages can take * page faults, FALSE otherwise. * * \returns TRUE if the caller must call pmap_unmap_io_transient when * finished or FALSE otherwise. * */ boolean_t pmap_map_io_transient(vm_page_t page[], vm_offset_t vaddr[], int count, boolean_t can_fault) { vm_paddr_t paddr; boolean_t needs_mapping; int error, i; /* * Allocate any KVA space that we need, this is done in a separate * loop to prevent calling vmem_alloc while pinned. */ needs_mapping = FALSE; for (i = 0; i < count; i++) { paddr = VM_PAGE_TO_PHYS(page[i]); if (__predict_false(paddr >= DMAP_MAX_PHYSADDR)) { error = vmem_alloc(kernel_arena, PAGE_SIZE, M_BESTFIT | M_WAITOK, &vaddr[i]); KASSERT(error == 0, ("vmem_alloc failed: %d", error)); needs_mapping = TRUE; } else { vaddr[i] = PHYS_TO_DMAP(paddr); } } /* Exit early if everything is covered by the DMAP */ if (!needs_mapping) return (FALSE); if (!can_fault) sched_pin(); for (i = 0; i < count; i++) { paddr = VM_PAGE_TO_PHYS(page[i]); if (paddr >= DMAP_MAX_PHYSADDR) { panic( "pmap_map_io_transient: TODO: Map out of DMAP data"); } } return (needs_mapping); } void pmap_unmap_io_transient(vm_page_t page[], vm_offset_t vaddr[], int count, boolean_t can_fault) { vm_paddr_t paddr; int i; if (!can_fault) sched_unpin(); for (i = 0; i < count; i++) { paddr = VM_PAGE_TO_PHYS(page[i]); if (paddr >= DMAP_MAX_PHYSADDR) { panic("RISCVTODO: pmap_unmap_io_transient: Unmap data"); } } } Index: user/alc/PQ_LAUNDRY/sys/riscv/riscv/swtch.S =================================================================== --- user/alc/PQ_LAUNDRY/sys/riscv/riscv/swtch.S (revision 303666) +++ user/alc/PQ_LAUNDRY/sys/riscv/riscv/swtch.S (revision 303667) @@ -1,274 +1,277 @@ /*- * Copyright (c) 2015-2016 Ruslan Bukin * All rights reserved. * * Portions of this software were developed by SRI International and the * University of Cambridge Computer Laboratory under DARPA/AFRL contract * FA8750-10-C-0237 ("CTSRD"), as part of the DARPA CRASH research programme. * * Portions of this software were developed by the University of Cambridge * Computer Laboratory as part of the CTSRD Project, with support from the * UK Higher Education Innovation Fund (HEIF). * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include "assym.s" #include "opt_sched.h" #include #include #include #include __FBSDID("$FreeBSD$"); /* * void cpu_throw(struct thread *old, struct thread *new) */ ENTRY(cpu_throw) /* Store the new curthread */ sd a1, PC_CURTHREAD(gp) /* And the new pcb */ ld x13, TD_PCB(a1) sd x13, PC_CURPCB(gp) sfence.vm /* Switch to the new pmap */ ld t0, PCB_L1ADDR(x13) + srli t0, t0, PAGE_SHIFT csrw sptbr, t0 /* TODO: Invalidate the TLB */ sfence.vm /* Load registers */ ld ra, (PCB_RA)(x13) ld sp, (PCB_SP)(x13) /* s[0-11] */ ld s0, (PCB_S + 0 * 8)(x13) ld s1, (PCB_S + 1 * 8)(x13) ld s2, (PCB_S + 2 * 8)(x13) ld s3, (PCB_S + 3 * 8)(x13) ld s4, (PCB_S + 4 * 8)(x13) ld s5, (PCB_S + 5 * 8)(x13) ld s6, (PCB_S + 6 * 8)(x13) ld s7, (PCB_S + 7 * 8)(x13) ld s8, (PCB_S + 8 * 8)(x13) ld s9, (PCB_S + 9 * 8)(x13) ld s10, (PCB_S + 10 * 8)(x13) ld s11, (PCB_S + 11 * 8)(x13) + ret .Lcpu_throw_panic_str: .asciz "cpu_throw: %p\0" END(cpu_throw) /* * void cpu_switch(struct thread *old, struct thread *new, struct mtx *mtx) * * a0 = old * a1 = new * a2 = mtx * x3 to x7, x16 and x17 are caller saved */ ENTRY(cpu_switch) /* Store the new curthread */ sd a1, PC_CURTHREAD(gp) /* And the new pcb */ ld x13, TD_PCB(a1) sd x13, PC_CURPCB(gp) /* Save the old context. */ ld x13, TD_PCB(a0) /* Store ra, sp and the callee-saved registers */ sd ra, (PCB_RA)(x13) sd sp, (PCB_SP)(x13) /* s[0-11] */ sd s0, (PCB_S + 0 * 8)(x13) sd s1, (PCB_S + 1 * 8)(x13) sd s2, (PCB_S + 2 * 8)(x13) sd s3, (PCB_S + 3 * 8)(x13) sd s4, (PCB_S + 4 * 8)(x13) sd s5, (PCB_S + 5 * 8)(x13) sd s6, (PCB_S + 6 * 8)(x13) sd s7, (PCB_S + 7 * 8)(x13) sd s8, (PCB_S + 8 * 8)(x13) sd s9, (PCB_S + 9 * 8)(x13) sd s10, (PCB_S + 10 * 8)(x13) sd s11, (PCB_S + 11 * 8)(x13) /* * Restore the saved context. */ ld x13, TD_PCB(a1) /* * TODO: We may need to flush the cache here if switching * to a user process. 
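The srli added before each csrw above reflects that sptbr takes the physical page number of the root page table rather than its byte address, so the value loaded from PCB_L1ADDR must be shifted right by PAGE_SHIFT first. A trivial C model of that computation under assumed constants (4 KiB pages):

#include <assert.h>
#include <stdio.h>
#include <stdint.h>

#define PAGE_SHIFT 12	/* 4 KiB pages */

/* sptbr wants the PPN of the root page table, not a byte address. */
static uint64_t
sptbr_value(uint64_t l1_phys)
{
	/* The root table is page aligned, so no bits are lost. */
	assert((l1_phys & ((1UL << PAGE_SHIFT) - 1)) == 0);
	return (l1_phys >> PAGE_SHIFT);
}

int
main(void)
{
	uint64_t l1 = 0x80342000UL;	/* page-aligned root table address */

	printf("csrw sptbr, %#lx\n", (unsigned long)sptbr_value(l1));
	return (0);
}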
*/ sfence.vm /* Switch to the new pmap */ ld t0, PCB_L1ADDR(x13) + srli t0, t0, PAGE_SHIFT csrw sptbr, t0 /* TODO: Invalidate the TLB */ sfence.vm /* Release the old thread */ sd a2, TD_LOCK(a0) #if defined(SCHED_ULE) && defined(SMP) /* Spin if TD_LOCK points to a blocked_lock */ la a2, _C_LABEL(blocked_lock) 1: ld t0, TD_LOCK(a1) beq t0, a2, 1b #endif /* Restore the registers */ ld ra, (PCB_RA)(x13) ld sp, (PCB_SP)(x13) /* s[0-11] */ ld s0, (PCB_S + 0 * 8)(x13) ld s1, (PCB_S + 1 * 8)(x13) ld s2, (PCB_S + 2 * 8)(x13) ld s3, (PCB_S + 3 * 8)(x13) ld s4, (PCB_S + 4 * 8)(x13) ld s5, (PCB_S + 5 * 8)(x13) ld s6, (PCB_S + 6 * 8)(x13) ld s7, (PCB_S + 7 * 8)(x13) ld s8, (PCB_S + 8 * 8)(x13) ld s9, (PCB_S + 9 * 8)(x13) ld s10, (PCB_S + 10 * 8)(x13) ld s11, (PCB_S + 11 * 8)(x13) ret .Lcpu_switch_panic_str: .asciz "cpu_switch: %p\0" END(cpu_switch) /* * fork_exit(void (*callout)(void *, struct trapframe *), void *arg, * struct trapframe *frame) */ ENTRY(fork_trampoline) mv a0, s0 mv a1, s1 mv a2, sp call _C_LABEL(fork_exit) /* Restore sstatus */ ld t0, (TF_SSTATUS)(sp) /* Ensure interrupts disabled */ - li t1, ~SSTATUS_IE + li t1, ~SSTATUS_SIE and t0, t0, t1 csrw sstatus, t0 /* Restore exception program counter */ ld t0, (TF_SEPC)(sp) csrw sepc, t0 /* Restore the registers */ ld t0, (TF_T + 0 * 8)(sp) ld t1, (TF_T + 1 * 8)(sp) ld t2, (TF_T + 2 * 8)(sp) ld t3, (TF_T + 3 * 8)(sp) ld t4, (TF_T + 4 * 8)(sp) ld t5, (TF_T + 5 * 8)(sp) ld t6, (TF_T + 6 * 8)(sp) ld s0, (TF_S + 0 * 8)(sp) ld s1, (TF_S + 1 * 8)(sp) ld s2, (TF_S + 2 * 8)(sp) ld s3, (TF_S + 3 * 8)(sp) ld s4, (TF_S + 4 * 8)(sp) ld s5, (TF_S + 5 * 8)(sp) ld s6, (TF_S + 6 * 8)(sp) ld s7, (TF_S + 7 * 8)(sp) ld s8, (TF_S + 8 * 8)(sp) ld s9, (TF_S + 9 * 8)(sp) ld s10, (TF_S + 10 * 8)(sp) ld s11, (TF_S + 11 * 8)(sp) ld a0, (TF_A + 0 * 8)(sp) ld a1, (TF_A + 1 * 8)(sp) ld a2, (TF_A + 2 * 8)(sp) ld a3, (TF_A + 3 * 8)(sp) ld a4, (TF_A + 4 * 8)(sp) ld a5, (TF_A + 5 * 8)(sp) ld a6, (TF_A + 6 * 8)(sp) ld a7, (TF_A + 7 * 8)(sp) /* Load user ra and sp */ ld tp, (TF_TP)(sp) ld ra, (TF_RA)(sp) /* * Store our pcpup on stack, we will load it back * on kernel mode trap. */ sd gp, (TF_SIZE)(sp) ld gp, (TF_GP)(sp) /* Save kernel stack so we can use it doing a user trap */ addi sp, sp, TF_SIZE csrw sscratch, sp /* Load user stack */ ld sp, (TF_SP - TF_SIZE)(sp) - eret + sret END(fork_trampoline) ENTRY(savectx) /* Store ra, sp and the callee-saved registers */ sd ra, (PCB_RA)(a0) sd sp, (PCB_SP)(a0) /* s[0-11] */ sd s0, (PCB_S + 0 * 8)(a0) sd s1, (PCB_S + 1 * 8)(a0) sd s2, (PCB_S + 2 * 8)(a0) sd s3, (PCB_S + 3 * 8)(a0) sd s4, (PCB_S + 4 * 8)(a0) sd s5, (PCB_S + 5 * 8)(a0) sd s6, (PCB_S + 6 * 8)(a0) sd s7, (PCB_S + 7 * 8)(a0) sd s8, (PCB_S + 8 * 8)(a0) sd s9, (PCB_S + 9 * 8)(a0) sd s10, (PCB_S + 10 * 8)(a0) sd s11, (PCB_S + 11 * 8)(a0) /* Store the VFP registers */ #ifdef VFP /* TODO */ #endif ret END(savectx) Index: user/alc/PQ_LAUNDRY/sys/riscv/riscv/timer.c =================================================================== --- user/alc/PQ_LAUNDRY/sys/riscv/riscv/timer.c (revision 303666) +++ user/alc/PQ_LAUNDRY/sys/riscv/riscv/timer.c (revision 303667) @@ -1,299 +1,318 @@ /*- - * Copyright (c) 2015 Ruslan Bukin + * Copyright (c) 2015-2016 Ruslan Bukin * All rights reserved. * * Portions of this software were developed by SRI International and the * University of Cambridge Computer Laboratory under DARPA/AFRL contract * FA8750-10-C-0237 ("CTSRD"), as part of the DARPA CRASH research programme. 
* * Portions of this software were developed by the University of Cambridge * Computer Laboratory as part of the CTSRD Project, with support from the * UK Higher Education Innovation Fund (HEIF). * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * RISC-V Timer */ #include "opt_platform.h" #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define DEFAULT_FREQ 1000000 +#define TIMER_COUNTS 0x00 +#define TIMER_MTIMECMP(cpu) (0x08 + (cpu * 8)) + +#define READ8(_sc, _reg) \ + bus_space_read_8(_sc->bst, _sc->bsh, _reg) +#define WRITE8(_sc, _reg, _val) \ + bus_space_write_8(_sc->bst, _sc->bsh, _reg, _val) + struct riscv_tmr_softc { - struct resource *res[1]; - void *ihl[1]; + struct resource *res[2]; + bus_space_tag_t bst; + bus_space_handle_t bsh; + void *ih; uint32_t clkfreq; struct eventtimer et; }; static struct riscv_tmr_softc *riscv_tmr_sc = NULL; static struct resource_spec timer_spec[] = { + { SYS_RES_MEMORY, 0, RF_ACTIVE }, { SYS_RES_IRQ, 0, RF_ACTIVE }, { -1, 0 } }; static timecounter_get_t riscv_tmr_get_timecount; static struct timecounter riscv_tmr_timecount = { .tc_name = "RISC-V Timecounter", .tc_get_timecount = riscv_tmr_get_timecount, .tc_poll_pps = NULL, .tc_counter_mask = ~0u, .tc_frequency = 0, .tc_quality = 1000, }; static long -get_counts(void) +get_counts(struct riscv_tmr_softc *sc) { - return (csr_read(stime)); + return (READ8(sc, TIMER_COUNTS)); } static unsigned riscv_tmr_get_timecount(struct timecounter *tc) { + struct riscv_tmr_softc *sc; - return (get_counts()); + sc = tc->tc_priv; + + return (get_counts(sc)); } static int riscv_tmr_start(struct eventtimer *et, sbintime_t first, sbintime_t period) { struct riscv_tmr_softc *sc; - int counts; + uint64_t counts; + int cpu; sc = (struct riscv_tmr_softc *)et->et_priv; if (first != 0) { counts = ((uint32_t)et->et_frequency * first) >> 32; + counts += READ8(sc, TIMER_COUNTS); + cpu = PCPU_GET(cpuid); + WRITE8(sc, TIMER_MTIMECMP(cpu), counts); + csr_set(sie, SIE_STIE); machine_command(ECALL_MTIMECMP, counts); + return (0); } return (EINVAL); } static int riscv_tmr_stop(struct eventtimer *et) { struct riscv_tmr_softc *sc; sc = (struct riscv_tmr_softc *)et->et_priv; /* TODO */ 
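riscv_tmr_start() above turns the sbintime_t deadline into timer ticks with a 32.32 fixed-point multiply: et_frequency * first is a wide product whose upper 32 bits, after the >> 32, are whole ticks, which are then added to the current count and written to the per-CPU mtimecmp slot. A standalone check of that arithmetic with illustrative values:

#include <stdio.h>
#include <stdint.h>

/* sbintime_t is a 32.32 fixed-point seconds value in FreeBSD. */
typedef int64_t sbintime_t;
#define SBT_1S	((sbintime_t)1 << 32)

int
main(void)
{
	uint64_t freq = 1000000;	/* 1 MHz timer */
	sbintime_t first = SBT_1S / 2;	/* fire in 0.5 s */
	uint64_t counts;

	/* (freq * first) >> 32 == freq * (first / 2^32) in integer math. */
	counts = (freq * (uint64_t)first) >> 32;
	printf("%llu ticks (expect 500000)\n", (unsigned long long)counts);
	return (0);
}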
return (0); } static int riscv_tmr_intr(void *arg) { struct riscv_tmr_softc *sc; sc = (struct riscv_tmr_softc *)arg; - /* - * Clear interrupt pending bit. - * Note: SIP_STIP bit is not implemented in sip register - * in Spike simulator, so use machine command to clear - * interrupt pending bit in mip. - */ - machine_command(ECALL_CLEAR_PENDING, 0); + csr_clear(sip, SIP_STIP); if (sc->et.et_active) sc->et.et_event_cb(&sc->et, sc->et.et_arg); return (FILTER_HANDLED); } static int riscv_tmr_fdt_probe(device_t dev) { if (!ofw_bus_status_okay(dev)) return (ENXIO); if (ofw_bus_is_compatible(dev, "riscv,timer")) { device_set_desc(dev, "RISC-V Timer"); return (BUS_PROBE_DEFAULT); } return (ENXIO); } static int riscv_tmr_attach(device_t dev) { struct riscv_tmr_softc *sc; phandle_t node; pcell_t clock; int error; sc = device_get_softc(dev); if (riscv_tmr_sc) return (ENXIO); /* Get the base clock frequency */ node = ofw_bus_get_node(dev); if (node > 0) { error = OF_getprop(node, "clock-frequency", &clock, sizeof(clock)); if (error > 0) { sc->clkfreq = fdt32_to_cpu(clock); } } if (sc->clkfreq == 0) sc->clkfreq = DEFAULT_FREQ; if (sc->clkfreq == 0) { device_printf(dev, "No clock frequency specified\n"); return (ENXIO); } if (bus_alloc_resources(dev, timer_spec, sc->res)) { device_printf(dev, "could not allocate resources\n"); return (ENXIO); } + /* Memory interface */ + sc->bst = rman_get_bustag(sc->res[0]); + sc->bsh = rman_get_bushandle(sc->res[0]); + riscv_tmr_sc = sc; /* Setup IRQs handler */ - error = bus_setup_intr(dev, sc->res[0], INTR_TYPE_CLK, - riscv_tmr_intr, NULL, sc, &sc->ihl[0]); + error = bus_setup_intr(dev, sc->res[1], INTR_TYPE_CLK, + riscv_tmr_intr, NULL, sc, &sc->ih); if (error) { device_printf(dev, "Unable to alloc int resource.\n"); return (ENXIO); } riscv_tmr_timecount.tc_frequency = sc->clkfreq; + riscv_tmr_timecount.tc_priv = sc; tc_init(&riscv_tmr_timecount); sc->et.et_name = "RISC-V Eventtimer"; sc->et.et_flags = ET_FLAGS_ONESHOT | ET_FLAGS_PERCPU; sc->et.et_quality = 1000; sc->et.et_frequency = sc->clkfreq; sc->et.et_min_period = (0x00000002LLU << 32) / sc->et.et_frequency; sc->et.et_max_period = (0xfffffffeLLU << 32) / sc->et.et_frequency; sc->et.et_start = riscv_tmr_start; sc->et.et_stop = riscv_tmr_stop; sc->et.et_priv = sc; et_register(&sc->et); return (0); } static device_method_t riscv_tmr_fdt_methods[] = { DEVMETHOD(device_probe, riscv_tmr_fdt_probe), DEVMETHOD(device_attach, riscv_tmr_attach), { 0, 0 } }; static driver_t riscv_tmr_fdt_driver = { "timer", riscv_tmr_fdt_methods, sizeof(struct riscv_tmr_softc), }; static devclass_t riscv_tmr_fdt_devclass; EARLY_DRIVER_MODULE(timer, simplebus, riscv_tmr_fdt_driver, riscv_tmr_fdt_devclass, 0, 0, BUS_PASS_TIMER + BUS_PASS_ORDER_MIDDLE); EARLY_DRIVER_MODULE(timer, ofwbus, riscv_tmr_fdt_driver, riscv_tmr_fdt_devclass, 0, 0, BUS_PASS_TIMER + BUS_PASS_ORDER_MIDDLE); void DELAY(int usec) { - int32_t counts, counts_per_usec; - uint32_t first, last; + int64_t counts, counts_per_usec; + uint64_t first, last; /* * Check the timers are setup, if not just * use a for loop for the meantime */ if (riscv_tmr_sc == NULL) { for (; usec > 0; usec--) for (counts = 200; counts > 0; counts--) /* * Prevent the compiler from optimizing * out the loop */ cpufunc_nullop(); return; } /* Get the number of times to count */ counts_per_usec = ((riscv_tmr_timecount.tc_frequency / 1000000) + 1); /* * Clamp the timeout at a maximum value (about 32 seconds with * a 66MHz clock). 
*Nobody* should be delay()ing for anywhere * near that length of time and if they are, they should be hung * out to dry. */ if (usec >= (0x80000000U / counts_per_usec)) counts = (0x80000000U / counts_per_usec) - 1; else counts = usec * counts_per_usec; - first = get_counts(); + first = get_counts(riscv_tmr_sc); while (counts > 0) { - last = get_counts(); - counts -= (int32_t)(last - first); + last = get_counts(riscv_tmr_sc); + counts -= (int64_t)(last - first); first = last; } } Index: user/alc/PQ_LAUNDRY/sys/riscv/riscv/trap.c =================================================================== --- user/alc/PQ_LAUNDRY/sys/riscv/riscv/trap.c (revision 303666) +++ user/alc/PQ_LAUNDRY/sys/riscv/riscv/trap.c (revision 303667) @@ -1,361 +1,373 @@ /*- - * Copyright (c) 2015 Ruslan Bukin + * Copyright (c) 2015-2016 Ruslan Bukin * All rights reserved. * * Portions of this software were developed by SRI International and the * University of Cambridge Computer Laboratory under DARPA/AFRL contract * FA8750-10-C-0237 ("CTSRD"), as part of the DARPA CRASH research programme. * * Portions of this software were developed by the University of Cambridge * Computer Laboratory as part of the CTSRD Project, with support from the * UK Higher Education Innovation Fund (HEIF). * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #ifdef KDB #include #endif #include #include #include #include #include #include #include #include #include #include #include #ifdef KDTRACE_HOOKS #include #endif int (*dtrace_invop_jump_addr)(struct trapframe *); extern register_t fsu_intr_fault; /* Called from exception.S */ void do_trap_supervisor(struct trapframe *); void do_trap_user(struct trapframe *); static __inline void call_trapsignal(struct thread *td, int sig, int code, void *addr) { ksiginfo_t ksi; ksiginfo_init_trap(&ksi); ksi.ksi_signo = sig; ksi.ksi_code = code; ksi.ksi_addr = addr; trapsignal(td, &ksi); } int cpu_fetch_syscall_args(struct thread *td, struct syscall_args *sa) { struct proc *p; register_t *ap; int nap; nap = 8; p = td->td_proc; ap = &td->td_frame->tf_a[0]; sa->code = td->td_frame->tf_t[0]; if (sa->code == SYS_syscall || sa->code == SYS___syscall) { sa->code = *ap++; nap--; } if (p->p_sysent->sv_mask) sa->code &= p->p_sysent->sv_mask; if (sa->code >= p->p_sysent->sv_size) sa->callp = &p->p_sysent->sv_table[0]; else sa->callp = &p->p_sysent->sv_table[sa->code]; sa->narg = sa->callp->sy_narg; memcpy(sa->args, ap, nap * sizeof(register_t)); if (sa->narg > nap) panic("TODO: Could we have more than 8 args?"); td->td_retval[0] = 0; td->td_retval[1] = 0; return (0); } #include "../../kern/subr_syscall.c" static void dump_regs(struct trapframe *frame) { int n; int i; n = (sizeof(frame->tf_t) / sizeof(frame->tf_t[0])); for (i = 0; i < n; i++) printf("t[%d] == 0x%016lx\n", i, frame->tf_t[i]); n = (sizeof(frame->tf_s) / sizeof(frame->tf_s[0])); for (i = 0; i < n; i++) printf("s[%d] == 0x%016lx\n", i, frame->tf_s[i]); n = (sizeof(frame->tf_a) / sizeof(frame->tf_a[0])); for (i = 0; i < n; i++) printf("a[%d] == 0x%016lx\n", i, frame->tf_a[i]); printf("sepc == 0x%016lx\n", frame->tf_sepc); printf("sstatus == 0x%016lx\n", frame->tf_sstatus); } static void svc_handler(struct trapframe *frame) { struct syscall_args sa; struct thread *td; int error; td = curthread; td->td_frame = frame; error = syscallenter(td, &sa); syscallret(td, error, &sa); } static void data_abort(struct trapframe *frame, int lower) { struct vm_map *map; uint64_t sbadaddr; struct thread *td; struct pcb *pcb; vm_prot_t ftype; vm_offset_t va; struct proc *p; int ucode; int error; int sig; #ifdef KDB if (kdb_active) { kdb_reenter(); return; } #endif td = curthread; pcb = td->td_pcb; /* * Special case for fuswintr and suswintr. These can't sleep so * handle them early on in the trap handler. */ if (__predict_false(pcb->pcb_onfault == (vm_offset_t)&fsu_intr_fault)) { frame->tf_sepc = pcb->pcb_onfault; return; } sbadaddr = frame->tf_sbadaddr; p = td->td_proc; if (lower) map = &td->td_proc->p_vmspace->vm_map; else { /* The top bit tells us which range to use */ if ((sbadaddr >> 63) == 1) map = kernel_map; else map = &td->td_proc->p_vmspace->vm_map; } va = trunc_page(sbadaddr); - if (frame->tf_scause == EXCP_STORE_ACCESS_FAULT) { + if (frame->tf_scause == EXCP_FAULT_STORE) { ftype = (VM_PROT_READ | VM_PROT_WRITE); } else { ftype = (VM_PROT_READ); } if (map != kernel_map) { /* * Keep swapout from messing with us during this * critical time. */ PROC_LOCK(p); ++p->p_lock; PROC_UNLOCK(p); /* Fault in the user page: */ error = vm_fault(map, va, ftype, VM_FAULT_NORMAL); PROC_LOCK(p); --p->p_lock; PROC_UNLOCK(p); } else { /* * Don't have to worry about process locking or stacks in the * kernel.
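In data_abort() above, the faulting address itself picks the map: the canonical 64-bit address split puts kernel addresses in the upper half, so bit 63 of sbadaddr distinguishes kernel_map from the process vm_map. A minimal model of that test; the example addresses are made up.

#include <stdio.h>
#include <stdint.h>

static const char *
map_for(uint64_t badaddr)
{
	/* The top bit selects the address range. */
	return ((badaddr >> 63) == 1 ? "kernel_map" : "user vm_map");
}

int
main(void)
{
	printf("%#lx -> %s\n", (unsigned long)0x00000000deadbeefUL,
	    map_for(0x00000000deadbeefUL));	/* user vm_map */
	printf("%#lx -> %s\n", (unsigned long)0xffffffc000123456UL,
	    map_for(0xffffffc000123456UL));	/* kernel_map */
	return (0);
}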
*/ error = vm_fault(map, va, ftype, VM_FAULT_NORMAL); } if (error != KERN_SUCCESS) { if (lower) { sig = SIGSEGV; if (error == KERN_PROTECTION_FAILURE) ucode = SEGV_ACCERR; else ucode = SEGV_MAPERR; call_trapsignal(td, sig, ucode, (void *)sbadaddr); } else { if (td->td_intr_nesting_level == 0 && pcb->pcb_onfault != 0) { frame->tf_a[0] = error; frame->tf_sepc = pcb->pcb_onfault; return; } dump_regs(frame); panic("vm_fault failed: %lx, va 0x%016lx", frame->tf_sepc, sbadaddr); } } if (lower) userret(td, frame); } void do_trap_supervisor(struct trapframe *frame) { uint64_t exception; + uint64_t sstatus; + /* Ensure we came from supervisor mode, interrupts disabled */ + __asm __volatile("csrr %0, sstatus" : "=&r" (sstatus)); + KASSERT((sstatus & (SSTATUS_SPP | SSTATUS_SIE)) == SSTATUS_SPP, + ("We must have come from S mode with interrupts disabled")); + exception = (frame->tf_scause & EXCP_MASK); if (frame->tf_scause & EXCP_INTR) { /* Interrupt */ riscv_cpu_intr(frame); return; } #ifdef KDTRACE_HOOKS if (dtrace_trap_func != NULL && (*dtrace_trap_func)(frame, exception)) return; #endif CTR3(KTR_TRAP, "do_trap_supervisor: curthread: %p, sepc: %lx, frame: %p", curthread, frame->tf_sepc, frame); switch(exception) { - case EXCP_LOAD_ACCESS_FAULT: - case EXCP_STORE_ACCESS_FAULT: - case EXCP_INSTR_ACCESS_FAULT: + case EXCP_FAULT_LOAD: + case EXCP_FAULT_STORE: + case EXCP_FAULT_FETCH: data_abort(frame, 0); break; - case EXCP_INSTR_BREAKPOINT: + case EXCP_BREAKPOINT: #ifdef KDTRACE_HOOKS if (dtrace_invop_jump_addr != 0) { dtrace_invop_jump_addr(frame); break; } #endif #ifdef KDB kdb_trap(exception, 0, frame); #else dump_regs(frame); panic("No debugger in kernel.\n"); #endif break; - case EXCP_INSTR_ILLEGAL: + case EXCP_ILLEGAL_INSTRUCTION: dump_regs(frame); panic("Illegal instruction at 0x%016lx\n", frame->tf_sepc); break; default: dump_regs(frame); panic("Unknown kernel exception %x badaddr %lx\n", exception, frame->tf_sbadaddr); } } void do_trap_user(struct trapframe *frame) { uint64_t exception; struct thread *td; + uint64_t sstatus; td = curthread; td->td_frame = frame; + /* Ensure we came from usermode, interrupts disabled */ + __asm __volatile("csrr %0, sstatus" : "=&r" (sstatus)); + KASSERT((sstatus & (SSTATUS_SPP | SSTATUS_SIE)) == 0, + ("We must have come from U mode with interrupts disabled")); + exception = (frame->tf_scause & EXCP_MASK); if (frame->tf_scause & EXCP_INTR) { /* Interrupt */ riscv_cpu_intr(frame); return; } CTR3(KTR_TRAP, "do_trap_user: curthread: %p, sepc: %lx, frame: %p", curthread, frame->tf_sepc, frame); switch(exception) { - case EXCP_LOAD_ACCESS_FAULT: - case EXCP_STORE_ACCESS_FAULT: - case EXCP_INSTR_ACCESS_FAULT: + case EXCP_FAULT_LOAD: + case EXCP_FAULT_STORE: + case EXCP_FAULT_FETCH: data_abort(frame, 1); break; - case EXCP_UMODE_ENV_CALL: + case EXCP_USER_ECALL: frame->tf_sepc += 4; /* Next instruction */ svc_handler(frame); break; - case EXCP_INSTR_ILLEGAL: + case EXCP_ILLEGAL_INSTRUCTION: call_trapsignal(td, SIGILL, ILL_ILLTRP, (void *)frame->tf_sepc); userret(td, frame); break; - case EXCP_INSTR_BREAKPOINT: + case EXCP_BREAKPOINT: call_trapsignal(td, SIGTRAP, TRAP_BRKPT, (void *)frame->tf_sepc); userret(td, frame); break; default: dump_regs(frame); panic("Unknown userland exception %x badaddr %lx\n", exception, frame->tf_sbadaddr); } } Index: user/alc/PQ_LAUNDRY/sys/riscv/riscv/vm_machdep.c =================================================================== --- user/alc/PQ_LAUNDRY/sys/riscv/riscv/vm_machdep.c (revision 303666) +++
user/alc/PQ_LAUNDRY/sys/riscv/riscv/vm_machdep.c (revision 303667) @@ -1,252 +1,253 @@ /*- - * Copyright (c) 2015 Ruslan Bukin + * Copyright (c) 2015-2016 Ruslan Bukin * All rights reserved. * * Portions of this software were developed by SRI International and the * University of Cambridge Computer Laboratory under DARPA/AFRL contract * FA8750-10-C-0237 ("CTSRD"), as part of the DARPA CRASH research programme. * * Portions of this software were developed by the University of Cambridge * Computer Laboratory as part of the CTSRD Project, with support from the * UK Higher Education Innovation Fund (HEIF). * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* * Finish a fork operation, with process p2 nearly set up. * Copy and update the pcb, set up the stack so that the child * ready to run and return to user mode. */ void cpu_fork(struct thread *td1, struct proc *p2, struct thread *td2, int flags) { struct pcb *pcb2; struct trapframe *tf; if ((flags & RFPROC) == 0) return; pcb2 = (struct pcb *)(td2->td_kstack + td2->td_kstack_pages * PAGE_SIZE) - 1; td2->td_pcb = pcb2; bcopy(td1->td_pcb, pcb2, sizeof(*pcb2)); td2->td_pcb->pcb_l1addr = vtophys(vmspace_pmap(td2->td_proc->p_vmspace)->pm_l1); tf = (struct trapframe *)STACKALIGN((struct trapframe *)pcb2 - 1); bcopy(td1->td_frame, tf, sizeof(*tf)); /* Clear syscall error flag */ tf->tf_t[0] = 0; /* Arguments for child */ tf->tf_a[0] = 0; tf->tf_a[1] = 0; - tf->tf_sstatus = SSTATUS_PIE; + tf->tf_sstatus = (SSTATUS_SPIE); + tf->tf_sstatus |= (MSTATUS_PRV_U << MSTATUS_SPP_SHIFT); td2->td_frame = tf; /* Set the return value registers for fork() */ td2->td_pcb->pcb_s[0] = (uintptr_t)fork_return; td2->td_pcb->pcb_s[1] = (uintptr_t)td2; td2->td_pcb->pcb_ra = (uintptr_t)fork_trampoline; td2->td_pcb->pcb_sp = (uintptr_t)td2->td_frame; /* Setup to release spin count in fork_exit(). 
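cpu_fork() above now seeds the child's sstatus explicitly: SPIE so that sret re-enables interrupts, and SPP set to the user encoding so that sret drops to U mode. A sketch of the bit assembly; the field positions below are assumptions written as local defines (a priv-1.9-era layout), not the kernel's headers.

#include <stdio.h>
#include <stdint.h>

/* Illustrative sstatus fields; positions assumed for the sketch. */
#define SSTATUS_SPIE		(1UL << 5)	/* restored to SIE by sret */
#define SSTATUS_SPP_SHIFT	8		/* previous privilege field */
#define PRV_U			0UL		/* user-mode encoding */

int
main(void)
{
	uint64_t sstatus;

	/* Child trapframe: return to U mode with interrupts enabled. */
	sstatus = SSTATUS_SPIE;
	sstatus |= (PRV_U << SSTATUS_SPP_SHIFT);
	printf("child tf_sstatus = %#lx\n", (unsigned long)sstatus);
	return (0);
}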
*/ td2->td_md.md_spinlock_count = 1; - td2->td_md.md_saved_sstatus_ie = 1; + td2->td_md.md_saved_sstatus_ie = (SSTATUS_SIE); } void cpu_reset(void) { printf("cpu_reset"); while(1) __asm volatile("wfi" ::: "memory"); } void cpu_thread_swapin(struct thread *td) { } void cpu_thread_swapout(struct thread *td) { } void cpu_set_syscall_retval(struct thread *td, int error) { struct trapframe *frame; frame = td->td_frame; switch (error) { case 0: frame->tf_a[0] = td->td_retval[0]; frame->tf_a[1] = td->td_retval[1]; frame->tf_t[0] = 0; /* syscall succeeded */ break; case ERESTART: frame->tf_sepc -= 4; /* prev instruction */ break; case EJUSTRETURN: break; default: frame->tf_a[0] = error; frame->tf_t[0] = 1; /* syscall error */ break; } } /* * Initialize machine state, mostly pcb and trap frame for a new * thread, about to return to userspace. Put enough state in the new * thread's PCB to get it to go back to the fork_return(), which * finalizes the thread state and handles peculiarities of the first * return to userspace for the new thread. */ void cpu_copy_thread(struct thread *td, struct thread *td0) { bcopy(td0->td_frame, td->td_frame, sizeof(struct trapframe)); bcopy(td0->td_pcb, td->td_pcb, sizeof(struct pcb)); td->td_pcb->pcb_s[0] = (uintptr_t)fork_return; td->td_pcb->pcb_s[1] = (uintptr_t)td; td->td_pcb->pcb_ra = (uintptr_t)fork_trampoline; td->td_pcb->pcb_sp = (uintptr_t)td->td_frame; /* Setup to release spin count in fork_exit(). */ td->td_md.md_spinlock_count = 1; - td->td_md.md_saved_sstatus_ie = 1; + td->td_md.md_saved_sstatus_ie = (SSTATUS_SIE); } /* * Set that machine state for performing an upcall that starts * the entry function with the given argument. */ void cpu_set_upcall(struct thread *td, void (*entry)(void *), void *arg, stack_t *stack) { struct trapframe *tf = td->td_frame; tf->tf_sp = STACKALIGN((uintptr_t)stack->ss_sp + stack->ss_size); tf->tf_sepc = (register_t)entry; tf->tf_a[0] = (register_t)arg; } int cpu_set_user_tls(struct thread *td, void *tls_base) { struct pcb *pcb; if ((uintptr_t)tls_base >= VM_MAXUSER_ADDRESS) return (EINVAL); pcb = td->td_pcb; pcb->pcb_tp = (register_t)tls_base; return (0); } void cpu_thread_exit(struct thread *td) { } void cpu_thread_alloc(struct thread *td) { td->td_pcb = (struct pcb *)(td->td_kstack + td->td_kstack_pages * PAGE_SIZE) - 1; td->td_frame = (struct trapframe *)STACKALIGN( (caddr_t)td->td_pcb - 8 - sizeof(struct trapframe)); } void cpu_thread_free(struct thread *td) { } void cpu_thread_clean(struct thread *td) { } /* * Intercept the return address from a freshly forked process that has NOT * been scheduled yet. * * This is needed to make kernel threads stay in kernel mode. */ void cpu_fork_kthread_handler(struct thread *td, void (*func)(void *), void *arg) { td->td_pcb->pcb_s[0] = (uintptr_t)func; td->td_pcb->pcb_s[1] = (uintptr_t)arg; td->td_pcb->pcb_ra = (uintptr_t)fork_trampoline; td->td_pcb->pcb_sp = (uintptr_t)td->td_frame; } void cpu_exit(struct thread *td) { } void swi_vm(void *v) { /* Nothing to do here - busdma bounce buffers are not implemented. 
*/ } Index: user/alc/PQ_LAUNDRY/usr.bin/sed/Makefile =================================================================== --- user/alc/PQ_LAUNDRY/usr.bin/sed/Makefile (revision 303666) +++ user/alc/PQ_LAUNDRY/usr.bin/sed/Makefile (revision 303667) @@ -1,15 +1,15 @@ # @(#)Makefile 8.1 (Berkeley) 6/6/93 # $FreeBSD$ .include PROG= sed SRCS= compile.c main.c misc.c process.c -WARNS?= 5 +WARNS?= 2 .if ${MK_TESTS} != "no" SUBDIR+= tests .endif .include Index: user/alc/PQ_LAUNDRY/usr.bin/sed/compile.c =================================================================== --- user/alc/PQ_LAUNDRY/usr.bin/sed/compile.c (revision 303666) +++ user/alc/PQ_LAUNDRY/usr.bin/sed/compile.c (revision 303667) @@ -1,945 +1,949 @@ /*- * Copyright (c) 1992 Diomidis Spinellis. * Copyright (c) 1992, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Diomidis Spinellis of Imperial College, University of London. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #ifndef lint static const char sccsid[] = "@(#)compile.c 8.1 (Berkeley) 6/6/93"; #endif #include #include #include #include #include #include #include #include #include #include #include #include #include "defs.h" #include "extern.h" #define LHSZ 128 #define LHMASK (LHSZ - 1) static struct labhash { struct labhash *lh_next; u_int lh_hash; struct s_command *lh_cmd; int lh_ref; } *labels[LHSZ]; -static const char *compile_addr(const char *, struct s_addr *); -static char *compile_ccl(const char **, char *); -static const char *compile_delimited(const char *, char *, int); -static const char *compile_flags(const char *, struct s_subst *); -static const regex_t *compile_re(const char *, int); -static const char *compile_subst(const char *, struct s_subst *); -static char *compile_text(size_t *); -static const char *compile_tr(const char *, struct s_tr **); +static char *compile_addr(char *, struct s_addr *); +static char *compile_ccl(char **, char *); +static char *compile_delimited(char *, char *, int); +static char *compile_flags(char *, struct s_subst *); +static regex_t *compile_re(char *, int); +static char *compile_subst(char *, struct s_subst *); +static char *compile_text(void); +static char *compile_tr(char *, struct s_tr **); static struct s_command **compile_stream(struct s_command **); -static char *duptoeol(const char *, const char *, size_t *); +static char *duptoeol(char *, const char *); static void enterlabel(struct s_command *); static struct s_command - *findlabel(const char *); -static void fixuplabel(struct s_command *, const struct s_command *); + *findlabel(char *); +static void fixuplabel(struct s_command *, struct s_command *); static void uselabel(void); /* * Command specification. This is used to drive the command parser. */ struct s_format { char code; /* Command code */ int naddr; /* Number of address args */ enum e_args args; /* Argument type */ }; static struct s_format cmd_fmts[] = { {'{', 2, GROUP}, {'}', 0, ENDGROUP}, {'a', 1, TEXT}, {'b', 2, BRANCH}, {'c', 2, TEXT}, {'d', 2, EMPTY}, {'D', 2, EMPTY}, {'g', 2, EMPTY}, {'G', 2, EMPTY}, {'h', 2, EMPTY}, {'H', 2, EMPTY}, {'i', 1, TEXT}, {'l', 2, EMPTY}, {'n', 2, EMPTY}, {'N', 2, EMPTY}, {'p', 2, EMPTY}, {'P', 2, EMPTY}, {'q', 1, EMPTY}, {'r', 1, RFILE}, {'s', 2, SUBST}, {'t', 2, BRANCH}, {'w', 2, WFILE}, {'x', 2, EMPTY}, {'y', 2, TR}, {'!', 2, NONSEL}, {':', 0, LABEL}, {'#', 0, COMMENT}, {'=', 1, EMPTY}, {'\0', 0, COMMENT}, }; /* The compiled program. */ struct s_command *prog; /* * Compile the program into prog. * Initialise appends. 
*/ void compile(void) { *compile_stream(&prog) = NULL; fixuplabel(prog, NULL); uselabel(); if (appendnum == 0) appends = NULL; else if ((appends = malloc(sizeof(struct s_appends) * appendnum)) == NULL) err(1, "malloc"); if ((match = malloc((maxnsub + 1) * sizeof(regmatch_t))) == NULL) err(1, "malloc"); } -#define EATSPACE() do { \ - while (*p && isspace((unsigned char)*p)) \ - p++; \ +#define EATSPACE() do { \ + if (p) \ + while (*p && isspace((unsigned char)*p)) \ + p++; \ } while (0) -#define EATSPACEN() do { \ - while (*p && *p != '\n' && isspace((unsigned char)*p)) \ - p++; \ - } while (0) - static struct s_command ** compile_stream(struct s_command **link) { - const char *p; + char *p; + static char lbuf[_POSIX2_LINE_MAX + 1]; /* To save stack */ struct s_command *cmd, *cmd2, *stack; struct s_format *fp; char re[_POSIX2_LINE_MAX + 1]; int naddr; /* Number of addresses */ stack = NULL; for (;;) { - if ((p = cu_fgets(NULL)) == NULL) { + if ((p = cu_fgets(lbuf, sizeof(lbuf), NULL)) == NULL) { if (stack != NULL) errx(1, "%lu: %s: unexpected EOF (pending }'s)", linenum, fname); return (link); } -semicolon: EATSPACEN(); - switch (*p) { - case '#': case '\0': case '\n': - continue; /* to next command-unit */ - case ';': - p++; - goto semicolon; +semicolon: EATSPACE(); + if (p) { + if (*p == '#' || *p == '\0') + continue; + else if (*p == ';') { + p++; + goto semicolon; + } } - if ((*link = cmd = malloc(sizeof(struct s_command))) == NULL) err(1, "malloc"); link = &cmd->next; cmd->startline = cmd->nonsel = 0; /* First parse the addresses */ naddr = 0; /* Valid characters to start an address */ #define addrchar(c) (strchr("0123456789/\\$", (c))) if (addrchar(*p)) { naddr++; if ((cmd->a1 = malloc(sizeof(struct s_addr))) == NULL) err(1, "malloc"); p = compile_addr(p, cmd->a1); EATSPACE(); /* EXTENSION */ if (*p == ',') { p++; EATSPACE(); /* EXTENSION */ naddr++; if ((cmd->a2 = malloc(sizeof(struct s_addr))) == NULL) err(1, "malloc"); p = compile_addr(p, cmd->a2); EATSPACE(); } else cmd->a2 = NULL; } else cmd->a1 = cmd->a2 = NULL; nonsel: /* Now parse the command */ - if (*p == '\0' || *p == '\n') + if (!*p) errx(1, "%lu: %s: command expected", linenum, fname); cmd->code = *p; for (fp = cmd_fmts; fp->code; fp++) if (fp->code == *p) break; if (!fp->code) - errx(1, "%lu: %s: invalid command code %c (%s)", linenum, fname, *p, p); + errx(1, "%lu: %s: invalid command code %c", linenum, fname, *p); if (naddr > fp->naddr) errx(1, "%lu: %s: command %c expects up to %d address(es), found %d", linenum, fname, *p, fp->naddr, naddr); switch (fp->args) { case NONSEL: /* ! */ p++; EATSPACE(); cmd->nonsel = 1; goto nonsel; case GROUP: /* { */ p++; - EATSPACEN(); + EATSPACE(); cmd->next = stack; stack = cmd; link = &cmd->u.c; - if (*p != '\0' && *p != '\n') + if (*p) goto semicolon; break; case ENDGROUP: /* * Short-circuit command processing, since end of * group is really just a noop. 
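The parser above is table driven: each command letter maps to an s_format entry that records how many addresses the command accepts and which argument parser to run, and the lookup is a simple scan to the NUL sentinel. A condensed standalone version of that dispatch, with only a few commands included:

#include <stdio.h>

enum e_args { EMPTY, TEXT, BRANCH, SUBST };

struct s_format {
	char code;		/* command letter */
	int naddr;		/* max number of addresses */
	enum e_args args;	/* which argument parser to run */
};

static struct s_format cmd_fmts[] = {
	{ 'd', 2, EMPTY }, { 'a', 1, TEXT }, { 'b', 2, BRANCH },
	{ 's', 2, SUBST }, { '\0', 0, EMPTY },
};

int
main(void)
{
	struct s_format *fp;
	char cmd = 's';
	int naddr = 2;

	for (fp = cmd_fmts; fp->code; fp++)	/* sentinel-terminated scan */
		if (fp->code == cmd)
			break;
	if (!fp->code)
		printf("invalid command code %c\n", cmd);
	else if (naddr > fp->naddr)
		printf("command %c expects up to %d address(es)\n",
		    cmd, fp->naddr);
	else
		printf("dispatch argument parser %d for '%c'\n",
		    fp->args, cmd);
	return (0);
}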
*/ cmd->nonsel = 1; if (stack == NULL) errx(1, "%lu: %s: unexpected }", linenum, fname); cmd2 = stack; stack = cmd2->next; cmd2->next = cmd; /*FALLTHROUGH*/ case EMPTY: /* d D g G h H l n N p P q x = \0 */ p++; - EATSPACEN(); + EATSPACE(); if (*p == ';') { p++; link = &cmd->next; goto semicolon; } - if (*p != '\0' && *p != '\n') + if (*p) errx(1, "%lu: %s: extra characters at the end of %c command", linenum, fname, cmd->code); break; case TEXT: /* a c i */ p++; EATSPACE(); if (*p != '\\') errx(1, "%lu: %s: command %c expects \\ followed by text", linenum, fname, cmd->code); p++; - EATSPACEN(); - if (*p != '\n') + EATSPACE(); + if (*p) errx(1, - "%lu: %s: extra characters (%c) after \\ at the end of %c command", - linenum, fname, *p, cmd->code); - cmd->t = compile_text(&cmd->tlen); + "%lu: %s: extra characters after \\ at the end of %c command", + linenum, fname, cmd->code); + cmd->t = compile_text(); break; case COMMENT: /* \0 # */ break; case WFILE: /* w */ p++; EATSPACE(); if (*p == '\0') errx(1, "%lu: %s: filename expected", linenum, fname); - cmd->t = duptoeol(p, "w command", &cmd->tlen); + cmd->t = duptoeol(p, "w command"); if (aflag) cmd->u.fd = -1; - else if ((cmd->u.fd = open(cmd->t, + else if ((cmd->u.fd = open(p, O_WRONLY|O_APPEND|O_CREAT|O_TRUNC, DEFFILEMODE)) == -1) err(1, "%s", p); break; case RFILE: /* r */ p++; EATSPACE(); if (*p == '\0') errx(1, "%lu: %s: filename expected", linenum, fname); else - cmd->t = duptoeol(p, "read command", &cmd->tlen); + cmd->t = duptoeol(p, "read command"); break; case BRANCH: /* b t */ p++; - EATSPACEN(); - if (*p == '\0' || *p == '\n') + EATSPACE(); + if (*p == '\0') cmd->t = NULL; else - cmd->t = duptoeol(p, "branch", &cmd->tlen); + cmd->t = duptoeol(p, "branch"); break; case LABEL: /* : */ p++; EATSPACE(); - cmd->t = duptoeol(p, "label", &cmd->tlen); - if (cmd->t[0] == '\0') + cmd->t = duptoeol(p, "label"); + if (strlen(p) == 0) errx(1, "%lu: %s: empty label", linenum, fname); enterlabel(cmd); break; case SUBST: /* s */ p++; - if (*p == '\0' || *p == '\\' || *p == '\n') + if (*p == '\0' || *p == '\\') errx(1, "%lu: %s: substitute pattern can not be delimited by newline or backslash", linenum, fname); if ((cmd->u.s = calloc(1, sizeof(struct s_subst))) == NULL) err(1, "malloc"); p = compile_delimited(p, re, 0); if (p == NULL) errx(1, "%lu: %s: unterminated substitute pattern", linenum, fname); + /* Compile RE with no case sensitivity temporarily */ + if (*re == '\0') + cmd->u.s->re = NULL; + else + cmd->u.s->re = compile_re(re, 0); --p; p = compile_subst(p, cmd->u.s); p = compile_flags(p, cmd->u.s); - if (*re != '\0') + /* Recompile RE with case sensitivity from "I" flag if any */ + if (*re == '\0') + cmd->u.s->re = NULL; + else cmd->u.s->re = compile_re(re, cmd->u.s->icase); - EATSPACE(); - if (*p == ';') { p++; link = &cmd->next; goto semicolon; } break; case TR: /* y */ p++; p = compile_tr(p, &cmd->u.y); EATSPACE(); if (*p == ';') { p++; link = &cmd->next; goto semicolon; } if (*p) errx(1, "%lu: %s: extra text at the end of a transform command", linenum, fname); break; } } } /* * Get a delimited string. P points to the delimiter of the string; d points * to a buffer area. Newline and delimiter escapes are processed; other * escapes are ignored. * * Returns a pointer to the first character after the final delimiter or NULL * in the case of a non-terminated string. The character array d is filled * with the processed string. 
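The comment above states the contract for compile_delimited(). As a compact illustration, here is a stripped-down delimited-string scanner in the same spirit; unlike the real function it handles only the newline escape and an escaped delimiter, not bracket expressions.

#include <stdio.h>

/* Copy the string delimited by *p's first character into d; return a
 * pointer past the closing delimiter, or NULL if it never appears. */
static const char *
get_delimited(const char *p, char *d)
{
	char c;

	if ((c = *p++) == '\0')
		return (NULL);
	while (*p) {
		if (p[0] == '\\' && p[1] == c)
			p++;			/* escaped delimiter */
		else if (p[0] == '\\' && p[1] == 'n') {
			*d++ = '\n';		/* \n escape */
			p += 2;
			continue;
		} else if (*p == c) {
			*d = '\0';
			return (p + 1);		/* past final delimiter */
		}
		*d++ = *p++;
	}
	return (NULL);				/* unterminated */
}

int
main(void)
{
	char buf[64];
	const char *rest = get_delimited("/foo\\/bar\\n/g", buf);

	printf("rest=[%s]\n", rest != NULL ? rest : "(null)");
	return (0);
}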
*/ -static const char * -compile_delimited(const char *p, char *d, int is_tr) +static char * +compile_delimited(char *p, char *d, int is_tr) { char c; c = *p++; if (c == '\0') return (NULL); else if (c == '\\') errx(1, "%lu: %s: \\ can not be used as a string delimiter", linenum, fname); else if (c == '\n') errx(1, "%lu: %s: newline can not be used as a string delimiter", linenum, fname); while (*p) { if (*p == '[' && *p != c) { if ((d = compile_ccl(&p, d)) == NULL) errx(1, "%lu: %s: unbalanced brackets ([])", linenum, fname); continue; } else if (*p == '\\' && p[1] == '[') { *d++ = *p++; } else if (*p == '\\' && p[1] == c) p++; else if (*p == '\\' && p[1] == 'n') { *d++ = '\n'; p += 2; continue; } else if (*p == '\\' && p[1] == '\\') { if (is_tr) p++; else *d++ = *p++; } else if (*p == c) { *d = '\0'; return (p + 1); } *d++ = *p++; } return (NULL); } /* compile_ccl: expand a POSIX character class */ static char * -compile_ccl(const char **sp, char *t) +compile_ccl(char **sp, char *t) { int c, d; - const char *s = *sp; + char *s = *sp; *t++ = *s++; if (*s == '^') *t++ = *s++; if (*s == ']') *t++ = *s++; for (; *s && (*t = *s) != ']'; s++, t++) if (*s == '[' && ((d = *(s+1)) == '.' || d == ':' || d == '=')) { *++t = *++s, t++, s++; for (c = *s; (*t = *s) != ']' || c != d; s++, t++) if ((c = *s) == '\0') return NULL; } return (*s == ']') ? *sp = ++s, ++t : NULL; } /* * Compiles the regular expression in RE and returns a pointer to the compiled * regular expression. * Cflags are passed to regcomp. */ -static const regex_t * -compile_re(const char *re, int case_insensitive) +static regex_t * +compile_re(char *re, int case_insensitive) { regex_t *rep; int eval, flags; flags = rflags; if (case_insensitive) flags |= REG_ICASE; if ((rep = malloc(sizeof(regex_t))) == NULL) err(1, "malloc"); if ((eval = regcomp(rep, re, flags)) != 0) errx(1, "%lu: %s: RE error: %s", linenum, fname, strregerror(eval, rep)); if (maxnsub < rep->re_nsub) maxnsub = rep->re_nsub; return (rep); } /* * Compile the substitution string of a regular expression and set res to * point to a saved copy of it. Nsub is the number of parenthesized regular * expressions. */ -static const char * -compile_subst(const char *p, struct s_subst *s) +static char * +compile_subst(char *p, struct s_subst *s) { + static char lbuf[_POSIX2_LINE_MAX + 1]; int asize, size; u_char ref; char c, *text, *op, *sp; - int more = 0, sawesc = 0; + int more = 1, sawesc = 0; c = *p++; /* Terminator character */ if (c == '\0') return (NULL); s->maxbref = 0; s->linenum = linenum; asize = 2 * _POSIX2_LINE_MAX + 1; if ((text = malloc(asize)) == NULL) err(1, "malloc"); size = 0; do { op = sp = text + size; - for (; *p != '\0' && *p != '\n'; p++) { + for (; *p; p++) { if (*p == '\\' || sawesc) { /* * If this is a continuation from the last * buffer, we won't have a character to * skip over. */ if (sawesc) sawesc = 0; else p++; if (*p == '\0') { /* * This escaped character is continued * in the next part of the line. Note * this fact, then cause the loop to * exit w/ normal EOL case and reenter * above with the new buffer. 
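* For example, when an "s" command longer than the fixed-size line buffer is split directly after a backslash, the escaped character only arrives with the next buffer from cu_fgets(); sawesc carries the pending escape across that refill.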
*/ sawesc = 1; p--; - break; - } else if (*p == '\n') { - *sp++ = '\n'; - break; + continue; } else if (strchr("123456789", *p) != NULL) { *sp++ = '\\'; ref = *p - '0'; if (s->re != NULL && ref > s->re->re_nsub) errx(1, "%lu: %s: \\%c not defined in the RE", linenum, fname, *p); if (s->maxbref < ref) s->maxbref = ref; } else if (*p == '&' || *p == '\\') *sp++ = '\\'; } else if (*p == c) { if (*++p == '\0' && more) { - const char *nextp; - - nextp = cu_fgets(&more); - if (nextp != NULL) - p = nextp; + if (cu_fgets(lbuf, sizeof(lbuf), &more)) + p = lbuf; } *sp++ = '\0'; size += sp - op; if ((s->new = realloc(text, size)) == NULL) err(1, "realloc"); return (p); } else if (*p == '\n') { errx(1, "%lu: %s: unescaped newline inside substitute pattern", linenum, fname); /* NOTREACHED */ } *sp++ = *p; } size += sp - op; if (asize - size < _POSIX2_LINE_MAX + 1) { asize *= 2; if ((text = realloc(text, asize)) == NULL) err(1, "realloc"); } - } while ((p = cu_fgets(&more)) != NULL); + } while (cu_fgets(p = lbuf, sizeof(lbuf), &more) != NULL); errx(1, "%lu: %s: unterminated substitute in regular expression", linenum, fname); /* NOTREACHED */ } /* * Compile the flags of the s command */ -static const char * -compile_flags(const char *p, struct s_subst *s) +static char * +compile_flags(char *p, struct s_subst *s) { int gn; /* True if we have seen g or n */ unsigned long nval; - char *q; + char wfile[_POSIX2_LINE_MAX + 1], *q, *eq; s->n = 1; /* Default */ s->p = 0; s->wfile = NULL; s->wfd = -1; s->icase = 0; for (gn = 0;;) { - EATSPACEN(); /* EXTENSION */ + EATSPACE(); /* EXTENSION */ switch (*p) { case 'g': if (gn) errx(1, "%lu: %s: more than one number or 'g' in substitute flags", linenum, fname); gn = 1; s->n = 0; break; case '\0': case '\n': case ';': return (p); case 'p': s->p = 1; break; case 'i': case 'I': s->icase = 1; break; case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': if (gn) errx(1, "%lu: %s: more than one number or 'g' in substitute flags", linenum, fname); gn = 1; errno = 0; - nval = strtol(p, &q, 10); + nval = strtol(p, &p, 10); if (errno == ERANGE || nval > INT_MAX) errx(1, "%lu: %s: overflow in the 'N' substitute flag", linenum, fname); s->n = nval; - p = q; - continue; + p--; + break; case 'w': p++; #ifdef HISTORIC_PRACTICE if (*p != ' ') { warnx("%lu: %s: space missing before w wfile", linenum, fname); return (p); } #endif EATSPACE(); - s->wfile = duptoeol(p, "w flag", NULL); - if (!aflag && (s->wfd = open(s->wfile, + q = wfile; + eq = wfile + sizeof(wfile) - 1; + while (*p) { + if (*p == '\n') + break; + if (q >= eq) + err(1, "wfile too long"); + *q++ = *p++; + } + *q = '\0'; + if (q == wfile) + errx(1, "%lu: %s: no wfile specified", linenum, fname); + s->wfile = strdup(wfile); + if (!aflag && (s->wfd = open(wfile, O_WRONLY|O_APPEND|O_CREAT|O_TRUNC, DEFFILEMODE)) == -1) - err(1, "%s", s->wfile); + err(1, "%s", wfile); return (p); default: - errx(1, "%lu: %s: bad flag in substitute command: '%c' (%.10s)", - linenum, fname, *p, p); + errx(1, "%lu: %s: bad flag in substitute command: '%c'", + linenum, fname, *p); break; } p++; } } /* * Compile a translation set of strings into a lookup table. 
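* For example, "y/abc/xyz/" fills bytetab so that 'a' maps to 'x', 'b' to 'y' and 'c' to 'z' while every other byte maps to itself, letting do_tr() translate the pattern space with one table lookup per byte (multibyte characters fall back to the multis vector).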
*/ -static const char * -compile_tr(const char *p, struct s_tr **py) +static char * +compile_tr(char *p, struct s_tr **py) { struct s_tr *y; int i; const char *op, *np; char old[_POSIX2_LINE_MAX + 1]; char new[_POSIX2_LINE_MAX + 1]; size_t oclen, oldlen, nclen, newlen; mbstate_t mbs1, mbs2; if ((*py = y = malloc(sizeof(*y))) == NULL) - err(1, "malloc"); + err(1, NULL); y->multis = NULL; y->nmultis = 0; if (*p == '\0' || *p == '\\') errx(1, "%lu: %s: transform pattern can not be delimited by newline or backslash", linenum, fname); p = compile_delimited(p, old, 1); if (p == NULL) errx(1, "%lu: %s: unterminated transform source string", linenum, fname); p = compile_delimited(p - 1, new, 1); if (p == NULL) errx(1, "%lu: %s: unterminated transform target string", linenum, fname); EATSPACE(); op = old; oldlen = mbsrtowcs(NULL, &op, 0, NULL); if (oldlen == (size_t)-1) - err(1, "mbsrtowcs"); + err(1, NULL); np = new; newlen = mbsrtowcs(NULL, &np, 0, NULL); if (newlen == (size_t)-1) - err(1, "mbsrtowcs"); + err(1, NULL); if (newlen != oldlen) errx(1, "%lu: %s: transform strings are not the same length", linenum, fname); if (MB_CUR_MAX == 1) { /* * The single-byte encoding case is easy: generate a * lookup table. */ for (i = 0; i <= UCHAR_MAX; i++) y->bytetab[i] = (char)i; for (; *op; op++, np++) y->bytetab[(u_char)*op] = *np; } else { /* * Multi-byte encoding case: generate a lookup table as * above, but only for single-byte characters. The first * bytes of multi-byte characters have their lookup table * entries set to 0, which causes do_tr() to search through * an auxiliary vector of multi-byte mappings. */ memset(&mbs1, 0, sizeof(mbs1)); memset(&mbs2, 0, sizeof(mbs2)); for (i = 0; i <= UCHAR_MAX; i++) y->bytetab[i] = (btowc(i) != WEOF) ? i : 0; while (*op != '\0') { oclen = mbrlen(op, MB_LEN_MAX, &mbs1); if (oclen == (size_t)-1 || oclen == (size_t)-2) errc(1, EILSEQ, NULL); nclen = mbrlen(np, MB_LEN_MAX, &mbs2); if (nclen == (size_t)-1 || nclen == (size_t)-2) errc(1, EILSEQ, NULL); if (oclen == 1 && nclen == 1) y->bytetab[(u_char)*op] = *np; else { y->bytetab[(u_char)*op] = 0; y->multis = realloc(y->multis, (y->nmultis + 1) * sizeof(*y->multis)); if (y->multis == NULL) - err(1, "realloc"); + err(1, NULL); i = y->nmultis++; y->multis[i].fromlen = oclen; memcpy(y->multis[i].from, op, oclen); y->multis[i].tolen = nclen; memcpy(y->multis[i].to, np, nclen); } op += oclen; np += nclen; } } return (p); } /* * Compile the text following an a, c, or i command. */ static char * -compile_text(size_t *ptlen) +compile_text(void) { int asize, esc_nl, size; - char *text, *s; - const char *p, *op; + char *text, *p, *op, *s; + char lbuf[_POSIX2_LINE_MAX + 1]; asize = 2 * _POSIX2_LINE_MAX + 1; if ((text = malloc(asize)) == NULL) err(1, "malloc"); size = 0; - while ((p = cu_fgets(NULL)) != NULL) { + while (cu_fgets(lbuf, sizeof(lbuf), NULL) != NULL) { op = s = text + size; + p = lbuf; for (esc_nl = 0; *p != '\0'; p++) { if (*p == '\\' && p[1] != '\0' && *++p == '\n') esc_nl = 1; *s++ = *p; - if (*p == '\n') - break; } size += s - op; if (!esc_nl) { *s = '\0'; break; } if (asize - size < _POSIX2_LINE_MAX + 1) { asize *= 2; if ((text = realloc(text, asize)) == NULL) err(1, "realloc"); } } text[size] = '\0'; - if ((text = realloc(text, size + 1)) == NULL) + if ((p = realloc(text, size + 1)) == NULL) err(1, "realloc"); - *ptlen = size; - return (text); + return (p); } /* * Get an address and return a pointer to the first character after * it. Fill the structure pointed to according to the address. 
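* For example, "/err/I" compiles to a case-insensitive AT_RE address, "$" to AT_LAST, "+2" to AT_RELLINE with u.l = 2, and a bare number such as "42" to AT_LINE.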
*/ -static const char * -compile_addr(const char *p, struct s_addr *a) +static char * +compile_addr(char *p, struct s_addr *a) { char *end, re[_POSIX2_LINE_MAX + 1]; int icase; icase = 0; a->type = 0; switch (*p) { case '\\': /* Context address */ ++p; /* FALLTHROUGH */ case '/': /* Context address */ p = compile_delimited(p, re, 0); if (p == NULL) errx(1, "%lu: %s: unterminated regular expression", linenum, fname); /* Check for case insensitive regexp flag */ if (*p == 'I') { icase = 1; p++; } if (*re == '\0') a->u.r = NULL; else a->u.r = compile_re(re, icase); a->type = AT_RE; return (p); case '$': /* Last line */ a->type = AT_LAST; return (p + 1); case '+': /* Relative line number */ a->type = AT_RELLINE; p++; /* FALLTHROUGH */ /* Line number */ case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': if (a->type == 0) a->type = AT_LINE; a->u.l = strtol(p, &end, 10); return (end); default: errx(1, "%lu: %s: expected context address", linenum, fname); return (NULL); } } /* * duptoeol -- * Return a copy of all the characters up to \n or \0. */ static char * -duptoeol(const char *s, const char *ctype, size_t *ptlen) +duptoeol(char *s, const char *ctype) { size_t len; int ws; - char *p; - const char *start; + char *p, *start; ws = 0; for (start = s; *s != '\0' && *s != '\n'; ++s) ws = isspace((unsigned char)*s); + *s = '\0'; if (ws) warnx("%lu: %s: whitespace after %s", linenum, fname, ctype); - len = s - start; - if ((p = malloc(len + 1)) == NULL) + len = s - start + 1; + if ((p = malloc(len)) == NULL) err(1, "malloc"); - memmove(p, start, len); - p[len] = '\0'; - if (ptlen != NULL) - *ptlen = len; - return p; + return (memmove(p, start, len)); } /* * Convert goto label names to addresses, and count a and r commands, in * the given subset of the script. Free the memory used by labels in b * and t commands (but not by :). * * TODO: Remove } nodes */ static void -fixuplabel(struct s_command *cp, const struct s_command *end) +fixuplabel(struct s_command *cp, struct s_command *end) { for (; cp != end; cp = cp->next) switch (cp->code) { case 'a': case 'r': appendnum++; break; case 'b': case 't': /* Resolve branch target. */ if (cp->t == NULL) { cp->u.c = NULL; break; } if ((cp->u.c = findlabel(cp->t)) == NULL) - errx(1, "%lu: %s: %c: undefined label '%s'", linenum, fname, cp->code, cp->t); + errx(1, "%lu: %s: undefined label '%s'", linenum, fname, cp->t); free(cp->t); break; case '{': /* Do interior commands. */ fixuplabel(cp->u.c, cp->next); break; } } /* * Associate the given command label for later lookup. */ static void enterlabel(struct s_command *cp) { struct labhash **lhp, *lh; u_char *p; u_int h, c; for (h = 0, p = (u_char *)cp->t; (c = *p) != 0; p++) h = (h << 5) + h + c; lhp = &labels[h & LHMASK]; for (lh = *lhp; lh != NULL; lh = lh->lh_next) if (lh->lh_hash == h && strcmp(cp->t, lh->lh_cmd->t) == 0) errx(1, "%lu: %s: duplicate label '%s'", linenum, fname, cp->t); if ((lh = malloc(sizeof *lh)) == NULL) err(1, "malloc"); lh->lh_next = *lhp; lh->lh_hash = h; lh->lh_cmd = cp; lh->lh_ref = 0; *lhp = lh; } /* * Find the command that the given label name refers to in the label hash * table. Return NULL if the label was never defined.
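* The lookup below uses the same multiplicative string hash as enterlabel() (h = h * 33 + c) over the LHSZ-bucket table, and marks the entry referenced so that uselabel() can warn about labels no b or t command ever reaches.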
*/ static struct s_command * -findlabel(const char *name) +findlabel(char *name) { struct labhash *lh; - const u_char *p; + u_char *p; u_int h, c; - for (h = 0, p = (const u_char *)name; (c = *p) != 0; p++) + for (h = 0, p = (u_char *)name; (c = *p) != 0; p++) h = (h << 5) + h + c; for (lh = labels[h & LHMASK]; lh != NULL; lh = lh->lh_next) { if (lh->lh_hash == h && strcmp(name, lh->lh_cmd->t) == 0) { lh->lh_ref = 1; return (lh->lh_cmd); } } return (NULL); } /* * Warn about any unused labels. As a side effect, release the label hash * table space. */ static void uselabel(void) { struct labhash *lh, *next; int i; for (i = 0; i < LHSZ; i++) { for (lh = labels[i]; lh != NULL; lh = next) { next = lh->lh_next; if (!lh->lh_ref) warnx("%lu: %s: unused label '%s'", linenum, fname, lh->lh_cmd->t); free(lh); } } } Index: user/alc/PQ_LAUNDRY/usr.bin/sed/defs.h =================================================================== --- user/alc/PQ_LAUNDRY/usr.bin/sed/defs.h (revision 303666) +++ user/alc/PQ_LAUNDRY/usr.bin/sed/defs.h (revision 303667) @@ -1,150 +1,149 @@ /*- * Copyright (c) 1992 Diomidis Spinellis. * Copyright (c) 1992, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Diomidis Spinellis of Imperial College, University of London. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)defs.h 8.1 (Berkeley) 6/6/93 * $FreeBSD$ */ /* * Types of address specifications */ enum e_atype { AT_RE = 1, /* Lines that match an RE */ AT_LINE, /* Specific line */ AT_RELLINE, /* Relative line */ AT_LAST, /* Last line */ }; /* * Format of an address */ struct s_addr { enum e_atype type; /* Address type */ union { u_long l; /* Line number */ - const regex_t *r; /* Regular expression */ + regex_t *r; /* Regular expression */ } u; }; /* * Substitution command */ struct s_subst { int n; /* Occurrence to subst.
*/ int p; /* True if p flag */ int icase; /* True if I flag */ char *wfile; /* NULL if no wfile */ int wfd; /* Cached file descriptor */ - const regex_t *re; /* Regular expression */ + regex_t *re; /* Regular expression */ unsigned int maxbref; /* Largest backreference. */ u_long linenum; /* Line number. */ char *new; /* Replacement text */ }; /* * Translate command. */ struct s_tr { unsigned char bytetab[256]; struct trmulti { size_t fromlen; char from[MB_LEN_MAX]; size_t tolen; char to[MB_LEN_MAX]; } *multis; int nmultis; }; /* * An internally compiled command. * Initially, label references are stored in t; on a second pass they * are updated to pointers. */ struct s_command { struct s_command *next; /* Pointer to next command */ struct s_addr *a1, *a2; /* Start and end address */ u_long startline; /* Start line number or zero */ char *t; /* Text for : a c i r w */ - size_t tlen; union { struct s_command *c; /* Command(s) for b t { */ struct s_subst *s; /* Substitute command */ struct s_tr *y; /* Replace command array */ int fd; /* File descriptor for w */ } u; char code; /* Command code */ u_int nonsel:1; /* True if ! */ }; /* * Types of command arguments recognised by the parser */ enum e_args { EMPTY, /* d D g G h H l n N p P q x = \0 */ TEXT, /* a c i */ NONSEL, /* ! */ GROUP, /* { */ ENDGROUP, /* } */ COMMENT, /* # */ BRANCH, /* b t */ LABEL, /* : */ RFILE, /* r */ WFILE, /* w */ SUBST, /* s */ TR /* y */ }; /* * Structure containing things to append before a line is read */ struct s_appends { enum {AP_STRING, AP_FILE} type; char *s; size_t len; }; enum e_spflag { APPEND, /* Append to the contents. */ REPLACE, /* Replace the contents. */ }; /* * Structure for a space (process, hold, otherwise). */ typedef struct { char *space; /* Current space pointer. */ size_t len; /* Current length. */ int deleted; /* If deleted. */ int append_newline; /* If originally terminated by \n. */ char *back; /* Backing memory. */ size_t blen; /* Backing memory length. */ } SPACE; Index: user/alc/PQ_LAUNDRY/usr.bin/sed/extern.h =================================================================== --- user/alc/PQ_LAUNDRY/usr.bin/sed/extern.h (revision 303666) +++ user/alc/PQ_LAUNDRY/usr.bin/sed/extern.h (revision 303667) @@ -1,56 +1,56 @@ /*- * Copyright (c) 1992 Diomidis Spinellis. * Copyright (c) 1992, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Diomidis Spinellis of Imperial College, University of London. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED.
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)extern.h 8.1 (Berkeley) 6/6/93 * $FreeBSD$ */ extern struct s_command *prog; extern struct s_appends *appends; extern regmatch_t *match; extern size_t maxnsub; extern u_long linenum; extern int appendnum; extern int aflag, eflag, nflag; extern const char *fname, *outfname; extern FILE *infile, *outfile; extern int rflags; /* regex flags to use */ -void cfclose(struct s_command *, const struct s_command *); +void cfclose(struct s_command *, struct s_command *); void compile(void); void cspace(SPACE *, const char *, size_t, enum e_spflag); -const char *cu_fgets(int *); +char *cu_fgets(char *, int, int *); int mf_fgets(SPACE *, enum e_spflag); int lastline(void); void process(void); void resetstate(void); -char *strregerror(int, const regex_t *); +char *strregerror(int, regex_t *); Index: user/alc/PQ_LAUNDRY/usr.bin/sed/main.c =================================================================== --- user/alc/PQ_LAUNDRY/usr.bin/sed/main.c (revision 303666) +++ user/alc/PQ_LAUNDRY/usr.bin/sed/main.c (revision 303667) @@ -1,532 +1,542 @@ /*- * Copyright (c) 2013 Johann 'Myrkraverk' Oskarsson. * Copyright (c) 1992 Diomidis Spinellis. * Copyright (c) 1992, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Diomidis Spinellis of Imperial College, University of London. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #ifndef lint static const char copyright[] = "@(#) Copyright (c) 1992, 1993\n\ The Regents of the University of California. 
All rights reserved.\n"; #endif #ifndef lint static const char sccsid[] = "@(#)main.c 8.2 (Berkeley) 1/3/94"; #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "defs.h" #include "extern.h" /* * Linked list of units (strings and files) to be compiled */ struct s_compunit { struct s_compunit *next; enum e_cut {CU_FILE, CU_STRING} type; - const char *s; /* Pointer to string or fname */ + char *s; /* Pointer to string or fname */ }; /* * Linked list pointer to compilation units and pointer to current * next pointer. */ static struct s_compunit *script, **cu_nextp = &script; /* * Linked list of files to be processed */ struct s_flist { - const char *fname; + char *fname; struct s_flist *next; }; /* * Linked list pointer to files and pointer to current * next pointer. */ static struct s_flist *files, **fl_nextp = &files; FILE *infile; /* Current input file */ FILE *outfile; /* Current output file */ int aflag, eflag, nflag; int rflags = 0; static int rval; /* Exit status */ static int ispan; /* Whether inplace editing spans across files */ /* * Current file and line number; line numbers restart across compilation * units, but span across input files. The latter is optional if editing * in place. */ const char *fname; /* File name. */ const char *outfname; /* Output file name */ static char oldfname[PATH_MAX]; /* Old file name (for in-place editing) */ static char tmpfname[PATH_MAX]; /* Temporary file name (for in-place editing) */ static const char *inplace; /* Inplace edit file extension. */ u_long linenum; -static void add_compunit(enum e_cut, const char *); -static void add_file(const char *); +static void add_compunit(enum e_cut, char *); +static void add_file(char *); static void usage(void); int main(int argc, char *argv[]) { - char *temp_arg; int c, fflag; + char *temp_arg; (void) setlocale(LC_ALL, ""); fflag = 0; inplace = NULL; while ((c = getopt(argc, argv, "EI:ae:f:i:lnru")) != -1) switch (c) { case 'r': /* Gnu sed compat */ case 'E': rflags = REG_EXTENDED; break; case 'I': inplace = optarg; ispan = 1; /* span across input files */ break; case 'a': aflag = 1; break; case 'e': eflag = 1; - asprintf(&temp_arg, "%s\n", optarg); - if (temp_arg == NULL) - errx(1, "Couldn't allocate temporary buffer"); + if ((temp_arg = malloc(strlen(optarg) + 2)) == NULL) + err(1, "malloc"); + strcpy(temp_arg, optarg); + strcat(temp_arg, "\n"); add_compunit(CU_STRING, temp_arg); break; case 'f': fflag = 1; add_compunit(CU_FILE, optarg); break; case 'i': inplace = optarg; ispan = 0; /* don't span across input files */ break; case 'l': if(setvbuf(stdout, NULL, _IOLBF, 0) != 0) warnx("setting line buffered output failed"); break; case 'n': nflag = 1; break; case 'u': if(setvbuf(stdout, NULL, _IONBF, 0) != 0) warnx("setting unbuffered output failed"); break; default: case '?': usage(); } argc -= optind; argv += optind; /* First usage case; script is the first arg */ if (!eflag && !fflag && *argv) { add_compunit(CU_STRING, *argv); argv++; } compile(); /* Continue with first and start second usage */ if (*argv) for (; *argv; argv++) add_file(*argv); else add_file(NULL); process(); cfclose(prog, NULL); if (fclose(stdout)) err(1, "stdout"); exit(rval); } static void usage(void) { (void)fprintf(stderr, "usage: %s script [-Ealnru] [-i extension] [file ...]\n" "\t%s [-Ealnu] [-i extension] [-e script] ... [-f script_file]" " ... 
[file ...]\n", getprogname(), getprogname()); exit(1); } /* * Like fgets, but go through the chain of compilation units chaining them * together. Empty strings and files are ignored. */ -const char * -cu_fgets(int *more) +char * +cu_fgets(char *buf, int n, int *more) { static enum {ST_EOF, ST_FILE, ST_STRING} state = ST_EOF; static FILE *f; /* Current open file */ - static const char *s; /* Current pointer inside string */ - static char string_ident[30], *lastresult; - static size_t lastsize; + static char *s; /* Current pointer inside string */ + static char string_ident[30]; char *p; - const char *start; again: switch (state) { case ST_EOF: if (script == NULL) { if (more != NULL) *more = 0; return (NULL); } linenum = 0; switch (script->type) { case CU_FILE: if ((f = fopen(script->s, "r")) == NULL) err(1, "%s", script->s); fname = script->s; state = ST_FILE; goto again; case CU_STRING: if (((size_t)snprintf(string_ident, sizeof(string_ident), "\"%s\"", script->s)) >= sizeof(string_ident) - 1) (void)strcpy(string_ident + sizeof(string_ident) - 6, " ...\""); fname = string_ident; s = script->s; state = ST_STRING; goto again; } case ST_FILE: - p = lastresult; - if (getline(&p, &lastsize, f) != -1) { + if ((p = fgets(buf, n, f)) != NULL) { linenum++; - if (linenum == 1 && p[0] == '#' && p[1] == 'n') + if (linenum == 1 && buf[0] == '#' && buf[1] == 'n') nflag = 1; if (more != NULL) *more = !feof(f); - return (lastresult = p); - } else if (ferror(f)) - err(1, "%s", script->s); + return (p); + } script = script->next; (void)fclose(f); state = ST_EOF; goto again; case ST_STRING: if (linenum == 0 && s[0] == '#' && s[1] == 'n') nflag = 1; - else if (s[0] == '\0') { - state = ST_EOF; - script = script->next; - goto again; - } - start = s; + p = buf; for (;;) { + if (n-- <= 1) { + *p = '\0'; + linenum++; + if (more != NULL) + *more = 1; + return (buf); + } switch (*s) { case '\0': state = ST_EOF; - script = script->next; - /* FALLTHROUGH */ + if (s == script->s) { + script = script->next; + goto again; + } else { + script = script->next; + *p = '\0'; + linenum++; + if (more != NULL) + *more = 0; + return (buf); + } case '\n': + *p++ = '\n'; + *p = '\0'; s++; linenum++; if (more != NULL) *more = 0; - return (start); + return (buf); default: - s++; + *p++ = *s++; } } } /* NOTREACHED */ return (NULL); } /* * Like fgets, but go through the list of files chaining them together. * Set len to the length of the line. */ int mf_fgets(SPACE *sp, enum e_spflag spflag) { struct stat sb; ssize_t len; char *dirbuf, *basebuf; static char *p = NULL; static size_t plen = 0; int c; static int firstfile; if (infile == NULL) { /* stdin? */ if (files->fname == NULL) { if (inplace != NULL) errx(1, "-I or -i may not be used with stdin"); infile = stdin; fname = "stdin"; outfile = stdout; outfname = "stdout"; } firstfile = 1; } for (;;) { if (infile != NULL && (c = getc(infile)) != EOF) { (void)ungetc(c, infile); break; } /* If we are here then either eof or no files are open yet */ if (infile == stdin) { sp->len = 0; return (0); } if (infile != NULL) { fclose(infile); if (*oldfname != '\0') { /* if there was a backup file, remove it */ unlink(oldfname); /* * Backup the original. Note that hard links * are not supported on all filesystems. 
*/ if ((link(fname, oldfname) != 0) && (rename(fname, oldfname) != 0)) { warn("rename()"); if (*tmpfname) unlink(tmpfname); exit(1); } *oldfname = '\0'; } if (*tmpfname != '\0') { if (outfile != NULL && outfile != stdout) if (fclose(outfile) != 0) { warn("fclose()"); unlink(tmpfname); exit(1); } outfile = NULL; if (rename(tmpfname, fname) != 0) { /* this really should not happen! */ warn("rename()"); unlink(tmpfname); exit(1); } *tmpfname = '\0'; } outfname = NULL; } if (firstfile == 0) files = files->next; else firstfile = 0; if (files == NULL) { sp->len = 0; return (0); } fname = files->fname; if (inplace != NULL) { if (lstat(fname, &sb) != 0) err(1, "%s", fname); if (!(sb.st_mode & S_IFREG)) errx(1, "%s: %s %s", fname, "in-place editing only", "works for regular files"); if (*inplace != '\0') { strlcpy(oldfname, fname, sizeof(oldfname)); len = strlcat(oldfname, inplace, sizeof(oldfname)); - if ((size_t)len > sizeof(oldfname)) + if (len > (ssize_t)sizeof(oldfname)) errx(1, "%s: name too long", fname); } if ((dirbuf = strdup(fname)) == NULL || (basebuf = strdup(fname)) == NULL) err(1, "strdup"); len = snprintf(tmpfname, sizeof(tmpfname), "%s/.!%ld!%s", dirname(dirbuf), (long)getpid(), basename(basebuf)); free(dirbuf); free(basebuf); - if ((size_t)len >= sizeof(tmpfname)) + if (len >= (ssize_t)sizeof(tmpfname)) errx(1, "%s: name too long", fname); unlink(tmpfname); if (outfile != NULL && outfile != stdout) fclose(outfile); if ((outfile = fopen(tmpfname, "w")) == NULL) err(1, "%s", fname); fchown(fileno(outfile), sb.st_uid, sb.st_gid); fchmod(fileno(outfile), sb.st_mode & ALLPERMS); outfname = tmpfname; if (!ispan) { linenum = 0; resetstate(); } } else { outfile = stdout; outfname = "stdout"; } if ((infile = fopen(fname, "r")) == NULL) { warn("%s", fname); rval = 1; continue; } } /* * We are here only when infile is open and we still have something * to read from it. * * Use getline() so that we can handle essentially infinite input * data. The p and plen are static so each invocation gives * getline() the same buffer which is expanded as needed. */ len = getline(&p, &plen, infile); if (len == -1) err(1, "%s", fname); if (len != 0 && p[len - 1] == '\n') { sp->append_newline = 1; len--; } else if (!lastline()) { sp->append_newline = 1; } else { sp->append_newline = 0; } cspace(sp, p, len, spflag); linenum++; return (1); } /* * Add a compilation unit to the linked list */ static void -add_compunit(enum e_cut type, const char *s) +add_compunit(enum e_cut type, char *s) { struct s_compunit *cu; if ((cu = malloc(sizeof(struct s_compunit))) == NULL) err(1, "malloc"); cu->type = type; cu->s = s; cu->next = NULL; *cu_nextp = cu; cu_nextp = &cu->next; } /* * Add a file to the linked list */ static void -add_file(const char *s) +add_file(char *s) { struct s_flist *fp; if ((fp = malloc(sizeof(struct s_flist))) == NULL) err(1, "malloc"); fp->next = NULL; *fl_nextp = fp; fp->fname = s; fl_nextp = &fp->next; } static int next_files_have_lines(void) { struct s_flist *file; FILE *file_fd; int ch; file = files; while ((file = file->next) != NULL) { if ((file_fd = fopen(file->fname, "r")) == NULL) continue; if ((ch = getc(file_fd)) != EOF) { /* * This next file has content, therefore the current * file doesn't contain the last line.
*/ ungetc(ch, file_fd); fclose(file_fd); return (1); } fclose(file_fd); } return (0); } int lastline(void) { int ch; if (feof(infile)) { return !( (inplace == NULL || ispan) && next_files_have_lines()); } if ((ch = getc(infile)) == EOF) { return !( (inplace == NULL || ispan) && next_files_have_lines()); } ungetc(ch, infile); return (0); } Index: user/alc/PQ_LAUNDRY/usr.bin/sed/misc.c =================================================================== --- user/alc/PQ_LAUNDRY/usr.bin/sed/misc.c (revision 303666) +++ user/alc/PQ_LAUNDRY/usr.bin/sed/misc.c (revision 303667) @@ -1,69 +1,71 @@ /*- * Copyright (c) 1992 Diomidis Spinellis. * Copyright (c) 1992, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Diomidis Spinellis of Imperial College, University of London. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #ifndef lint static const char sccsid[] = "@(#)misc.c 8.1 (Berkeley) 6/6/93"; #endif #include #include #include #include #include #include #include #include "defs.h" #include "extern.h" /* * Return a string for a regular expression error passed. This is overkill, * because of the silly semantics of regerror (we can never know the size of * the buffer). */ char * -strregerror(int errcode, const regex_t *preg) +strregerror(int errcode, regex_t *preg) { static char *oe; size_t s; + if (oe != NULL) + free(oe); s = regerror(errcode, preg, NULL, 0); - if ((oe = realloc(oe, s)) == NULL) - err(1, "realloc"); + if ((oe = malloc(s)) == NULL) + err(1, "malloc"); (void)regerror(errcode, preg, oe, s); return (oe); } Index: user/alc/PQ_LAUNDRY/usr.bin/sed/process.c =================================================================== --- user/alc/PQ_LAUNDRY/usr.bin/sed/process.c (revision 303666) +++ user/alc/PQ_LAUNDRY/usr.bin/sed/process.c (revision 303667) @@ -1,785 +1,785 @@ /*- * Copyright (c) 1992 Diomidis Spinellis. * Copyright (c) 1992, 1993, 1994 * The Regents of the University of California. All rights reserved. 
* * This code is derived from software contributed to Berkeley by * Diomidis Spinellis of Imperial College, University of London. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #ifndef lint static const char sccsid[] = "@(#)process.c 8.6 (Berkeley) 4/20/94"; #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "defs.h" #include "extern.h" static SPACE HS, PS, SS, YS; #define pd PS.deleted #define ps PS.space #define psl PS.len #define psanl PS.append_newline #define hs HS.space #define hsl HS.len -static inline int applies(struct s_command *); -static void do_tr(const struct s_tr *); -static void flush_appends(void); -static void lputs(const char *, size_t); -static int regexec_e(const regex_t *, const char *, int, int, - size_t, size_t); -static void regsub(SPACE *, const char *, const char *); -static int substitute(const struct s_command *); +static inline int applies(struct s_command *); +static void do_tr(struct s_tr *); +static void flush_appends(void); +static void lputs(char *, size_t); +static int regexec_e(regex_t *, const char *, int, int, size_t, + size_t); +static void regsub(SPACE *, char *, char *); +static int substitute(struct s_command *); struct s_appends *appends; /* Array of pointers to strings to append. */ static int appendx; /* Index into appends array. */ int appendnum; /* Size of appends array. */ static int lastaddr; /* Set by applies if last address of a range. */ static int sdone; /* If any substitutes since last line input. */ /* Iov structure for 'w' commands. 
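* defpreg below caches the most recently used RE so that an empty pattern, as in "s//replacement/", reuses the last one supplied; regexec_e() fails if no RE has been seen yet.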
*/ -static const regex_t *defpreg; +static regex_t *defpreg; size_t maxnsub; regmatch_t *match; #define OUT() do { \ fwrite(ps, 1, psl, outfile); \ if (psanl) fputc('\n', outfile); \ } while (0) void process(void) { struct s_command *cp; SPACE tspace; size_t oldpsl; char *p; int oldpsanl; p = NULL; oldpsanl = oldpsl = 0; for (linenum = 0; mf_fgets(&PS, REPLACE);) { pd = 0; top: cp = prog; redirect: while (cp != NULL) { if (!applies(cp)) { cp = cp->next; continue; } switch (cp->code) { case '{': cp = cp->u.c; goto redirect; case 'a': if (appendx >= appendnum) if ((appends = realloc(appends, sizeof(struct s_appends) * (appendnum *= 2))) == NULL) err(1, "realloc"); appends[appendx].type = AP_STRING; appends[appendx].s = cp->t; appends[appendx].len = strlen(cp->t); appendx++; break; case 'b': cp = cp->u.c; goto redirect; case 'c': pd = 1; psl = 0; if (cp->a2 == NULL || lastaddr || lastline()) (void)fprintf(outfile, "%s", cp->t); break; case 'd': pd = 1; goto new; case 'D': if (pd) goto new; if (psl == 0 || (p = memchr(ps, '\n', psl)) == NULL) { pd = 1; goto new; } else { psl -= (p + 1) - ps; memmove(ps, p + 1, psl); goto top; } case 'g': cspace(&PS, hs, hsl, REPLACE); break; case 'G': cspace(&PS, "\n", 1, APPEND); cspace(&PS, hs, hsl, APPEND); break; case 'h': cspace(&HS, ps, psl, REPLACE); break; case 'H': cspace(&HS, "\n", 1, APPEND); cspace(&HS, ps, psl, APPEND); break; case 'i': (void)fprintf(outfile, "%s", cp->t); break; case 'l': lputs(ps, psl); break; case 'n': if (!nflag && !pd) OUT(); flush_appends(); if (!mf_fgets(&PS, REPLACE)) exit(0); pd = 0; break; case 'N': flush_appends(); cspace(&PS, "\n", 1, APPEND); if (!mf_fgets(&PS, APPEND)) exit(0); break; case 'p': if (pd) break; OUT(); break; case 'P': if (pd) break; if ((p = memchr(ps, '\n', psl)) != NULL) { oldpsl = psl; oldpsanl = psanl; psl = p - ps; psanl = 1; } OUT(); if (p != NULL) { psl = oldpsl; psanl = oldpsanl; } break; case 'q': if (!nflag && !pd) OUT(); flush_appends(); exit(0); case 'r': if (appendx >= appendnum) if ((appends = realloc(appends, sizeof(struct s_appends) * (appendnum *= 2))) == NULL) err(1, "realloc"); appends[appendx].type = AP_FILE; appends[appendx].s = cp->t; appends[appendx].len = strlen(cp->t); appendx++; break; case 's': sdone |= substitute(cp); break; case 't': if (sdone) { sdone = 0; cp = cp->u.c; goto redirect; } break; case 'w': if (pd) break; if (cp->u.fd == -1 && (cp->u.fd = open(cp->t, O_WRONLY|O_APPEND|O_CREAT|O_TRUNC, DEFFILEMODE)) == -1) err(1, "%s", cp->t); if (write(cp->u.fd, ps, psl) != (ssize_t)psl || write(cp->u.fd, "\n", 1) != 1) err(1, "%s", cp->t); break; case 'x': /* * If the hold space is null, make it empty * but not null. Otherwise the pattern space * will become null after the swap, which is * an abnormal condition. */ if (hs == NULL) cspace(&HS, "", 0, REPLACE); tspace = PS; PS = HS; psanl = tspace.append_newline; HS = tspace; break; case 'y': if (pd || psl == 0) break; do_tr(cp->u.y); break; case ':': case '}': break; case '=': (void)fprintf(outfile, "%lu\n", linenum); } cp = cp->next; } /* for all cp */ new: if (!nflag && !pd) OUT(); flush_appends(); } /* for all lines */ } /* * TRUE if the address passed matches the current program state * (lastline, linenumber, ps). */ #define MATCH(a) \ ((a)->type == AT_RE ? regexec_e((a)->u.r, ps, 0, 1, 0, psl) : \ (a)->type == AT_LINE ? linenum == (a)->u.l : lastline()) /* * Return TRUE if the command applies to the current line. Sets the start * line for process ranges. Interprets the non-select (``!'') flag. 
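* For example, "/begin/,/end/p" records the line number in cp->startline when a1 first matches and keeps returning true until a2 matches (setting lastaddr on that closing line), while a trailing '!' as in "2!d" inverts the result via the nonsel bit.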
*/ static inline int applies(struct s_command *cp) { int r; lastaddr = 0; if (cp->a1 == NULL && cp->a2 == NULL) r = 1; else if (cp->a2) if (cp->startline > 0) { switch (cp->a2->type) { case AT_RELLINE: if (linenum - cp->startline <= cp->a2->u.l) r = 1; else { cp->startline = 0; r = 0; } break; default: if (MATCH(cp->a2)) { cp->startline = 0; lastaddr = 1; r = 1; } else if (cp->a2->type == AT_LINE && linenum > cp->a2->u.l) { /* * We missed the 2nd address due to a * branch, so just close the range and * return false. */ cp->startline = 0; r = 0; } else r = 1; } } else if (cp->a1 && MATCH(cp->a1)) { /* * If the second address is a number less than or * equal to the line number first selected, only * one line shall be selected. * -- POSIX 1003.2 * Likewise if the relative second line address is zero. */ if ((cp->a2->type == AT_LINE && linenum >= cp->a2->u.l) || (cp->a2->type == AT_RELLINE && cp->a2->u.l == 0)) lastaddr = 1; else { cp->startline = linenum; } r = 1; } else r = 0; else r = MATCH(cp->a1); return (cp->nonsel ? ! r : r); } /* * Reset the sed processor to its initial state. */ void resetstate(void) { struct s_command *cp; /* * Reset all in-range markers. */ for (cp = prog; cp; cp = cp->code == '{' ? cp->u.c : cp->next) if (cp->a2) cp->startline = 0; /* * Clear out the hold space. */ cspace(&HS, "", 0, REPLACE); } /* * substitute -- * Do substitutions in the pattern space. Currently, we build a * copy of the new pattern space in the substitute space structure * and then swap them. */ static int -substitute(const struct s_command *cp) +substitute(struct s_command *cp) { SPACE tspace; - const regex_t *re; + regex_t *re; regoff_t slen; int lastempty, n; - regoff_t le = 0; + size_t le = 0; char *s; s = ps; re = cp->u.s->re; if (re == NULL) { if (defpreg != NULL && cp->u.s->maxbref > defpreg->re_nsub) { linenum = cp->u.s->linenum; errx(1, "%lu: %s: \\%u not defined in the RE", linenum, fname, cp->u.s->maxbref); } } if (!regexec_e(re, ps, 0, 0, 0, psl)) return (0); SS.len = 0; /* Clean substitute space. */ slen = psl; n = cp->u.s->n; lastempty = 1; do { /* Copy the leading retained string. */ if (n <= 1 && (match[0].rm_so > le)) cspace(&SS, s, match[0].rm_so - le, APPEND); /* Skip zero-length matches right after other matches. */ if (lastempty || (match[0].rm_so - le) || match[0].rm_so != match[0].rm_eo) { if (n <= 1) { /* Want this match: append replacement. */ regsub(&SS, ps, cp->u.s->new); if (n == 1) n = -1; } else { /* Want a later match: append original. */ if (match[0].rm_eo - le) cspace(&SS, s, match[0].rm_eo - le, APPEND); n--; } } /* Move past this match. */ s = ps + match[0].rm_eo; slen = psl - match[0].rm_eo; le = match[0].rm_eo; /* * After a zero-length match, advance one byte, * and at the end of the line, terminate. */ if (match[0].rm_so == match[0].rm_eo) { if (*s == '\0' || *s == '\n') slen = -1; else slen--; if (*s != '\0') { cspace(&SS, s++, 1, APPEND); le++; } lastempty = 1; } else lastempty = 0; } while (n >= 0 && slen >= 0 && regexec_e(re, ps, REG_NOTBOL, 0, le, psl)); /* Did not find the requested number of matches. */ if (n > 0) return (0); /* Copy the trailing retained string. */ if (slen > 0) cspace(&SS, s, slen, APPEND); /* * Swap the substitute space and the pattern space, and make sure * that any leftover pointers into stdio memory get lost. */ tspace = PS; PS = SS; psanl = tspace.append_newline; SS = tspace; SS.space = SS.back; /* Handle the 'p' flag. */ if (cp->u.s->p) OUT(); /* Handle the 'w' flag. 
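* For example, "s/a/b/w file" appends each substituted pattern space to the named file; the descriptor is opened once on first use and cached in wfd.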
*/ if (cp->u.s->wfile && !pd) { if (cp->u.s->wfd == -1 && (cp->u.s->wfd = open(cp->u.s->wfile, O_WRONLY|O_APPEND|O_CREAT|O_TRUNC, DEFFILEMODE)) == -1) err(1, "%s", cp->u.s->wfile); if (write(cp->u.s->wfd, ps, psl) != (ssize_t)psl || write(cp->u.s->wfd, "\n", 1) != 1) err(1, "%s", cp->u.s->wfile); } return (1); } /* * do_tr -- * Perform translation ('y' command) in the pattern space. */ static void -do_tr(const struct s_tr *y) +do_tr(struct s_tr *y) { SPACE tmp; char c, *p; size_t clen, left; int i; if (MB_CUR_MAX == 1) { /* * Single-byte encoding: perform in-place translation * of the pattern space. */ for (p = ps; p < &ps[psl]; p++) *p = y->bytetab[(u_char)*p]; } else { /* * Multi-byte encoding: perform translation into the * translation space, then swap the translation and * pattern spaces. */ /* Clean translation space. */ YS.len = 0; for (p = ps, left = psl; left > 0; p += clen, left -= clen) { if ((c = y->bytetab[(u_char)*p]) != '\0') { cspace(&YS, &c, 1, APPEND); clen = 1; continue; } for (i = 0; i < y->nmultis; i++) if (left >= y->multis[i].fromlen && memcmp(p, y->multis[i].from, y->multis[i].fromlen) == 0) break; if (i < y->nmultis) { cspace(&YS, y->multis[i].to, y->multis[i].tolen, APPEND); clen = y->multis[i].fromlen; } else { cspace(&YS, p, 1, APPEND); clen = 1; } } /* Swap the translation space and the pattern space. */ tmp = PS; PS = YS; psanl = tmp.append_newline; YS = tmp; YS.space = YS.back; } } /* * Flush append requests. Always called before reading a line, * therefore it also resets the substitution done (sdone) flag. */ static void flush_appends(void) { FILE *f; int count, i; char buf[8 * 1024]; for (i = 0; i < appendx; i++) switch (appends[i].type) { case AP_STRING: fwrite(appends[i].s, sizeof(char), appends[i].len, outfile); break; case AP_FILE: /* * Read files probably shouldn't be cached. Since * it's not an error to read a non-existent file, * it's possible that another program is interacting * with the sed script through the filesystem. It * would be truly bizarre, but possible. It's probably * not that big a performance win, anyhow. */ if ((f = fopen(appends[i].s, "r")) == NULL) break; while ((count = fread(buf, sizeof(char), sizeof(buf), f))) (void)fwrite(buf, sizeof(char), count, outfile); (void)fclose(f); break; } if (ferror(outfile)) errx(1, "%s: %s", outfname, strerror(errno ? 
errno : EIO)); appendx = sdone = 0; } static void -lputs(const char *s, size_t len) +lputs(char *s, size_t len) { static const char escapes[] = "\\\a\b\f\r\t\v"; int c, col, width; const char *p; struct winsize win; static int termwidth = -1; size_t clen, i; wchar_t wc; mbstate_t mbs; if (outfile != stdout) termwidth = 60; if (termwidth == -1) { if ((p = getenv("COLUMNS")) && *p != '\0') termwidth = atoi(p); else if (ioctl(STDOUT_FILENO, TIOCGWINSZ, &win) == 0 && win.ws_col > 0) termwidth = win.ws_col; else termwidth = 60; } if (termwidth <= 0) termwidth = 1; memset(&mbs, 0, sizeof(mbs)); col = 0; while (len != 0) { clen = mbrtowc(&wc, s, len, &mbs); if (clen == 0) clen = 1; if (clen == (size_t)-1 || clen == (size_t)-2) { wc = (unsigned char)*s; clen = 1; memset(&mbs, 0, sizeof(mbs)); } if (wc == '\n') { if (col + 1 >= termwidth) fprintf(outfile, "\\\n"); fputc('$', outfile); fputc('\n', outfile); col = 0; } else if (iswprint(wc)) { width = wcwidth(wc); if (col + width >= termwidth) { fprintf(outfile, "\\\n"); col = 0; } fwrite(s, 1, clen, outfile); col += width; } else if (wc != L'\0' && (c = wctob(wc)) != EOF && (p = strchr(escapes, c)) != NULL) { if (col + 2 >= termwidth) { fprintf(outfile, "\\\n"); col = 0; } fprintf(outfile, "\\%c", "\\abfrtv"[p - escapes]); col += 2; } else { if (col + 4 * clen >= (unsigned)termwidth) { fprintf(outfile, "\\\n"); col = 0; } for (i = 0; i < clen; i++) fprintf(outfile, "\\%03o", (int)(unsigned char)s[i]); col += 4 * clen; } s += clen; len -= clen; } if (col + 1 >= termwidth) fprintf(outfile, "\\\n"); (void)fputc('$', outfile); (void)fputc('\n', outfile); if (ferror(outfile)) errx(1, "%s: %s", outfname, strerror(errno ? errno : EIO)); } static int -regexec_e(const regex_t *preg, const char *string, int eflags, int nomatch, +regexec_e(regex_t *preg, const char *string, int eflags, int nomatch, size_t start, size_t stop) { int eval; if (preg == NULL) { if (defpreg == NULL) errx(1, "first RE may not be empty"); } else defpreg = preg; /* Set anchors */ match[0].rm_so = start; match[0].rm_eo = stop; eval = regexec(defpreg, string, nomatch ? 0 : maxnsub + 1, match, eflags | REG_STARTEND); switch(eval) { case 0: return (1); case REG_NOMATCH: return (0); } errx(1, "RE error: %s", strregerror(eval, defpreg)); /* NOTREACHED */ } /* * regsub - perform substitutions after a regexp match * Based on a routine by Henry Spencer */ static void -regsub(SPACE *sp, const char *string, const char *src) +regsub(SPACE *sp, char *string, char *src) { int len, no; char c, *dst; #define NEEDSP(reqlen) \ /* XXX What is the +1 for? */ \ if (sp->len + (reqlen) + 1 >= sp->blen) { \ sp->blen += (reqlen) + 1024; \ if ((sp->space = sp->back = realloc(sp->back, sp->blen)) \ == NULL) \ err(1, "realloc"); \ dst = sp->space + sp->len; \ } dst = sp->space + sp->len; while ((c = *src++) != '\0') { if (c == '&') no = 0; else if (c == '\\' && isdigit((unsigned char)*src)) no = *src++ - '0'; else no = -1; if (no < 0) { /* Ordinary character. */ if (c == '\\' && (*src == '\\' || *src == '&')) c = *src++; NEEDSP(1); *dst++ = c; ++sp->len; } else if (match[no].rm_so != -1 && match[no].rm_eo != -1) { len = match[no].rm_eo - match[no].rm_so; NEEDSP(len); memmove(dst, string + match[no].rm_so, len); dst += len; sp->len += len; } } NEEDSP(1); *dst = '\0'; } /* * cspace -- * Concatenate space: append the source space to the destination space, * allocating new space as necessary. 
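* For example, appending to a full backing buffer grows blen to the required total plus 1024 before the memmove(), so a run of small appends reallocates roughly once per kilobyte.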
*/ void cspace(SPACE *sp, const char *p, size_t len, enum e_spflag spflag) { size_t tlen; /* Make sure SPACE has enough memory and ramp up quickly. */ tlen = sp->len + len + 1; if (tlen > sp->blen) { sp->blen = tlen + 1024; if ((sp->space = sp->back = realloc(sp->back, sp->blen)) == NULL) err(1, "realloc"); } if (spflag == REPLACE) sp->len = 0; memmove(sp->space + sp->len, p, len); sp->space[sp->len += len] = '\0'; } /* * Close all cached opened files and report any errors */ void -cfclose(struct s_command *cp, const struct s_command *end) +cfclose(struct s_command *cp, struct s_command *end) { for (; cp != end; cp = cp->next) switch(cp->code) { case 's': if (cp->u.s->wfd != -1 && close(cp->u.s->wfd)) err(1, "%s", cp->u.s->wfile); cp->u.s->wfd = -1; break; case 'w': if (cp->u.fd != -1 && close(cp->u.fd)) err(1, "%s", cp->t); cp->u.fd = -1; break; case '{': cfclose(cp->u.c, cp->next); break; } } Index: user/alc/PQ_LAUNDRY =================================================================== --- user/alc/PQ_LAUNDRY (revision 303666) +++ user/alc/PQ_LAUNDRY (revision 303667) Property changes on: user/alc/PQ_LAUNDRY ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head:r303654-303666