Index: projects/clang390-import/cddl/usr.sbin/zfsd/tests/Makefile =================================================================== --- projects/clang390-import/cddl/usr.sbin/zfsd/tests/Makefile (revision 305016) +++ projects/clang390-import/cddl/usr.sbin/zfsd/tests/Makefile (revision 305017) @@ -1,45 +1,45 @@ # $FreeBSD$ SRCDIR=${.CURDIR}/../../../.. .include "${.CURDIR}/../Makefile.common" .PATH: ${.CURDIR}/.. -TESTSDIR?= ${TESTSBASE}/cddl/sbin/zfsd +TESTSDIR?= ${TESTSBASE}/cddl/usr.sbin/zfsd PLAIN_TESTS_CXX= zfsd_unittest SRCS.zfsd_unittest:= ${SRCS:Nzfsd_main.cc} SRCS.zfsd_unittest+= libmocks.c zfsd_unittest.cc SRCS= # Use #include <zfsd/xxx.h> in test programs. INCFLAGS+= -I${.CURDIR}/../.. .if defined(DESTDIR) INCFLAGS+= -I${DESTDIR}/usr/include LIBRARY_PATH= ${DESTDIR}/lib:${DESTDIR}/usr/lib LDFLAGS.zfsd_unittest+= -L${DESTDIR}/lib -L${DESTDIR}/usr/lib .elif defined(WORLDTMP) INCFLAGS+= -I${WORLDTMP}/usr/include LIBRARY_PATH= ${WORLDTMP}/lib:${WORLDTMP}/usr/lib LDFLAGS.zfsd_unittest+= -L${WORLDTMP}/lib -L${WORLDTMP}/usr/lib .else LIBRARY_PATH= .endif # Googletest options LOCALBASE?= /usr/local INCFLAGS+= -I${LOCALBASE}/include -D_THREAD_SAFE -pthread LDFLAGS.zfsd_unittest+= -L${LOCALBASE}/lib -D_THREAD_SAFE -pthread LDADD.zfsd_unittest+= ${LOCALBASE}/lib/libgtest.a # GoogleMock options LDADD.zfsd_unittest+= ${LOCALBASE}/lib/libgmock.a ${LOCALBASE}/lib/libgmock_main.a # Googlemock fails if we don't have this line # https://groups.google.com/forum/#!msg/googletestframework/h8ixEPCFm0o/amwfu4xGJb0J CFLAGS.zfsd_unittest+= -DGTEST_HAS_PTHREAD # Install the tests TESTSBASE?= /usr/tests .include <bsd.test.mk>
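PLAIN_TESTS_CXX registers zfsd_unittest with the FreeBSD test suite under kyua's plain interface, which judges the program purely by its exit status, and because the Makefile links ${LOCALBASE}/lib/libgmock_main.a the test sources never define main(). A minimal sketch of a test written for that arrangement (the test names are illustrative, not part of zfsd):

// main() is supplied by libgmock_main.a, which initializes Google Mock
// and Google Test and exits non-zero if any expectation fails.
#include <gtest/gtest.h>

TEST(Smoke, ArithmeticHolds)
{
	// Under the plain interface, a failing expectation becomes a
	// non-zero exit status, which kyua reports as a failed test.
	EXPECT_EQ(4, 2 + 2);
}
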
Index: projects/clang390-import/cddl/usr.sbin/zfsd/tests/zfsd_unittest.cc =================================================================== --- projects/clang390-import/cddl/usr.sbin/zfsd/tests/zfsd_unittest.cc (revision 305016) +++ projects/clang390-import/cddl/usr.sbin/zfsd/tests/zfsd_unittest.cc (revision 305017) @@ -1,771 +1,770 @@ /*- * Copyright (c) 2012, 2013, 2014 Spectra Logic Corporation * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions, and the following disclaimer, * without modification. * 2. Redistributions in binary form must reproduce at minimum a disclaimer * substantially similar to the "NO WARRANTY" disclaimer below * ("Disclaimer") and any redistribution must be conditioned upon * including a substantially similar Disclaimer requirement for further * binary redistribution. * * NO WARRANTY * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGES. * * Authors: Alan Somers (Spectra Logic Corporation) */ #include <sys/cdefs.h> #include <sys/byteorder.h> #include <stdarg.h> #include <syslog.h> #include <libnvpair.h> #include <libzfs.h> #include <list> #include <map> #include <sstream> #include <string> #include <gmock/gmock.h> #include <gtest/gtest.h> #include <devdctl/guid.h> #include <devdctl/event.h> #include <devdctl/event_factory.h> #include <devdctl/exception.h> #include <devdctl/consumer.h> #include <zfsd/callout.h> #include <zfsd/vdev_iterator.h> #include <zfsd/zfsd_event.h> #include <zfsd/case_file.h> #include <zfsd/vdev.h> #include <zfsd/zfsd.h> #include <zfsd/zfsd_exception.h> #include <zfsd/zpool_list.h> #include "libmocks.h" __FBSDID("$FreeBSD$"); /*================================== Macros ==================================*/ #define NUM_ELEMENTS(x) (sizeof(x) / sizeof(*x)) /*============================ Namespace Control =============================*/ using std::string; using std::stringstream; using DevdCtl::Event; -using DevdCtl::EventBuffer; using DevdCtl::EventFactory; using DevdCtl::EventList; using DevdCtl::Guid; using DevdCtl::NVPairMap; /* redefine zpool_handle here because libzfs_impl.h is not includable */ struct zpool_handle { libzfs_handle_t *zpool_hdl; zpool_handle_t *zpool_next; char zpool_name[ZPOOL_MAXNAMELEN]; int zpool_state; size_t zpool_config_size; nvlist_t *zpool_config; nvlist_t *zpool_old_config; nvlist_t *zpool_props; diskaddr_t zpool_start_block; }; class MockZfsEvent : public ZfsEvent { public: MockZfsEvent(Event::Type, NVPairMap&, const string&); virtual ~MockZfsEvent() {} static BuildMethod MockZfsEventBuilder; MOCK_CONST_METHOD0(ProcessPoolEvent, void()); static EventFactory::Record s_buildRecords[]; }; EventFactory::Record MockZfsEvent::s_buildRecords[] = { { Event::NOTIFY, "ZFS", &MockZfsEvent::MockZfsEventBuilder } }; MockZfsEvent::MockZfsEvent(Event::Type type, NVPairMap& map, const string& str) : ZfsEvent(type, map, str) { } Event * MockZfsEvent::MockZfsEventBuilder(Event::Type type, NVPairMap &nvpairs, const string &eventString) { return (new MockZfsEvent(type, nvpairs, eventString)); } /* * A dummy Vdev class used for testing other classes */ class MockVdev : public Vdev { public: MockVdev(nvlist_t *vdevConfig); virtual ~MockVdev() {} MOCK_CONST_METHOD0(GUID, Guid()); MOCK_CONST_METHOD0(PoolGUID, Guid()); MOCK_CONST_METHOD0(State, vdev_state()); MOCK_CONST_METHOD0(PhysicalPath, string()); }; MockVdev::MockVdev(nvlist_t *vdevConfig) : Vdev(vdevConfig) { } /* * A CaseFile class with side effects removed, for testing */ class TestableCaseFile : public CaseFile { public: static TestableCaseFile &Create(Vdev &vdev); TestableCaseFile(Vdev &vdev); virtual ~TestableCaseFile() {} MOCK_METHOD0(Close, void()); MOCK_METHOD1(RegisterCallout, void(const Event &event)); MOCK_METHOD0(RefreshVdevState, bool()); MOCK_METHOD1(ReEvaluate, bool(const ZfsEvent &event)); bool RealReEvaluate(const ZfsEvent &event) { return (CaseFile::ReEvaluate(event)); } /* * This splices the event lists, a procedure that would normally be done * by OnGracePeriodEnded, but we don't necessarily call that in the * unit tests */ void SpliceEvents(); /* * Used by some of our expectations.
CaseFile does not publicize this */ static int getActiveCases() { return (s_activeCases.size()); } }; TestableCaseFile::TestableCaseFile(Vdev &vdev) : CaseFile(vdev) { } TestableCaseFile & TestableCaseFile::Create(Vdev &vdev) { TestableCaseFile *newCase; newCase = new TestableCaseFile(vdev); return (*newCase); } void TestableCaseFile::SpliceEvents() { m_events.splice(m_events.begin(), m_tentativeEvents); } /* * Test class ZfsdException */ class ZfsdExceptionTest : public ::testing::Test { protected: virtual void SetUp() { ASSERT_EQ(0, nvlist_alloc(&poolConfig, NV_UNIQUE_NAME, 0)); ASSERT_EQ(0, nvlist_add_string(poolConfig, ZPOOL_CONFIG_POOL_NAME, "unit_test_pool")); ASSERT_EQ(0, nvlist_add_uint64(poolConfig, ZPOOL_CONFIG_POOL_GUID, 0x1234)); ASSERT_EQ(0, nvlist_alloc(&vdevConfig, NV_UNIQUE_NAME, 0)); ASSERT_EQ(0, nvlist_add_uint64(vdevConfig, ZPOOL_CONFIG_GUID, 0x5678)); bzero(&poolHandle, sizeof(poolHandle)); poolHandle.zpool_config = poolConfig; } virtual void TearDown() { nvlist_free(poolConfig); nvlist_free(vdevConfig); } nvlist_t *poolConfig; nvlist_t *vdevConfig; zpool_handle_t poolHandle; }; TEST_F(ZfsdExceptionTest, StringConstructorNull) { ZfsdException ze(""); EXPECT_STREQ("", ze.GetString().c_str()); } TEST_F(ZfsdExceptionTest, StringConstructorFormatted) { ZfsdException ze(" %d %s", 55, "hello world"); EXPECT_STREQ(" 55 hello world", ze.GetString().c_str()); } TEST_F(ZfsdExceptionTest, LogSimple) { ZfsdException ze("unit test w/o vdev or pool"); ze.Log(); EXPECT_EQ(LOG_ERR, syslog_last_priority); EXPECT_STREQ("unit test w/o vdev or pool\n", syslog_last_message); } TEST_F(ZfsdExceptionTest, Pool) { const char msg[] = "Exception with pool name"; char expected[4096]; sprintf(expected, "Pool unit_test_pool: %s\n", msg); ZfsdException ze(poolConfig, msg); ze.Log(); EXPECT_STREQ(expected, syslog_last_message); } TEST_F(ZfsdExceptionTest, PoolHandle) { const char msg[] = "Exception with pool handle"; char expected[4096]; sprintf(expected, "Pool unit_test_pool: %s\n", msg); ZfsdException ze(&poolHandle, msg); ze.Log(); EXPECT_STREQ(expected, syslog_last_message); } /* * Test class Vdev */ class VdevTest : public ::testing::Test { protected: virtual void SetUp() { ASSERT_EQ(0, nvlist_alloc(&m_poolConfig, NV_UNIQUE_NAME, 0)); ASSERT_EQ(0, nvlist_add_uint64(m_poolConfig, ZPOOL_CONFIG_POOL_GUID, 0x1234)); ASSERT_EQ(0, nvlist_alloc(&m_vdevConfig, NV_UNIQUE_NAME, 0)); ASSERT_EQ(0, nvlist_add_uint64(m_vdevConfig, ZPOOL_CONFIG_GUID, 0x5678)); } virtual void TearDown() { nvlist_free(m_poolConfig); nvlist_free(m_vdevConfig); } nvlist_t *m_poolConfig; nvlist_t *m_vdevConfig; }; TEST_F(VdevTest, StateFromConfig) { vdev_stat_t vs; vs.vs_state = VDEV_STATE_OFFLINE; ASSERT_EQ(0, nvlist_add_uint64_array(m_vdevConfig, ZPOOL_CONFIG_VDEV_STATS, (uint64_t*)&vs, sizeof(vs) / sizeof(uint64_t))); Vdev vdev(m_poolConfig, m_vdevConfig); EXPECT_EQ(VDEV_STATE_OFFLINE, vdev.State()); } TEST_F(VdevTest, StateFaulted) { ASSERT_EQ(0, nvlist_add_uint64(m_vdevConfig, ZPOOL_CONFIG_FAULTED, 1)); Vdev vdev(m_poolConfig, m_vdevConfig); EXPECT_EQ(VDEV_STATE_FAULTED, vdev.State()); } /* * Test that we can construct a Vdev from the label information that is stored * on an available spare drive */ TEST_F(VdevTest, ConstructAvailSpare) { nvlist_t *labelConfig; ASSERT_EQ(0, nvlist_alloc(&labelConfig, NV_UNIQUE_NAME, 0)); ASSERT_EQ(0, nvlist_add_uint64(labelConfig, ZPOOL_CONFIG_GUID, 1948339428197961030)); ASSERT_EQ(0, nvlist_add_uint64(labelConfig, ZPOOL_CONFIG_POOL_STATE, POOL_STATE_SPARE)); EXPECT_NO_THROW(Vdev 
vdev(labelConfig)); nvlist_free(labelConfig); } /* Available spares will always show the HEALTHY state */ TEST_F(VdevTest, AvailSpareState) { nvlist_t *labelConfig; ASSERT_EQ(0, nvlist_alloc(&labelConfig, NV_UNIQUE_NAME, 0)); ASSERT_EQ(0, nvlist_add_uint64(labelConfig, ZPOOL_CONFIG_GUID, 1948339428197961030)); ASSERT_EQ(0, nvlist_add_uint64(labelConfig, ZPOOL_CONFIG_POOL_STATE, POOL_STATE_SPARE)); Vdev vdev(labelConfig); EXPECT_EQ(VDEV_STATE_HEALTHY, vdev.State()); nvlist_free(labelConfig); } /* Test the Vdev::IsSpare method */ TEST_F(VdevTest, IsSpare) { Vdev notSpare(m_poolConfig, m_vdevConfig); EXPECT_EQ(false, notSpare.IsSpare()); ASSERT_EQ(0, nvlist_add_uint64(m_vdevConfig, ZPOOL_CONFIG_IS_SPARE, 1)); Vdev isSpare(m_poolConfig, m_vdevConfig); EXPECT_EQ(true, isSpare.IsSpare()); } /* * Test class ZFSEvent */ class ZfsEventTest : public ::testing::Test { protected: virtual void SetUp() { m_eventFactory = new EventFactory(); m_eventFactory->UpdateRegistry(MockZfsEvent::s_buildRecords, NUM_ELEMENTS(MockZfsEvent::s_buildRecords)); m_event = NULL; } virtual void TearDown() { delete m_eventFactory; delete m_event; } EventFactory *m_eventFactory; Event *m_event; }; TEST_F(ZfsEventTest, ProcessPoolEventGetsCalled) { string evString("!system=ZFS " "subsystem=ZFS " "type=misc.fs.zfs.vdev_remove " "pool_name=foo " "pool_guid=9756779504028057996 " "vdev_guid=1631193447431603339 " "vdev_path=/dev/da1 " "timestamp=1348871594"); m_event = Event::CreateEvent(*m_eventFactory, evString); MockZfsEvent *mock_event = static_cast<MockZfsEvent *>(m_event); EXPECT_CALL(*mock_event, ProcessPoolEvent()).Times(1); mock_event->Process(); } /* * Test class CaseFile */ class CaseFileTest : public ::testing::Test { protected: virtual void SetUp() { m_eventFactory = new EventFactory(); m_eventFactory->UpdateRegistry(MockZfsEvent::s_buildRecords, NUM_ELEMENTS(MockZfsEvent::s_buildRecords)); m_event = NULL; nvlist_alloc(&m_vdevConfig, NV_UNIQUE_NAME, 0); ASSERT_EQ(0, nvlist_add_uint64(m_vdevConfig, ZPOOL_CONFIG_GUID, 0xbeef)); m_vdev = new MockVdev(m_vdevConfig); ON_CALL(*m_vdev, GUID()) .WillByDefault(::testing::Return(Guid(123))); ON_CALL(*m_vdev, PoolGUID()) .WillByDefault(::testing::Return(Guid(456))); ON_CALL(*m_vdev, State()) .WillByDefault(::testing::Return(VDEV_STATE_HEALTHY)); m_caseFile = &TestableCaseFile::Create(*m_vdev); ON_CALL(*m_caseFile, ReEvaluate(::testing::_)) .WillByDefault(::testing::Invoke(m_caseFile, &TestableCaseFile::RealReEvaluate)); return; } virtual void TearDown() { delete m_caseFile; nvlist_free(m_vdevConfig); delete m_vdev; delete m_event; delete m_eventFactory; } nvlist_t *m_vdevConfig; MockVdev *m_vdev; TestableCaseFile *m_caseFile; Event *m_event; EventFactory *m_eventFactory; }; /* * A Vdev with no events should not be degraded or faulted */ TEST_F(CaseFileTest, HealthyVdev) { EXPECT_FALSE(m_caseFile->ShouldDegrade()); EXPECT_FALSE(m_caseFile->ShouldFault()); }
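The ON_CALL(...).WillByDefault(::testing::Invoke(...)) wiring in CaseFileTest::SetUp above is the standard gmock delegate-to-real idiom: ReEvaluate remains observable as a mock call, yet by default runs the genuine CaseFile logic through RealReEvaluate. A self-contained sketch of the idiom with illustrative types (Base and MockBase are not zfsd classes; link against gmock_main as the Makefile does):

#include <gmock/gmock.h>

class Base {
public:
	virtual ~Base() {}
	virtual bool Handle(int x) { return (x > 0); }
};

class MockBase : public Base {
public:
	MOCK_METHOD1(Handle, bool(int));
	// Trampoline back to the real implementation, mirroring
	// TestableCaseFile::RealReEvaluate.
	bool RealHandle(int x) { return (Base::Handle(x)); }
};

TEST(Delegate, CallsRealImplementation)
{
	MockBase mock;
	ON_CALL(mock, Handle(::testing::_))
	    .WillByDefault(::testing::Invoke(&mock, &MockBase::RealHandle));
	EXPECT_CALL(mock, Handle(5));	// call count is still verified
	EXPECT_TRUE(mock.Handle(5));	// but Base::Handle actually runs
}
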
/* * A Vdev with only one event should not be degraded or faulted * For performance reasons, RefreshVdevState should not be called. */ TEST_F(CaseFileTest, HealthyishVdev) { string evString("!system=ZFS " "class=ereport.fs.zfs.io " "ena=12091638756982918145 " "parent_guid=13237004955564865395 " "parent_type=raidz " "pool=testpool.4415 " "pool_context=0 " "pool_failmode=wait " "pool_guid=456 " "subsystem=ZFS " "timestamp=1348867914 " "type=ereport.fs.zfs.io " "vdev_guid=123 " "vdev_path=/dev/da400 " "vdev_type=disk " "zio_blkid=622 " "zio_err=1 " "zio_level=-2 " "zio_object=0 " "zio_objset=37 " "zio_offset=25598976 " "zio_size=1024"); m_event = Event::CreateEvent(*m_eventFactory, evString); ZfsEvent *zfs_event = static_cast<ZfsEvent *>(m_event); EXPECT_CALL(*m_caseFile, RefreshVdevState()) .Times(::testing::Exactly(0)); EXPECT_TRUE(m_caseFile->ReEvaluate(*zfs_event)); EXPECT_FALSE(m_caseFile->ShouldDegrade()); EXPECT_FALSE(m_caseFile->ShouldFault()); } /* The case file should be closed when its pool is destroyed */ TEST_F(CaseFileTest, PoolDestroy) { string evString("!system=ZFS " "pool_name=testpool.4415 " "pool_guid=456 " "subsystem=ZFS " "timestamp=1348867914 " "type=misc.fs.zfs.pool_destroy "); m_event = Event::CreateEvent(*m_eventFactory, evString); ZfsEvent *zfs_event = static_cast<ZfsEvent *>(m_event); EXPECT_CALL(*m_caseFile, Close()); EXPECT_TRUE(m_caseFile->ReEvaluate(*zfs_event)); } /* * A Vdev with a very large number of IO errors should fault * For performance reasons, RefreshVdevState should be called at most once */ TEST_F(CaseFileTest, VeryManyIOErrors) { EXPECT_CALL(*m_caseFile, RefreshVdevState()) .Times(::testing::AtMost(1)) .WillRepeatedly(::testing::Return(true)); for(int i=0; i<100; i++) { stringstream evStringStream; evStringStream << "!system=ZFS " "class=ereport.fs.zfs.io " "ena=12091638756982918145 " "parent_guid=13237004955564865395 " "parent_type=raidz " "pool=testpool.4415 " "pool_context=0 " "pool_failmode=wait " "pool_guid=456 " "subsystem=ZFS " "timestamp="; evStringStream << i << " "; evStringStream << "type=ereport.fs.zfs.io " "vdev_guid=123 " "vdev_path=/dev/da400 " "vdev_type=disk " "zio_blkid=622 " "zio_err=1 " "zio_level=-2 " "zio_object=0 " "zio_objset=37 " "zio_offset=25598976 " "zio_size=1024"; Event *event(Event::CreateEvent(*m_eventFactory, evStringStream.str())); ZfsEvent *zfs_event = static_cast<ZfsEvent *>(event); EXPECT_TRUE(m_caseFile->ReEvaluate(*zfs_event)); delete event; } m_caseFile->SpliceEvents(); EXPECT_FALSE(m_caseFile->ShouldDegrade()); EXPECT_TRUE(m_caseFile->ShouldFault()); }
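Taken together, VeryManyIOErrors and the VeryManyChecksumErrors test that follows pin down the fault-management policy: a flood of I/O errors faults the vdev outright, while a flood of checksum errors only degrades it, since the device still responds but returns bad data. A sketch of that kind of count-based policy, with hypothetical names and thresholds (the real logic lives in CaseFile::ShouldFault and CaseFile::ShouldDegrade):

#include <cstddef>

// Illustrative event-count policy of the sort these tests exercise; the
// counts, window, and names here are assumptions, not zfsd's actual code.
struct ErrorCounts {
	size_t io;
	size_t checksum;
};

static bool
ShouldFault(const ErrorCounts &c, size_t threshold)
{
	return (c.io >= threshold);	// repeated I/O errors: drive is failing
}

static bool
ShouldDegrade(const ErrorCounts &c, size_t threshold)
{
	// Checksum errors mean bad data from a drive that still responds,
	// so mark it degraded rather than faulted.
	return (c.checksum >= threshold && c.io < threshold);
}
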
/* * A Vdev with a very large number of checksum errors should degrade * For performance reasons, RefreshVdevState should be called at most once */ TEST_F(CaseFileTest, VeryManyChecksumErrors) { EXPECT_CALL(*m_caseFile, RefreshVdevState()) .Times(::testing::AtMost(1)) .WillRepeatedly(::testing::Return(true)); for(int i=0; i<100; i++) { stringstream evStringStream; evStringStream << "!system=ZFS " "bad_cleared_bits=03000000000000803f50b00000000000 " "bad_range_clears=0000000e " "bad_range_sets=00000000 " "bad_ranges=0000000000000010 " "bad_ranges_min_gap=8 " "bad_set_bits=00000000000000000000000000000000 " "class=ereport.fs.zfs.checksum " "ena=12272856582652437505 " "parent_guid=5838204195352909894 " "parent_type=raidz pool=testpool.7640 " "pool_context=0 " "pool_failmode=wait " "pool_guid=456 " "subsystem=ZFS timestamp="; evStringStream << i << " "; evStringStream << "type=ereport.fs.zfs.checksum " "vdev_guid=123 " "vdev_path=/mnt/tmp/file1.7702 " "vdev_type=file " "zio_blkid=0 " "zio_err=0 " "zio_level=0 " "zio_object=3 " "zio_objset=0 " "zio_offset=16896 " "zio_size=512"; Event *event(Event::CreateEvent(*m_eventFactory, evStringStream.str())); ZfsEvent *zfs_event = static_cast<ZfsEvent *>(event); EXPECT_TRUE(m_caseFile->ReEvaluate(*zfs_event)); delete event; } m_caseFile->SpliceEvents(); EXPECT_TRUE(m_caseFile->ShouldDegrade()); EXPECT_FALSE(m_caseFile->ShouldFault()); } /* * Test CaseFile::ReEvaluateByGuid */ class ReEvaluateByGuidTest : public ::testing::Test { protected: virtual void SetUp() { m_eventFactory = new EventFactory(); m_eventFactory->UpdateRegistry(MockZfsEvent::s_buildRecords, NUM_ELEMENTS(MockZfsEvent::s_buildRecords)); m_event = Event::CreateEvent(*m_eventFactory, s_evString); nvlist_alloc(&m_vdevConfig, NV_UNIQUE_NAME, 0); ASSERT_EQ(0, nvlist_add_uint64(m_vdevConfig, ZPOOL_CONFIG_GUID, 0xbeef)); m_vdev456 = new ::testing::NiceMock<MockVdev>(m_vdevConfig); m_vdev789 = new ::testing::NiceMock<MockVdev>(m_vdevConfig); ON_CALL(*m_vdev456, GUID()) .WillByDefault(::testing::Return(Guid(123))); ON_CALL(*m_vdev456, PoolGUID()) .WillByDefault(::testing::Return(Guid(456))); ON_CALL(*m_vdev456, State()) .WillByDefault(::testing::Return(VDEV_STATE_HEALTHY)); ON_CALL(*m_vdev789, GUID()) .WillByDefault(::testing::Return(Guid(123))); ON_CALL(*m_vdev789, PoolGUID()) .WillByDefault(::testing::Return(Guid(789))); ON_CALL(*m_vdev789, State()) .WillByDefault(::testing::Return(VDEV_STATE_HEALTHY)); m_caseFile456 = NULL; m_caseFile789 = NULL; return; } virtual void TearDown() { delete m_caseFile456; delete m_caseFile789; nvlist_free(m_vdevConfig); delete m_vdev456; delete m_vdev789; delete m_event; delete m_eventFactory; } static string s_evString; nvlist_t *m_vdevConfig; ::testing::NiceMock<MockVdev> *m_vdev456; ::testing::NiceMock<MockVdev> *m_vdev789; TestableCaseFile *m_caseFile456; TestableCaseFile *m_caseFile789; Event *m_event; EventFactory *m_eventFactory; }; string ReEvaluateByGuidTest::s_evString( "!system=ZFS " "pool_guid=16271873792808333580 " "pool_name=foo " "subsystem=ZFS " "timestamp=1360620391 " "type=misc.fs.zfs.config_sync");
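Note the fixture's use of ::testing::NiceMock<MockVdev>: the ReEvaluateByGuid tests call GUID(), PoolGUID(), and State() incidentally, and NiceMock keeps those uninteresting calls from emitting warnings while still honoring the ON_CALL defaults. A minimal illustration with a hypothetical mock class:

#include <gmock/gmock.h>

class Widget {
public:
	virtual ~Widget() {}
	virtual int Id() const { return (0); }
};

class MockWidget : public Widget {
public:
	MOCK_CONST_METHOD0(Id, int());
};

TEST(NiceMockDemo, UninterestingCallsAreSilent)
{
	// NiceMock suppresses the "uninteresting mock function call" warning
	// a plain MockWidget would print; StrictMock would fail the test.
	::testing::NiceMock<MockWidget> w;
	ON_CALL(w, Id()).WillByDefault(::testing::Return(7));
	EXPECT_EQ(7, w.Id());	// no EXPECT_CALL needed, no warning emitted
}
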
/* * Test the ReEvaluateByGuid method on an empty list of casefiles. * We must create one event, even though it never gets used, because it will * be passed by reference to ReEvaluateByGuid */ TEST_F(ReEvaluateByGuidTest, ReEvaluateByGuid_empty) { ZfsEvent *zfs_event = static_cast<ZfsEvent *>(m_event); EXPECT_EQ(0, TestableCaseFile::getActiveCases()); CaseFile::ReEvaluateByGuid(Guid(456), *zfs_event); EXPECT_EQ(0, TestableCaseFile::getActiveCases()); } /* * Test the ReEvaluateByGuid method on a list of CaseFiles that contains only * one CaseFile, which doesn't match the criteria */ TEST_F(ReEvaluateByGuidTest, ReEvaluateByGuid_oneFalse) { m_caseFile456 = &TestableCaseFile::Create(*m_vdev456); ZfsEvent *zfs_event = static_cast<ZfsEvent *>(m_event); EXPECT_EQ(1, TestableCaseFile::getActiveCases()); EXPECT_CALL(*m_caseFile456, ReEvaluate(::testing::_)) .Times(::testing::Exactly(0)); CaseFile::ReEvaluateByGuid(Guid(789), *zfs_event); EXPECT_EQ(1, TestableCaseFile::getActiveCases()); } /* * Test the ReEvaluateByGuid method on a list of CaseFiles that contains only * one CaseFile, which does match the criteria */ TEST_F(ReEvaluateByGuidTest, ReEvaluateByGuid_oneTrue) { m_caseFile456 = &TestableCaseFile::Create(*m_vdev456); ZfsEvent *zfs_event = static_cast<ZfsEvent *>(m_event); EXPECT_EQ(1, TestableCaseFile::getActiveCases()); EXPECT_CALL(*m_caseFile456, ReEvaluate(::testing::_)) .Times(::testing::Exactly(1)) .WillRepeatedly(::testing::Return(false)); CaseFile::ReEvaluateByGuid(Guid(456), *zfs_event); EXPECT_EQ(1, TestableCaseFile::getActiveCases()); } /* * Test the ReEvaluateByGuid method on a long list of CaseFiles that contains a * few cases which meet the criteria */ TEST_F(ReEvaluateByGuidTest, ReEvaluateByGuid_five) { TestableCaseFile *CaseFile1 = &TestableCaseFile::Create(*m_vdev456); TestableCaseFile *CaseFile2 = &TestableCaseFile::Create(*m_vdev789); TestableCaseFile *CaseFile3 = &TestableCaseFile::Create(*m_vdev456); TestableCaseFile *CaseFile4 = &TestableCaseFile::Create(*m_vdev789); TestableCaseFile *CaseFile5 = &TestableCaseFile::Create(*m_vdev789); ZfsEvent *zfs_event = static_cast<ZfsEvent *>(m_event); EXPECT_EQ(5, TestableCaseFile::getActiveCases()); EXPECT_CALL(*CaseFile1, ReEvaluate(::testing::_)) .Times(::testing::Exactly(1)) .WillRepeatedly(::testing::Return(false)); EXPECT_CALL(*CaseFile3, ReEvaluate(::testing::_)) .Times(::testing::Exactly(1)) .WillRepeatedly(::testing::Return(false)); EXPECT_CALL(*CaseFile2, ReEvaluate(::testing::_)) .Times(::testing::Exactly(0)); EXPECT_CALL(*CaseFile4, ReEvaluate(::testing::_)) .Times(::testing::Exactly(0)); EXPECT_CALL(*CaseFile5, ReEvaluate(::testing::_)) .Times(::testing::Exactly(0)); CaseFile::ReEvaluateByGuid(Guid(456), *zfs_event); EXPECT_EQ(5, TestableCaseFile::getActiveCases()); delete CaseFile1; delete CaseFile2; delete CaseFile3; delete CaseFile4; delete CaseFile5; }
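The expectations above lean on gmock cardinalities: Times(::testing::Exactly(n)) fails the test if ReEvaluate runs any other number of times, and WillRepeatedly supplies the return value for every permitted call. A compact, self-contained example of the same pattern (Probe and MockProbe are illustrative names):

#include <gmock/gmock.h>

class Probe {
public:
	virtual ~Probe() {}
	virtual bool Poke() { return (true); }
};

class MockProbe : public Probe {
public:
	MOCK_METHOD0(Poke, bool());
};

TEST(Cardinality, ExactlyTwice)
{
	MockProbe p;
	EXPECT_CALL(p, Poke())
	    .Times(::testing::Exactly(2))
	    .WillRepeatedly(::testing::Return(false));
	EXPECT_FALSE(p.Poke());
	EXPECT_FALSE(p.Poke());	// a third call would fail the test
}
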
Index: projects/clang390-import/cddl/usr.sbin/zfsd/vdev.h =================================================================== --- projects/clang390-import/cddl/usr.sbin/zfsd/vdev.h (revision 305016) +++ projects/clang390-import/cddl/usr.sbin/zfsd/vdev.h (revision 305017) @@ -1,178 +1,188 @@ /*- * Copyright (c) 2011, 2012, 2013 Spectra Logic Corporation * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions, and the following disclaimer, * without modification. * 2. Redistributions in binary form must reproduce at minimum a disclaimer * substantially similar to the "NO WARRANTY" disclaimer below * ("Disclaimer") and any redistribution must be conditioned upon * including a substantially similar Disclaimer requirement for further * binary redistribution. * * NO WARRANTY * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGES. * * Authors: Justin T. Gibbs (Spectra Logic Corporation) * * $FreeBSD$ */ /** * \file vdev.h * * Definition of the Vdev class. * * Header requirements: * * #include <list> * #include <string> * * #include <devdctl/guid.h> */ #ifndef _VDEV_H_ #define _VDEV_H_ /*=========================== Forward Declarations ===========================*/ struct zpool_handle; typedef struct zpool_handle zpool_handle_t; struct nvlist; typedef struct nvlist nvlist_t; /*============================= Class Definitions ============================*/ /*----------------------------------- Vdev -----------------------------------*/ /** * \brief Wrapper class for a vdev's name/value configuration list * simplifying access to commonly used vdev attributes. */ class Vdev { public: /** * \brief Instantiate a vdev object for a vdev that is a member * of an imported pool. * * \param pool The pool object containing the vdev with * configuration data provided in vdevConfig. * \param vdevConfig Vdev configuration data. * * This method should be used whenever dealing with vdev's * enumerated via the ZpoolList class. The in-core configuration * data for a vdev does not contain all of the items found in * the on-disk label. This requires the vdev class to augment * the data in vdevConfig with data found in the pool object. */ Vdev(zpool_handle_t *pool, nvlist_t *vdevConfig); /** * \brief Instantiate a vdev object for a vdev that is a member * of a pool configuration. * * \param poolConfig The pool configuration containing the vdev * configuration data provided in vdevConfig. * \param vdevConfig Vdev configuration data. * * This method should be used whenever dealing with vdev's * enumerated via the ZpoolList class. The in-core configuration * data for a vdev does not contain all of the items found in * the on-disk label. This requires the vdev class to augment * the data in vdevConfig with data found in the pool object. */ Vdev(nvlist_t *poolConfig, nvlist_t *vdevConfig); /** * \brief Instantiate a vdev object from a ZFS label stored on * the device. * * \param vdevConfig The name/value list retrieved by reading * the label information on a leaf vdev. */ Vdev(nvlist_t *vdevConfig); /** * \brief No-op copy constructor for nonexistent vdevs. */ Vdev(); + + /** + * \brief No-op virtual destructor, since this class has virtual + * functions. + */ + virtual ~Vdev(); bool DoesNotExist() const; /** * \brief Return a list of the vdev's children.
*/ std::list<Vdev> Children(); virtual DevdCtl::Guid GUID() const; bool IsSpare() const; virtual DevdCtl::Guid PoolGUID() const; virtual vdev_state State() const; std::string Path() const; virtual std::string PhysicalPath() const; std::string GUIDString() const; nvlist_t *PoolConfig() const; nvlist_t *Config() const; Vdev Parent(); Vdev RootVdev(); std::string Name(zpool_handle_t *, bool verbose) const; bool IsSpare(); bool IsAvailableSpare() const; bool IsActiveSpare() const; bool IsResilvering() const; private: void VdevLookupGuid(); bool VdevLookupPoolGuid(); DevdCtl::Guid m_poolGUID; DevdCtl::Guid m_vdevGUID; nvlist_t *m_poolConfig; nvlist_t *m_config; }; //- Special objects ----------------------------------------------------------- extern Vdev NonexistentVdev; //- Vdev Inline Public Methods ------------------------------------------------ +inline Vdev::~Vdev() +{ +} + inline DevdCtl::Guid Vdev::PoolGUID() const { return (m_poolGUID); } inline DevdCtl::Guid Vdev::GUID() const { return (m_vdevGUID); } inline nvlist_t * Vdev::PoolConfig() const { return (m_poolConfig); } inline nvlist_t * Vdev::Config() const { return (m_config); } inline bool Vdev::DoesNotExist() const { return (m_config == NULL); } #endif /* _VDEV_H_ */ Index: projects/clang390-import/cddl =================================================================== --- projects/clang390-import/cddl (revision 305016) +++ projects/clang390-import/cddl (revision 305017) Property changes on: projects/clang390-import/cddl ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head/cddl:r304885-305016
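The substantive change to vdev.h above is the new virtual destructor. Vdev already has virtual methods and the unit tests derive MockVdev from it, and deleting a derived object through a base pointer is undefined behavior unless the base destructor is virtual; the empty inline definition keeps the fix header-only. A minimal illustration (BaseV and DerivedV are made-up names, not zfsd types):

// Why the vdev.h hunk adds "virtual ~Vdev()": without a virtual base
// destructor, "delete base_ptr" never runs the derived destructor.
class BaseV {
public:
	virtual ~BaseV() {}	// drop "virtual" and ~DerivedV() is skipped
	virtual int State() const { return (0); }
};

class DerivedV : public BaseV {
public:
	~DerivedV() { /* release per-derived resources here */ }
	int State() const { return (1); }
};

int
main()
{
	BaseV *v = new DerivedV();
	delete v;		// safe: runs ~DerivedV(), then ~BaseV()
	return (0);
}
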
Index: projects/clang390-import/contrib/libarchive/libarchive/archive_acl.c =================================================================== --- projects/clang390-import/contrib/libarchive/libarchive/archive_acl.c (revision 305016) +++ projects/clang390-import/contrib/libarchive/libarchive/archive_acl.c (revision 305017) @@ -1,1798 +1,1278 @@ /*- * Copyright (c) 2003-2010 Tim Kientzle * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "archive_platform.h" __FBSDID("$FreeBSD$"); #ifdef HAVE_ERRNO_H #include <errno.h> #endif #ifdef HAVE_LIMITS_H #include <limits.h> #endif #ifdef HAVE_WCHAR_H #include <wchar.h> #endif #include "archive_acl_private.h" #include "archive_entry.h" #include "archive_private.h" #undef max #define max(a, b) ((a)>(b)?(a):(b)) #ifndef HAVE_WMEMCMP /* Good enough for simple equality testing, but not for sorting. */ #define wmemcmp(a,b,i) memcmp((a), (b), (i) * sizeof(wchar_t)) #endif static int acl_special(struct archive_acl *acl, int type, int permset, int tag); static struct archive_acl_entry *acl_new_entry(struct archive_acl *acl, int type, int permset, int tag, int id); static int archive_acl_add_entry_len_l(struct archive_acl *acl, int type, int permset, int tag, int id, const char *name, size_t len, struct archive_string_conv *sc); static int isint_w(const wchar_t *start, const wchar_t *end, int *result); static int ismode_w(const wchar_t *start, const wchar_t *end, int *result); -static int parse_nfs4_flags_w(const wchar_t *start, const wchar_t *end, - int *result); -static int parse_nfs4_perms_w(const wchar_t *start, const wchar_t *end, - int *result); static void next_field_w(const wchar_t **wp, const wchar_t **start, const wchar_t **end, wchar_t *sep); static int prefix_w(const wchar_t *start, const wchar_t *end, const wchar_t *test); -static void append_entry_w(wchar_t **wp, const wchar_t *prefix, int type, - int tag, const wchar_t *wname, int perm, int id); +static void append_entry_w(wchar_t **wp, const wchar_t *prefix, int tag, + const wchar_t *wname, int perm, int id); static void append_id_w(wchar_t **wp, int id); static int isint(const char *start, const char *end, int *result); static int ismode(const char *start, const char *end, int *result); -static int parse_nfs4_flags(const char *start, const char *end, int *result); -static int parse_nfs4_perms(const char *start, const char *end, int *result); static void next_field(const char **p, const char **start, const char **end, char *sep); static int prefix_c(const char *start, const char *end, const char *test); -static void append_entry(char **p, const char *prefix, int type, - int tag, const char *name, int perm, int id); +static void append_entry(char **p, const char *prefix, int tag, + const char *name, int perm, int id); static void append_id(char **p, int id); void archive_acl_clear(struct archive_acl *acl) { struct archive_acl_entry *ap; while (acl->acl_head != NULL) { ap = acl->acl_head->next; archive_mstring_clean(&acl->acl_head->name); free(acl->acl_head); acl->acl_head = ap; } if (acl->acl_text_w != NULL) { free(acl->acl_text_w); acl->acl_text_w = NULL; } if (acl->acl_text != NULL) { free(acl->acl_text); acl->acl_text = NULL; } acl->acl_p = NULL; acl->acl_state = 0; /* Not counting.
*/ } void archive_acl_copy(struct archive_acl *dest, struct archive_acl *src) { struct archive_acl_entry *ap, *ap2; archive_acl_clear(dest); dest->mode = src->mode; ap = src->acl_head; while (ap != NULL) { ap2 = acl_new_entry(dest, ap->type, ap->permset, ap->tag, ap->id); if (ap2 != NULL) archive_mstring_copy(&ap2->name, &ap->name); ap = ap->next; } } int archive_acl_add_entry(struct archive_acl *acl, int type, int permset, int tag, int id, const char *name) { struct archive_acl_entry *ap; if (acl_special(acl, type, permset, tag) == 0) return ARCHIVE_OK; ap = acl_new_entry(acl, type, permset, tag, id); if (ap == NULL) { /* XXX Error XXX */ return ARCHIVE_FAILED; } if (name != NULL && *name != '\0') archive_mstring_copy_mbs(&ap->name, name); else archive_mstring_clean(&ap->name); return ARCHIVE_OK; } int archive_acl_add_entry_w_len(struct archive_acl *acl, int type, int permset, int tag, int id, const wchar_t *name, size_t len) { struct archive_acl_entry *ap; if (acl_special(acl, type, permset, tag) == 0) return ARCHIVE_OK; ap = acl_new_entry(acl, type, permset, tag, id); if (ap == NULL) { /* XXX Error XXX */ return ARCHIVE_FAILED; } if (name != NULL && *name != L'\0' && len > 0) archive_mstring_copy_wcs_len(&ap->name, name, len); else archive_mstring_clean(&ap->name); return ARCHIVE_OK; } static int archive_acl_add_entry_len_l(struct archive_acl *acl, int type, int permset, int tag, int id, const char *name, size_t len, struct archive_string_conv *sc) { struct archive_acl_entry *ap; int r; if (acl_special(acl, type, permset, tag) == 0) return ARCHIVE_OK; ap = acl_new_entry(acl, type, permset, tag, id); if (ap == NULL) { /* XXX Error XXX */ return ARCHIVE_FAILED; } if (name != NULL && *name != '\0' && len > 0) { r = archive_mstring_copy_mbs_len_l(&ap->name, name, len, sc); } else { r = 0; archive_mstring_clean(&ap->name); } if (r == 0) return (ARCHIVE_OK); else if (errno == ENOMEM) return (ARCHIVE_FATAL); else return (ARCHIVE_WARN); } /* * If this ACL entry is part of the standard POSIX permissions set, * store the permissions in the stat structure and return zero. */ static int acl_special(struct archive_acl *acl, int type, int permset, int tag) { if (type == ARCHIVE_ENTRY_ACL_TYPE_ACCESS && ((permset & ~007) == 0)) { switch (tag) { case ARCHIVE_ENTRY_ACL_USER_OBJ: acl->mode &= ~0700; acl->mode |= (permset & 7) << 6; return (0); case ARCHIVE_ENTRY_ACL_GROUP_OBJ: acl->mode &= ~0070; acl->mode |= (permset & 7) << 3; return (0); case ARCHIVE_ENTRY_ACL_OTHER: acl->mode &= ~0007; acl->mode |= permset & 7; return (0); } } return (1); } /* * Allocate and populate a new ACL entry with everything but the * name. */ static struct archive_acl_entry * acl_new_entry(struct archive_acl *acl, int type, int permset, int tag, int id) { struct archive_acl_entry *ap, *aq; /* Type argument must be a valid NFS4 or POSIX.1e type. * The type must agree with anything already set and * the permset must be compatible. */ if (type & ARCHIVE_ENTRY_ACL_TYPE_NFS4) { if (acl->acl_types & ~ARCHIVE_ENTRY_ACL_TYPE_NFS4) { return (NULL); } if (permset & ~(ARCHIVE_ENTRY_ACL_PERMS_NFS4 | ARCHIVE_ENTRY_ACL_INHERITANCE_NFS4)) { return (NULL); } } else if (type & ARCHIVE_ENTRY_ACL_TYPE_POSIX1E) { if (acl->acl_types & ~ARCHIVE_ENTRY_ACL_TYPE_POSIX1E) { return (NULL); } if (permset & ~ARCHIVE_ENTRY_ACL_PERMS_POSIX1E) { return (NULL); } } else { return (NULL); } /* Verify the tag is valid and compatible with NFS4 or POSIX.1e. 
*/ switch (tag) { case ARCHIVE_ENTRY_ACL_USER: case ARCHIVE_ENTRY_ACL_USER_OBJ: case ARCHIVE_ENTRY_ACL_GROUP: case ARCHIVE_ENTRY_ACL_GROUP_OBJ: /* Tags valid in both NFS4 and POSIX.1e */ break; case ARCHIVE_ENTRY_ACL_MASK: case ARCHIVE_ENTRY_ACL_OTHER: /* Tags valid only in POSIX.1e. */ if (type & ~ARCHIVE_ENTRY_ACL_TYPE_POSIX1E) { return (NULL); } break; case ARCHIVE_ENTRY_ACL_EVERYONE: /* Tags valid only in NFS4. */ if (type & ~ARCHIVE_ENTRY_ACL_TYPE_NFS4) { return (NULL); } break; default: /* No other values are valid. */ return (NULL); } if (acl->acl_text_w != NULL) { free(acl->acl_text_w); acl->acl_text_w = NULL; } if (acl->acl_text != NULL) { free(acl->acl_text); acl->acl_text = NULL; } /* If there's a matching entry already in the list, overwrite it. */ ap = acl->acl_head; aq = NULL; while (ap != NULL) { if (ap->type == type && ap->tag == tag && ap->id == id) { ap->permset = permset; return (ap); } aq = ap; ap = ap->next; } /* Add a new entry to the end of the list. */ ap = (struct archive_acl_entry *)malloc(sizeof(*ap)); if (ap == NULL) return (NULL); memset(ap, 0, sizeof(*ap)); if (aq == NULL) acl->acl_head = ap; else aq->next = ap; ap->type = type; ap->tag = tag; ap->id = id; ap->permset = permset; acl->acl_types |= type; return (ap); } /* * Return a count of entries matching "want_type". */ int archive_acl_count(struct archive_acl *acl, int want_type) { int count; struct archive_acl_entry *ap; count = 0; ap = acl->acl_head; while (ap != NULL) { if ((ap->type & want_type) != 0) count++; ap = ap->next; } if (count > 0 && ((want_type & ARCHIVE_ENTRY_ACL_TYPE_ACCESS) != 0)) count += 3; return (count); } /* * Prepare for reading entries from the ACL data. Returns a count * of entries matching "want_type", or zero if there are no * non-extended ACL entries of that type. */ int archive_acl_reset(struct archive_acl *acl, int want_type) { int count, cutoff; count = archive_acl_count(acl, want_type); /* * If the only entries are the three standard ones, * then don't return any ACL data. (In this case, * client can just use chmod(2) to set permissions.) */ if ((want_type & ARCHIVE_ENTRY_ACL_TYPE_ACCESS) != 0) cutoff = 3; else cutoff = 0; if (count > cutoff) acl->acl_state = ARCHIVE_ENTRY_ACL_USER_OBJ; else acl->acl_state = 0; acl->acl_p = acl->acl_head; return (count); } /* * Return the next ACL entry in the list. Fake entries for the * standard permissions and include them in the returned list. */ int archive_acl_next(struct archive *a, struct archive_acl *acl, int want_type, int *type, int *permset, int *tag, int *id, const char **name) { *name = NULL; *id = -1; /* * The acl_state is either zero (no entries available), -1 * (reading from list), or an entry type (retrieve that type * from ae_stat.aest_mode). */ if (acl->acl_state == 0) return (ARCHIVE_WARN); /* The first three access entries are special. 
*/ if ((want_type & ARCHIVE_ENTRY_ACL_TYPE_ACCESS) != 0) { switch (acl->acl_state) { case ARCHIVE_ENTRY_ACL_USER_OBJ: *permset = (acl->mode >> 6) & 7; *type = ARCHIVE_ENTRY_ACL_TYPE_ACCESS; *tag = ARCHIVE_ENTRY_ACL_USER_OBJ; acl->acl_state = ARCHIVE_ENTRY_ACL_GROUP_OBJ; return (ARCHIVE_OK); case ARCHIVE_ENTRY_ACL_GROUP_OBJ: *permset = (acl->mode >> 3) & 7; *type = ARCHIVE_ENTRY_ACL_TYPE_ACCESS; *tag = ARCHIVE_ENTRY_ACL_GROUP_OBJ; acl->acl_state = ARCHIVE_ENTRY_ACL_OTHER; return (ARCHIVE_OK); case ARCHIVE_ENTRY_ACL_OTHER: *permset = acl->mode & 7; *type = ARCHIVE_ENTRY_ACL_TYPE_ACCESS; *tag = ARCHIVE_ENTRY_ACL_OTHER; acl->acl_state = -1; acl->acl_p = acl->acl_head; return (ARCHIVE_OK); default: break; } } while (acl->acl_p != NULL && (acl->acl_p->type & want_type) == 0) acl->acl_p = acl->acl_p->next; if (acl->acl_p == NULL) { acl->acl_state = 0; *type = 0; *permset = 0; *tag = 0; *id = -1; *name = NULL; return (ARCHIVE_EOF); /* End of ACL entries. */ } *type = acl->acl_p->type; *permset = acl->acl_p->permset; *tag = acl->acl_p->tag; *id = acl->acl_p->id; if (archive_mstring_get_mbs(a, &acl->acl_p->name, name) != 0) { if (errno == ENOMEM) return (ARCHIVE_FATAL); *name = NULL; } acl->acl_p = acl->acl_p->next; return (ARCHIVE_OK); } /* * Generate a text version of the ACL. The flags parameter controls * the style of the generated ACL. */ const wchar_t * archive_acl_text_w(struct archive *a, struct archive_acl *acl, int flags) { int count; size_t length; const wchar_t *wname; const wchar_t *prefix; wchar_t separator; struct archive_acl_entry *ap; int id, r; wchar_t *wp; - if ((flags & ARCHIVE_ENTRY_ACL_TYPE_NFS4) && - (flags & (ARCHIVE_ENTRY_ACL_TYPE_ACCESS | ARCHIVE_ENTRY_ACL_TYPE_DEFAULT))) { - /* cannot convert NFSv4 ACLs and POSIX1e ACLs at the same time */ - return (NULL); - } - if ((flags & ARCHIVE_ENTRY_ACL_TYPE_ACCESS) && (flags & ARCHIVE_ENTRY_ACL_TYPE_DEFAULT)) { - /* cannot have access and default at the same time */ - return (NULL); - } - if (acl->acl_text_w != NULL) { free (acl->acl_text_w); acl->acl_text_w = NULL; } separator = L','; count = 0; length = 0; ap = acl->acl_head; while (ap != NULL) { if ((ap->type & flags) != 0) { count++; if ((flags & ARCHIVE_ENTRY_ACL_STYLE_MARK_DEFAULT) && (ap->type & ARCHIVE_ENTRY_ACL_TYPE_DEFAULT)) length += 8; /* "default:" */ - switch (ap->tag) { - case ARCHIVE_ENTRY_ACL_USER_OBJ: - if ((flags & ARCHIVE_ENTRY_ACL_TYPE_NFS4) != 0) { - length += 6; /* "owner@" */ - break; - } - /* FALLTHROUGH */ - case ARCHIVE_ENTRY_ACL_USER: - length += 4; /* "user" */ - break; - case ARCHIVE_ENTRY_ACL_GROUP_OBJ: - if ((flags & ARCHIVE_ENTRY_ACL_TYPE_NFS4) != 0) { - length += 6; /* "group@" */ - break; - } - /* FALLTHROUGH */ - case ARCHIVE_ENTRY_ACL_GROUP: - case ARCHIVE_ENTRY_ACL_OTHER: - length += 5; /* "group", "other" */ - break; - case ARCHIVE_ENTRY_ACL_EVERYONE: - length += 9; /* "everyone@" */ - break; - } + length += 5; /* tag name */ length += 1; /* colon */ - if (((flags & ARCHIVE_ENTRY_ACL_TYPE_NFS4) == 0) || - ap->tag == ARCHIVE_ENTRY_ACL_USER || - ap->tag == ARCHIVE_ENTRY_ACL_GROUP) { - r = archive_mstring_get_wcs(a, &ap->name, &wname); - if (r == 0 && wname != NULL) - length += wcslen(wname); - else if (r < 0 && errno == ENOMEM) - return (NULL); - else - length += sizeof(uid_t) * 3 + 1; - length += 1; /* colon */ - } - if ((flags & ARCHIVE_ENTRY_ACL_TYPE_NFS4) != 0) - length += 14; /* rwxpdDaARWcCos */ + r = archive_mstring_get_wcs(a, &ap->name, &wname); + if (r == 0 && wname != NULL) + length += wcslen(wname); + else if (r < 0 && errno == ENOMEM) 
+ return (NULL); else - length += 3; /* rwx */ + length += sizeof(uid_t) * 3 + 1; + length ++; /* colon */ + length += 3; /* rwx */ length += 1; /* colon */ - if ((flags & ARCHIVE_ENTRY_ACL_TYPE_NFS4) != 0) { - length += 7; /* fdinSFI */ - length += 1; /* colon */ - if ((ap->type & ARCHIVE_ENTRY_ACL_TYPE_DENY) != 0) - length += 4; /* deny */ - else - length += 5; /* allow, alarm, audit */ - length += 1; /* colon */ - } - length += 1; /* colon */ length += max(sizeof(uid_t), sizeof(gid_t)) * 3 + 1; length ++; /* newline */ } ap = ap->next; } - if (count == 0) - return (NULL); - - if ((flags & ARCHIVE_ENTRY_ACL_TYPE_ACCESS) != 0) { + if (count > 0 && ((flags & ARCHIVE_ENTRY_ACL_TYPE_ACCESS) != 0)) { length += 10; /* "user::rwx\n" */ length += 11; /* "group::rwx\n" */ length += 11; /* "other::rwx\n" */ } + if (count == 0) + return (NULL); + /* Now, allocate the string and actually populate it. */ wp = acl->acl_text_w = (wchar_t *)malloc(length * sizeof(wchar_t)); if (wp == NULL) return (NULL); count = 0; - if ((flags & ARCHIVE_ENTRY_ACL_TYPE_ACCESS) != 0) { - append_entry_w(&wp, NULL, ARCHIVE_ENTRY_ACL_TYPE_ACCESS, - ARCHIVE_ENTRY_ACL_USER_OBJ, NULL, acl->mode & 0700, -1); + append_entry_w(&wp, NULL, ARCHIVE_ENTRY_ACL_USER_OBJ, NULL, + acl->mode & 0700, -1); *wp++ = ','; - append_entry_w(&wp, NULL, ARCHIVE_ENTRY_ACL_TYPE_ACCESS, - ARCHIVE_ENTRY_ACL_GROUP_OBJ, NULL, acl->mode & 0070, -1); + append_entry_w(&wp, NULL, ARCHIVE_ENTRY_ACL_GROUP_OBJ, NULL, + acl->mode & 0070, -1); *wp++ = ','; - append_entry_w(&wp, NULL, ARCHIVE_ENTRY_ACL_TYPE_ACCESS, - ARCHIVE_ENTRY_ACL_OTHER, NULL, acl->mode & 0007, -1); + append_entry_w(&wp, NULL, ARCHIVE_ENTRY_ACL_OTHER, NULL, + acl->mode & 0007, -1); count += 3; - } - if ((flags & (ARCHIVE_ENTRY_ACL_TYPE_ACCESS | - ARCHIVE_ENTRY_ACL_TYPE_NFS4)) != 0) { ap = acl->acl_head; while (ap != NULL) { - if ((ap->type & (ARCHIVE_ENTRY_ACL_TYPE_ACCESS | - ARCHIVE_ENTRY_ACL_TYPE_NFS4)) != 0) { + if ((ap->type & ARCHIVE_ENTRY_ACL_TYPE_ACCESS) != 0) { r = archive_mstring_get_wcs(a, &ap->name, &wname); if (r == 0) { *wp++ = separator; if (flags & ARCHIVE_ENTRY_ACL_STYLE_EXTRA_ID) id = ap->id; else id = -1; - append_entry_w(&wp, NULL, ap->type, ap->tag, - wname, ap->permset, id); + append_entry_w(&wp, NULL, ap->tag, wname, + ap->permset, id); count++; } else if (r < 0 && errno == ENOMEM) return (NULL); } ap = ap->next; } } + if ((flags & ARCHIVE_ENTRY_ACL_TYPE_DEFAULT) != 0) { if (flags & ARCHIVE_ENTRY_ACL_STYLE_MARK_DEFAULT) prefix = L"default:"; else prefix = NULL; ap = acl->acl_head; count = 0; while (ap != NULL) { if ((ap->type & ARCHIVE_ENTRY_ACL_TYPE_DEFAULT) != 0) { r = archive_mstring_get_wcs(a, &ap->name, &wname); if (r == 0) { if (count > 0) *wp++ = separator; if (flags & ARCHIVE_ENTRY_ACL_STYLE_EXTRA_ID) id = ap->id; else id = -1; - append_entry_w(&wp, prefix, ap->type, - ap->tag, wname, ap->permset, id); + append_entry_w(&wp, prefix, ap->tag, + wname, ap->permset, id); count ++; } else if (r < 0 && errno == ENOMEM) return (NULL); } ap = ap->next; } } return (acl->acl_text_w); } static void append_id_w(wchar_t **wp, int id) { if (id < 0) id = 0; if (id > 9) append_id_w(wp, id / 10); *(*wp)++ = L"0123456789"[id % 10]; } static void -append_entry_w(wchar_t **wp, const wchar_t *prefix, int type, - int tag, const wchar_t *wname, int perm, int id) +append_entry_w(wchar_t **wp, const wchar_t *prefix, int tag, + const wchar_t *wname, int perm, int id) { if (prefix != NULL) { wcscpy(*wp, prefix); *wp += wcslen(*wp); } switch (tag) { case ARCHIVE_ENTRY_ACL_USER_OBJ: wname = 
NULL; id = -1; - if (type & ARCHIVE_ENTRY_ACL_TYPE_NFS4) { - wcscpy(*wp, L"owner@"); - break; - } /* FALLTHROUGH */ - /* FALLTHROUGH */ case ARCHIVE_ENTRY_ACL_USER: wcscpy(*wp, L"user"); break; case ARCHIVE_ENTRY_ACL_GROUP_OBJ: wname = NULL; id = -1; /* FALLTHROUGH */ case ARCHIVE_ENTRY_ACL_GROUP: wcscpy(*wp, L"group"); break; case ARCHIVE_ENTRY_ACL_MASK: wcscpy(*wp, L"mask"); wname = NULL; id = -1; break; case ARCHIVE_ENTRY_ACL_OTHER: wcscpy(*wp, L"other"); wname = NULL; id = -1; break; } *wp += wcslen(*wp); *(*wp)++ = L':'; - if ((type & ARCHIVE_ENTRY_ACL_TYPE_NFS4) == 0 || - tag == ARCHIVE_ENTRY_ACL_USER || - tag == ARCHIVE_ENTRY_ACL_GROUP) { - if (wname != NULL) { - wcscpy(*wp, wname); - *wp += wcslen(*wp); - } else if (tag == ARCHIVE_ENTRY_ACL_USER - || tag == ARCHIVE_ENTRY_ACL_GROUP) { - append_id_w(wp, id); - id = -1; - } - *(*wp)++ = L':'; - } - *(*wp)++ = (perm & (ARCHIVE_ENTRY_ACL_READ | - ARCHIVE_ENTRY_ACL_READ_DATA | - ARCHIVE_ENTRY_ACL_LIST_DIRECTORY)) ? L'r' : L'-'; - *(*wp)++ = (perm & (ARCHIVE_ENTRY_ACL_WRITE | - ARCHIVE_ENTRY_ACL_WRITE_DATA | - ARCHIVE_ENTRY_ACL_ADD_FILE)) ? L'w' : L'-'; - *(*wp)++ = (perm & ARCHIVE_ENTRY_ACL_EXECUTE) ? L'x' : L'-'; - if (type & ARCHIVE_ENTRY_ACL_TYPE_NFS4) { - *(*wp)++ = (perm & (ARCHIVE_ENTRY_ACL_APPEND_DATA | ARCHIVE_ENTRY_ACL_ADD_SUBDIRECTORY)) ? L'p' : L'-'; - *(*wp)++ = (perm & ARCHIVE_ENTRY_ACL_DELETE) ? L'd' : L'-'; - *(*wp)++ = (perm & ARCHIVE_ENTRY_ACL_DELETE_CHILD) ? L'D' : L'-'; - *(*wp)++ = (perm & ARCHIVE_ENTRY_ACL_READ_ATTRIBUTES) ? L'a' : L'-'; - *(*wp)++ = (perm & ARCHIVE_ENTRY_ACL_WRITE_ATTRIBUTES) ? L'A' : L'-'; - *(*wp)++ = (perm & ARCHIVE_ENTRY_ACL_READ_NAMED_ATTRS) ? L'R' : L'-'; - *(*wp)++ = (perm & ARCHIVE_ENTRY_ACL_WRITE_NAMED_ATTRS) ? L'W' : L'-'; - *(*wp)++ = (perm & ARCHIVE_ENTRY_ACL_READ_ACL) ? L'c' : L'-'; - *(*wp)++ = (perm & ARCHIVE_ENTRY_ACL_WRITE_ACL) ? L'C' : L'-'; - *(*wp)++ = (perm & ARCHIVE_ENTRY_ACL_WRITE_OWNER) ? L'o' : L'-'; - *(*wp)++ = (perm & ARCHIVE_ENTRY_ACL_SYNCHRONIZE) ? L's' : L'-'; - *(*wp)++ = L':'; - *(*wp)++ = (perm & ARCHIVE_ENTRY_ACL_ENTRY_FILE_INHERIT) ? L'f' : L'-'; - *(*wp)++ = (perm & ARCHIVE_ENTRY_ACL_ENTRY_DIRECTORY_INHERIT) ? L'd' : L'-'; - *(*wp)++ = (perm & ARCHIVE_ENTRY_ACL_ENTRY_INHERIT_ONLY) ? L'i' : L'-'; - *(*wp)++ = (perm & ARCHIVE_ENTRY_ACL_ENTRY_NO_PROPAGATE_INHERIT) ? L'n' : L'-'; - *(*wp)++ = (perm & ARCHIVE_ENTRY_ACL_ENTRY_SUCCESSFUL_ACCESS) ? L'S' : L'-'; - *(*wp)++ = (perm & ARCHIVE_ENTRY_ACL_ENTRY_FAILED_ACCESS) ? L'F' : L'-'; - *(*wp)++ = (perm & ARCHIVE_ENTRY_ACL_ENTRY_INHERITED) ? L'I' : L'-'; - *(*wp)++ = L':'; - if (type & ARCHIVE_ENTRY_ACL_TYPE_ALLOW) - wcscpy(*wp, L"allow"); - else if (type & ARCHIVE_ENTRY_ACL_TYPE_DENY) - wcscpy(*wp, L"deny"); - else if (type & ARCHIVE_ENTRY_ACL_TYPE_AUDIT) - wcscpy(*wp, L"audit"); - else if (type & ARCHIVE_ENTRY_ACL_TYPE_ALARM) - wcscpy(*wp, L"alarm"); + if (wname != NULL) { + wcscpy(*wp, wname); *wp += wcslen(*wp); + } else if (tag == ARCHIVE_ENTRY_ACL_USER + || tag == ARCHIVE_ENTRY_ACL_GROUP) { + append_id_w(wp, id); + id = -1; } + *(*wp)++ = L':'; + *(*wp)++ = (perm & 0444) ? L'r' : L'-'; + *(*wp)++ = (perm & 0222) ? L'w' : L'-'; + *(*wp)++ = (perm & 0111) ? 
L'x' : L'-'; if (id != -1) { *(*wp)++ = L':'; append_id_w(wp, id); } **wp = L'\0'; } int archive_acl_text_l(struct archive_acl *acl, int flags, const char **acl_text, size_t *acl_text_len, struct archive_string_conv *sc) { int count; size_t length; const char *name; const char *prefix; char separator; struct archive_acl_entry *ap; size_t len; int id, r; char *p; - if ((flags & ARCHIVE_ENTRY_ACL_TYPE_NFS4) && - (flags & (ARCHIVE_ENTRY_ACL_TYPE_ACCESS | ARCHIVE_ENTRY_ACL_TYPE_DEFAULT))) { - /* cannot convert NFSv4 ACLs and POSIX1e ACLs at the same time */ - return (-1); - } - if ((flags & ARCHIVE_ENTRY_ACL_TYPE_ACCESS) && (flags & ARCHIVE_ENTRY_ACL_TYPE_DEFAULT)) { - /* cannot have access and default at the same time */ - return (-1); - } - if (acl->acl_text != NULL) { free (acl->acl_text); acl->acl_text = NULL; } *acl_text = NULL; if (acl_text_len != NULL) *acl_text_len = 0; separator = ','; count = 0; length = 0; ap = acl->acl_head; while (ap != NULL) { if ((ap->type & flags) != 0) { count++; if ((flags & ARCHIVE_ENTRY_ACL_STYLE_MARK_DEFAULT) && (ap->type & ARCHIVE_ENTRY_ACL_TYPE_DEFAULT)) length += 8; /* "default:" */ - switch (ap->tag) { - case ARCHIVE_ENTRY_ACL_USER_OBJ: - if ((flags & ARCHIVE_ENTRY_ACL_TYPE_NFS4) != 0) { - length += 6; /* "owner@" */ - break; - } - /* FALLTHROUGH */ - case ARCHIVE_ENTRY_ACL_USER: - length += 4; /* "user" */ - break; - case ARCHIVE_ENTRY_ACL_GROUP_OBJ: - if ((flags & ARCHIVE_ENTRY_ACL_TYPE_NFS4) != 0) { - length += 6; /* "group@" */ - break; - } - /* FALLTHROUGH */ - case ARCHIVE_ENTRY_ACL_GROUP: - case ARCHIVE_ENTRY_ACL_OTHER: - length += 5; /* "group", "other" */ - break; - case ARCHIVE_ENTRY_ACL_EVERYONE: - length += 9; /* "everyone@" */ - break; - } - + length += 5; /* tag name */ length += 1; /* colon */ - if (((flags & ARCHIVE_ENTRY_ACL_TYPE_NFS4) == 0) || - ap->tag == ARCHIVE_ENTRY_ACL_USER || - ap->tag == ARCHIVE_ENTRY_ACL_GROUP) { - r = archive_mstring_get_mbs_l( - &ap->name, &name, &len, sc); - if (r != 0) - return (-1); - if (len > 0 && name != NULL) - length += len; - else - length += sizeof(uid_t) * 3 + 1; - length += 1; /* colon */ - } - if ((flags & ARCHIVE_ENTRY_ACL_TYPE_NFS4) != 0) - length += 14; /* rwxpdDaARWcCos */ + r = archive_mstring_get_mbs_l( + &ap->name, &name, &len, sc); + if (r != 0) + return (-1); + if (len > 0 && name != NULL) + length += len; else - length += 3; /* rwx */ + length += sizeof(uid_t) * 3 + 1; + length ++; /* colon */ + length += 3; /* rwx */ length += 1; /* colon */ - if ((flags & ARCHIVE_ENTRY_ACL_TYPE_NFS4) != 0) { - length += 7; /* fdinSFI */ - length += 1; /* colon */ - if ((flags & ARCHIVE_ENTRY_ACL_TYPE_DENY) != 0) - length += 4; /* deny */ - else - length += 5; /* allow, alarm, audit */ - length += 1; /* colon */ - } - length += max(sizeof(uid_t), sizeof(gid_t)) * 3 + 1; length ++; /* newline */ } ap = ap->next; } - if (count == 0) - return (0); - - if ((flags & ARCHIVE_ENTRY_ACL_TYPE_ACCESS) != 0) { + if (count > 0 && ((flags & ARCHIVE_ENTRY_ACL_TYPE_ACCESS) != 0)) { length += 10; /* "user::rwx\n" */ length += 11; /* "group::rwx\n" */ length += 11; /* "other::rwx\n" */ } + if (count == 0) + return (0); + /* Now, allocate the string and actually populate it. 
*/ p = acl->acl_text = (char *)malloc(length); if (p == NULL) return (-1); count = 0; if ((flags & ARCHIVE_ENTRY_ACL_TYPE_ACCESS) != 0) { - append_entry(&p, NULL, ARCHIVE_ENTRY_ACL_TYPE_ACCESS, - ARCHIVE_ENTRY_ACL_USER_OBJ, NULL, acl->mode & 0700, -1); + append_entry(&p, NULL, ARCHIVE_ENTRY_ACL_USER_OBJ, NULL, + acl->mode & 0700, -1); *p++ = ','; - append_entry(&p, NULL, ARCHIVE_ENTRY_ACL_TYPE_ACCESS, - ARCHIVE_ENTRY_ACL_GROUP_OBJ, NULL, acl->mode & 0070, -1); + append_entry(&p, NULL, ARCHIVE_ENTRY_ACL_GROUP_OBJ, NULL, + acl->mode & 0070, -1); *p++ = ','; - append_entry(&p, NULL, ARCHIVE_ENTRY_ACL_TYPE_ACCESS, - ARCHIVE_ENTRY_ACL_OTHER, NULL, acl->mode & 0007, -1); + append_entry(&p, NULL, ARCHIVE_ENTRY_ACL_OTHER, NULL, + acl->mode & 0007, -1); count += 3; - } - if ((flags & (ARCHIVE_ENTRY_ACL_TYPE_ACCESS | - ARCHIVE_ENTRY_ACL_TYPE_NFS4)) != 0) { for (ap = acl->acl_head; ap != NULL; ap = ap->next) { - if ((ap->type & (ARCHIVE_ENTRY_ACL_TYPE_ACCESS | - ARCHIVE_ENTRY_ACL_TYPE_NFS4)) == 0) + if ((ap->type & ARCHIVE_ENTRY_ACL_TYPE_ACCESS) == 0) continue; r = archive_mstring_get_mbs_l( &ap->name, &name, &len, sc); if (r != 0) return (-1); - if (count > 0) - *p++ = separator; + *p++ = separator; if (flags & ARCHIVE_ENTRY_ACL_STYLE_EXTRA_ID) id = ap->id; else id = -1; - append_entry(&p, NULL, ap->type, ap->tag, name, + append_entry(&p, NULL, ap->tag, name, ap->permset, id); count++; } } if ((flags & ARCHIVE_ENTRY_ACL_TYPE_DEFAULT) != 0) { if (flags & ARCHIVE_ENTRY_ACL_STYLE_MARK_DEFAULT) prefix = "default:"; else prefix = NULL; count = 0; for (ap = acl->acl_head; ap != NULL; ap = ap->next) { if ((ap->type & ARCHIVE_ENTRY_ACL_TYPE_DEFAULT) == 0) continue; r = archive_mstring_get_mbs_l( &ap->name, &name, &len, sc); if (r != 0) return (-1); if (count > 0) *p++ = separator; if (flags & ARCHIVE_ENTRY_ACL_STYLE_EXTRA_ID) id = ap->id; else id = -1; - append_entry(&p, prefix, ap->type, ap->tag, + append_entry(&p, prefix, ap->tag, name, ap->permset, id); count ++; } } *acl_text = acl->acl_text; if (acl_text_len != NULL) *acl_text_len = strlen(acl->acl_text); return (0); } static void append_id(char **p, int id) { if (id < 0) id = 0; if (id > 9) append_id(p, id / 10); *(*p)++ = "0123456789"[id % 10]; } static void -append_entry(char **p, const char *prefix, int type, - int tag, const char *name, int perm, int id) +append_entry(char **p, const char *prefix, int tag, + const char *name, int perm, int id) { if (prefix != NULL) { strcpy(*p, prefix); *p += strlen(*p); } switch (tag) { case ARCHIVE_ENTRY_ACL_USER_OBJ: name = NULL; id = -1; - if (type & ARCHIVE_ENTRY_ACL_TYPE_NFS4) { - strcpy(*p, "owner@"); - break; - } /* FALLTHROUGH */ case ARCHIVE_ENTRY_ACL_USER: strcpy(*p, "user"); break; case ARCHIVE_ENTRY_ACL_GROUP_OBJ: name = NULL; id = -1; - if (type & ARCHIVE_ENTRY_ACL_TYPE_NFS4) { - strcpy(*p, "group@"); - break; - } /* FALLTHROUGH */ case ARCHIVE_ENTRY_ACL_GROUP: strcpy(*p, "group"); break; case ARCHIVE_ENTRY_ACL_MASK: strcpy(*p, "mask"); name = NULL; id = -1; break; case ARCHIVE_ENTRY_ACL_OTHER: strcpy(*p, "other"); name = NULL; id = -1; break; - case ARCHIVE_ENTRY_ACL_EVERYONE: - strcpy(*p, "everyone@"); - name = NULL; - id = -1; - break; } *p += strlen(*p); *(*p)++ = ':'; - if ((type & ARCHIVE_ENTRY_ACL_TYPE_NFS4) == 0 || - tag == ARCHIVE_ENTRY_ACL_USER || - tag == ARCHIVE_ENTRY_ACL_GROUP) { - if (name != NULL) { - strcpy(*p, name); - *p += strlen(*p); - } else if (tag == ARCHIVE_ENTRY_ACL_USER - || tag == ARCHIVE_ENTRY_ACL_GROUP) { - append_id(p, id); - id = -1; - } - *(*p)++ = ':'; - } - *(*p)++ = 
(perm & (ARCHIVE_ENTRY_ACL_READ | - ARCHIVE_ENTRY_ACL_READ_DATA | - ARCHIVE_ENTRY_ACL_LIST_DIRECTORY)) ? 'r' : '-'; - *(*p)++ = (perm & (ARCHIVE_ENTRY_ACL_WRITE | - ARCHIVE_ENTRY_ACL_WRITE_DATA | - ARCHIVE_ENTRY_ACL_ADD_FILE)) ? 'w' : '-'; - *(*p)++ = (perm & ARCHIVE_ENTRY_ACL_EXECUTE) ? 'x' : '-'; - if (type & ARCHIVE_ENTRY_ACL_TYPE_NFS4) { - *(*p)++ = (perm & (ARCHIVE_ENTRY_ACL_APPEND_DATA | ARCHIVE_ENTRY_ACL_ADD_SUBDIRECTORY)) ? 'p' : '-'; - *(*p)++ = (perm & ARCHIVE_ENTRY_ACL_DELETE) ? 'd' : '-'; - *(*p)++ = (perm & ARCHIVE_ENTRY_ACL_DELETE_CHILD) ? 'D' : '-'; - *(*p)++ = (perm & ARCHIVE_ENTRY_ACL_READ_ATTRIBUTES) ? 'a' : '-'; - *(*p)++ = (perm & ARCHIVE_ENTRY_ACL_WRITE_ATTRIBUTES) ? 'A' : '-'; - *(*p)++ = (perm & ARCHIVE_ENTRY_ACL_READ_NAMED_ATTRS) ? 'R' : '-'; - *(*p)++ = (perm & ARCHIVE_ENTRY_ACL_WRITE_NAMED_ATTRS) ? 'W' : '-'; - *(*p)++ = (perm & ARCHIVE_ENTRY_ACL_READ_ACL) ? 'c' : '-'; - *(*p)++ = (perm & ARCHIVE_ENTRY_ACL_WRITE_ACL) ? 'C' : '-'; - *(*p)++ = (perm & ARCHIVE_ENTRY_ACL_WRITE_OWNER) ? 'o' : '-'; - *(*p)++ = (perm & ARCHIVE_ENTRY_ACL_SYNCHRONIZE) ? 's' : '-'; - *(*p)++ = ':'; - *(*p)++ = (perm & ARCHIVE_ENTRY_ACL_ENTRY_FILE_INHERIT) ? 'f' : '-'; - *(*p)++ = (perm & ARCHIVE_ENTRY_ACL_ENTRY_DIRECTORY_INHERIT) ? 'd' : '-'; - *(*p)++ = (perm & ARCHIVE_ENTRY_ACL_ENTRY_INHERIT_ONLY) ? 'i' : '-'; - *(*p)++ = (perm & ARCHIVE_ENTRY_ACL_ENTRY_NO_PROPAGATE_INHERIT) ? 'n' : '-'; - *(*p)++ = (perm & ARCHIVE_ENTRY_ACL_ENTRY_SUCCESSFUL_ACCESS) ? 'S' : '-'; - *(*p)++ = (perm & ARCHIVE_ENTRY_ACL_ENTRY_FAILED_ACCESS) ? 'F' : '-'; - *(*p)++ = (perm & ARCHIVE_ENTRY_ACL_ENTRY_INHERITED) ? 'I' : '-'; - *(*p)++ = ':'; - if (type & ARCHIVE_ENTRY_ACL_TYPE_ALLOW) - strcpy(*p, "allow"); - else if (type & ARCHIVE_ENTRY_ACL_TYPE_DENY) - strcpy(*p, "deny"); - else if (type & ARCHIVE_ENTRY_ACL_TYPE_AUDIT) - strcpy(*p, "audit"); - else if (type & ARCHIVE_ENTRY_ACL_TYPE_ALARM) - strcpy(*p, "alarm"); + if (name != NULL) { + strcpy(*p, name); *p += strlen(*p); + } else if (tag == ARCHIVE_ENTRY_ACL_USER + || tag == ARCHIVE_ENTRY_ACL_GROUP) { + append_id(p, id); + id = -1; } + *(*p)++ = ':'; + *(*p)++ = (perm & 0444) ? 'r' : '-'; + *(*p)++ = (perm & 0222) ? 'w' : '-'; + *(*p)++ = (perm & 0111) ? 'x' : '-'; if (id != -1) { *(*p)++ = ':'; append_id(p, id); } **p = '\0'; } /* * Parse a textual ACL. This automatically recognizes and supports * extensions described above. The 'type' argument is used to * indicate the type that should be used for any entries not * explicitly marked as "default:". */ int archive_acl_parse_w(struct archive_acl *acl, const wchar_t *text, int default_type) { struct { const wchar_t *start; const wchar_t *end; - } field[6], name; + } field[4], name; - int numfields, fields, n; + int fields, n; int type, tag, permset, id; - int offset; wchar_t sep; - if (default_type == ARCHIVE_ENTRY_ACL_TYPE_NFS4) - numfields = 6; - else - numfields = 4; - - while (text != NULL && *text != L'\0') { /* * Parse the fields out of the next entry, * advance 'text' to start of next entry. */ fields = 0; do { const wchar_t *start, *end; next_field_w(&text, &start, &end, &sep); - if (fields < numfields) { + if (fields < 4) { field[fields].start = start; field[fields].end = end; } ++fields; } while (sep == L':'); /* Set remaining fields to blank. */ - for (n = fields; n < numfields; ++n) + for (n = fields; n < 4; ++n) field[n].start = field[n].end = NULL; - if (default_type != ARCHIVE_ENTRY_ACL_TYPE_NFS4) { - /* POSIX.1e ACLs */ - /* Check for a numeric ID in field 1 or 3. 
*/ - id = -1; - isint_w(field[1].start, field[1].end, &id); - /* Field 3 is optional. */ - if (id == -1 && fields > 3) - isint_w(field[3].start, field[3].end, &id); + /* Check for a numeric ID in field 1 or 3. */ + id = -1; + isint_w(field[1].start, field[1].end, &id); + /* Field 3 is optional. */ + if (id == -1 && fields > 3) + isint_w(field[3].start, field[3].end, &id); - /* - * Solaris extension: "defaultuser::rwx" is the - * default ACL corresponding to "user::rwx", etc. - */ - if (field[0].end - field[0].start > 7 - && wmemcmp(field[0].start, L"default", 7) == 0) { - type = ARCHIVE_ENTRY_ACL_TYPE_DEFAULT; - field[0].start += 7; - } else - type = default_type; + /* + * Solaris extension: "defaultuser::rwx" is the + * default ACL corresponding to "user::rwx", etc. + */ + if (field[0].end - field[0].start > 7 + && wmemcmp(field[0].start, L"default", 7) == 0) { + type = ARCHIVE_ENTRY_ACL_TYPE_DEFAULT; + field[0].start += 7; + } else + type = default_type; - name.start = name.end = NULL; - if (prefix_w(field[0].start, field[0].end, L"user")) { - if (!ismode_w(field[2].start, field[2].end, - &permset)) + name.start = name.end = NULL; + if (prefix_w(field[0].start, field[0].end, L"user")) { + if (!ismode_w(field[2].start, field[2].end, &permset)) return (ARCHIVE_WARN); - if (id != -1 || field[1].start < field[1].end) { - tag = ARCHIVE_ENTRY_ACL_USER; - name = field[1]; - } else - tag = ARCHIVE_ENTRY_ACL_USER_OBJ; - } else if (prefix_w(field[0].start, field[0].end, - L"group")) { - if (!ismode_w(field[2].start, field[2].end, - &permset)) - return (ARCHIVE_WARN); - if (id != -1 || field[1].start < field[1].end) { - tag = ARCHIVE_ENTRY_ACL_GROUP; - name = field[1]; - } else - tag = ARCHIVE_ENTRY_ACL_GROUP_OBJ; - } else if (prefix_w(field[0].start, field[0].end, - L"other")) { - if (fields == 2 - && field[1].start < field[1].end - && ismode_w(field[1].start, field[1].end, - &permset)) { - /* This is Solaris-style "other:rwx" */ - } else if (fields == 3 - && field[1].start == field[1].end - && field[2].start < field[2].end - && ismode_w(field[2].start, field[2].end, - &permset)) { - /* This is FreeBSD-style "other::rwx" */ - } else - return (ARCHIVE_WARN); - tag = ARCHIVE_ENTRY_ACL_OTHER; - } else if (prefix_w(field[0].start, field[0].end, - L"mask")) { - if (fields == 2 - && field[1].start < field[1].end - && ismode_w(field[1].start, field[1].end, - &permset)) { - /* This is Solaris-style "mask:rwx" */ - } else if (fields == 3 - && field[1].start == field[1].end - && field[2].start < field[2].end - && ismode_w(field[2].start, field[2].end, - &permset)) { - /* This is FreeBSD-style "mask::rwx" */ - } else - return (ARCHIVE_WARN); - tag = ARCHIVE_ENTRY_ACL_MASK; + if (id != -1 || field[1].start < field[1].end) { + tag = ARCHIVE_ENTRY_ACL_USER; + name = field[1]; } else + tag = ARCHIVE_ENTRY_ACL_USER_OBJ; + } else if (prefix_w(field[0].start, field[0].end, L"group")) { + if (!ismode_w(field[2].start, field[2].end, &permset)) return (ARCHIVE_WARN); - } else { - /* NFSv4 ACLs */ - if (wcsncmp(field[0].start, L"user", - field[0].end - field[0].start) == 0) - tag = ARCHIVE_ENTRY_ACL_USER; - else if (wcsncmp(field[0].start, L"group", - field[0].end - field[0].start) == 0) + if (id != -1 || field[1].start < field[1].end) { tag = ARCHIVE_ENTRY_ACL_GROUP; - else if (wcsncmp(field[0].start, L"owner@", - field[0].end - field[0].start) == 0) - tag = ARCHIVE_ENTRY_ACL_USER_OBJ; - else if (wcsncmp(field[0].start, L"group@", - field[0].end - field[0].start) == 0) + name = field[1]; + } else tag = 
ARCHIVE_ENTRY_ACL_GROUP_OBJ; - else if (wcsncmp(field[0].start, L"everyone@", - field[0].end - field[0].start) == 0) - tag = ARCHIVE_ENTRY_ACL_EVERYONE; - else { - /* Unknown entry */ + } else if (prefix_w(field[0].start, field[0].end, L"other")) { + if (fields == 2 + && field[1].start < field[1].end + && ismode_w(field[1].start, field[1].end, &permset)) { + /* This is Solaris-style "other:rwx" */ + } else if (fields == 3 + && field[1].start == field[1].end + && field[2].start < field[2].end + && ismode_w(field[2].start, field[2].end, &permset)) { + /* This is FreeBSD-style "other::rwx" */ + } else return (ARCHIVE_WARN); - } - - permset = 0; - name.start = name.end = NULL; - - if (tag == ARCHIVE_ENTRY_ACL_USER || - tag == ARCHIVE_ENTRY_ACL_GROUP) { - offset = 1; - name = field[1]; + tag = ARCHIVE_ENTRY_ACL_OTHER; + } else if (prefix_w(field[0].start, field[0].end, L"mask")) { + if (fields == 2 + && field[1].start < field[1].end + && ismode_w(field[1].start, field[1].end, &permset)) { + /* This is Solaris-style "mask:rwx" */ + } else if (fields == 3 + && field[1].start == field[1].end + && field[2].start < field[2].end + && ismode_w(field[2].start, field[2].end, &permset)) { + /* This is FreeBSD-style "mask::rwx" */ } else - offset = 0; - - if (parse_nfs4_perms_w(field[1 + offset].start, - field[1 + offset].end, &permset) != 0) { - /* NFS4 perms are invalid */ return (ARCHIVE_WARN); - } - if (parse_nfs4_flags_w(field[2 + offset].start, - field[2 + offset].end, &permset) != 0) { - /* NFS4 flags are invalid */ - return (ARCHIVE_WARN); - } - if (wcsncmp(field[3 + offset].start, L"allow", - field[3 + offset].end - field[3 + offset].start) - == 0) - type = ARCHIVE_ENTRY_ACL_TYPE_ALLOW; - else if (wcsncmp(field[3 + offset].start, L"deny", - field[3 + offset].end - field[3 + offset].start) - == 0) - type = ARCHIVE_ENTRY_ACL_TYPE_DENY; - else if (wcsncmp(field[3 + offset].start, L"audit", - field[3 + offset].end - field[3 + offset].start) - == 0) - type = ARCHIVE_ENTRY_ACL_TYPE_AUDIT; - else if (wcsncmp(field[3 + offset].start, L"alarm", - field[3 + offset].end - field[3 + offset].start) - == 0) - type = ARCHIVE_ENTRY_ACL_TYPE_ALARM; - else { - /* Unknown type */ - return (ARCHIVE_WARN); - } - isint_w(field[4 + offset].start, field[4 + offset].end, - &id); - } + tag = ARCHIVE_ENTRY_ACL_MASK; + } else + return (ARCHIVE_WARN); /* Add entry to the internal list. */ archive_acl_add_entry_w_len(acl, type, permset, tag, id, name.start, name.end - name.start); } return (ARCHIVE_OK); } /* * Parse a string to a positive decimal integer. Returns true if * the string is non-empty and consists only of decimal digits, * false otherwise. */ static int isint_w(const wchar_t *start, const wchar_t *end, int *result) { int n = 0; if (start >= end) return (0); while (start < end) { if (*start < '0' || *start > '9') return (0); if (n > (INT_MAX / 10) || (n == INT_MAX / 10 && (*start - '0') > INT_MAX % 10)) { n = INT_MAX; } else { n *= 10; n += *start - '0'; } start++; } *result = n; return (1); } /* * Parse a string as a mode field. Returns true if * the string is non-empty and consists only of mode characters, * false otherwise. 
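/*
 * Illustrative aside (not part of this change): isint_w()/isint() saturate
 * at INT_MAX instead of overflowing.  A standalone check of that behavior:
 */
#include <limits.h>
#include <stdio.h>
#include <string.h>

static int
parse_clamped(const char *start, const char *end, int *result)
{
	int n = 0;

	if (start >= end)
		return (0);
	while (start < end) {
		if (*start < '0' || *start > '9')
			return (0);
		if (n > (INT_MAX / 10) ||
		    (n == INT_MAX / 10 && (*start - '0') > INT_MAX % 10))
			n = INT_MAX;	/* Saturate rather than wrap. */
		else
			n = n * 10 + (*start - '0');
		start++;
	}
	*result = n;
	return (1);
}

int
main(void)
{
	const char *big = "99999999999999999999";
	int id;

	if (parse_clamped(big, big + strlen(big), &id))
		printf("%d\n", id);	/* Prints INT_MAX (2147483647). */
	return (0);
}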
*/ static int ismode_w(const wchar_t *start, const wchar_t *end, int *permset) { const wchar_t *p; if (start >= end) return (0); p = start; *permset = 0; while (p < end) { switch (*p++) { case 'r': case 'R': *permset |= ARCHIVE_ENTRY_ACL_READ; break; case 'w': case 'W': *permset |= ARCHIVE_ENTRY_ACL_WRITE; break; case 'x': case 'X': *permset |= ARCHIVE_ENTRY_ACL_EXECUTE; break; case '-': break; default: return (0); } } return (1); } -/* Parse a wstring as a strict NFSv4 ACL permission field. */ -static int -parse_nfs4_perms_w(const wchar_t *start, const wchar_t *end, int *permset) -{ - const wchar_t *p; - int pos; - const wchar_t *letter = L"rwxpdDaARWcCos"; - const int perms[14] = { - ARCHIVE_ENTRY_ACL_READ_DATA, - ARCHIVE_ENTRY_ACL_WRITE_DATA, - ARCHIVE_ENTRY_ACL_EXECUTE, - ARCHIVE_ENTRY_ACL_APPEND_DATA, - ARCHIVE_ENTRY_ACL_DELETE, - ARCHIVE_ENTRY_ACL_DELETE_CHILD, - ARCHIVE_ENTRY_ACL_READ_ATTRIBUTES, - ARCHIVE_ENTRY_ACL_WRITE_ATTRIBUTES, - ARCHIVE_ENTRY_ACL_READ_NAMED_ATTRS, - ARCHIVE_ENTRY_ACL_WRITE_NAMED_ATTRS, - ARCHIVE_ENTRY_ACL_READ_ACL, - ARCHIVE_ENTRY_ACL_WRITE_ACL, - ARCHIVE_ENTRY_ACL_WRITE_OWNER, - ARCHIVE_ENTRY_ACL_SYNCHRONIZE - }; - - if (start >= end) - return (0); - p = start; - pos = 0; - while (p < end && pos < 14) { - if (*p == letter[pos]) - *permset |= perms[pos]; - else if (*p != '-') - return (-1); - p = p + sizeof(wchar_t); - pos++; - } - return (0); -} - -/* Parse a string as a strict NFSv4 ACL flags field. */ -static int -parse_nfs4_flags_w(const wchar_t *start, const wchar_t *end, int *permset) -{ - const wchar_t *p; - int pos; - const wchar_t *letter = L"fdinSFI"; - const int perms[7] = { - ARCHIVE_ENTRY_ACL_ENTRY_FILE_INHERIT, - ARCHIVE_ENTRY_ACL_ENTRY_DIRECTORY_INHERIT, - ARCHIVE_ENTRY_ACL_ENTRY_INHERIT_ONLY, - ARCHIVE_ENTRY_ACL_ENTRY_NO_PROPAGATE_INHERIT, - ARCHIVE_ENTRY_ACL_ENTRY_SUCCESSFUL_ACCESS, - ARCHIVE_ENTRY_ACL_ENTRY_FAILED_ACCESS, - ARCHIVE_ENTRY_ACL_ENTRY_INHERITED - }; - - if (start >= end) - return (0); - p = start; - pos = 0; - while (p < end && pos < 7) { - if (*p == letter[pos]) - *permset |= perms[pos]; - else if (*p != '-') - return (-1); - p = p + sizeof(wchar_t); - pos++; - } - return (0); -} - - /* * Match "[:whitespace:]*(.*)[:whitespace:]*[:,\n]". *wp is updated * to point to just after the separator. *start points to the first * character of the matched text and *end just after the last * character of the matched identifier. In particular *end - *start * is the length of the field body, not including leading or trailing * whitespace. */ static void next_field_w(const wchar_t **wp, const wchar_t **start, const wchar_t **end, wchar_t *sep) { /* Skip leading whitespace to find start of field. */ while (**wp == L' ' || **wp == L'\t' || **wp == L'\n') { (*wp)++; } *start = *wp; /* Scan for the separator. */ while (**wp != L'\0' && **wp != L',' && **wp != L':' && **wp != L'\n') { (*wp)++; } *sep = **wp; /* Trim trailing whitespace to locate end of field. */ *end = *wp - 1; while (**end == L' ' || **end == L'\t' || **end == L'\n') { (*end)--; } (*end)++; /* Adjust scanner location. */ if (**wp != L'\0') (*wp)++; } /* * Return true if the characters [start...end) are a prefix of 'test'. * This makes it easy to handle the obvious abbreviations: 'u' for 'user', etc. */ static int prefix_w(const wchar_t *start, const wchar_t *end, const wchar_t *test) { if (start == end) return (0); if (*start++ != *test++) return (0); while (start < end && *start++ == *test++) ; if (start < end) return (0); return (1); } /* * Parse a textual ACL. 
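/*
 * Illustrative aside (not part of this change): prefix_w() accepts any
 * leading substring, so "u", "us", and "user" all select the user tag.
 * A narrow-character sketch of the same test:
 */
#include <stdio.h>
#include <string.h>

static int
is_prefix(const char *start, const char *end, const char *test)
{
	if (start == end)
		return (0);		/* An empty field never matches. */
	while (start < end && *test != '\0' && *start == *test) {
		start++;
		test++;
	}
	return (start == end);		/* Whole field consumed? */
}

int
main(void)
{
	const char *f = "use";

	printf("%d\n", is_prefix(f, f + strlen(f), "user"));	/* 1 */
	printf("%d\n", is_prefix(f, f + strlen(f), "group"));	/* 0 */
	return (0);
}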
This automatically recognizes and supports * extensions described above. The 'type' argument is used to * indicate the type that should be used for any entries not * explicitly marked as "default:". */ int archive_acl_parse_l(struct archive_acl *acl, const char *text, int default_type, struct archive_string_conv *sc) { struct { const char *start; const char *end; - } field[6], name; + } field[4], name; - int numfields, fields, n, r, ret = ARCHIVE_OK; + int fields, n, r, ret = ARCHIVE_OK; int type, tag, permset, id; - int offset; char sep; - if (default_type == ARCHIVE_ENTRY_ACL_TYPE_NFS4) - numfields = 6; - else - numfields = 4; - while (text != NULL && *text != '\0') { /* * Parse the fields out of the next entry, * advance 'text' to start of next entry. */ fields = 0; do { const char *start, *end; next_field(&text, &start, &end, &sep); - if (fields < numfields) { + if (fields < 4) { field[fields].start = start; field[fields].end = end; } ++fields; } while (sep == ':'); /* Set remaining fields to blank. */ - for (n = fields; n < numfields; ++n) + for (n = fields; n < 4; ++n) field[n].start = field[n].end = NULL; - if (default_type != ARCHIVE_ENTRY_ACL_TYPE_NFS4) { - /* POSIX.1e ACLs */ - /* Check for a numeric ID in field 1 or 3. */ - id = -1; - isint(field[1].start, field[1].end, &id); - /* Field 3 is optional. */ - if (id == -1 && fields > 3) - isint(field[3].start, field[3].end, &id); + /* Check for a numeric ID in field 1 or 3. */ + id = -1; + isint(field[1].start, field[1].end, &id); + /* Field 3 is optional. */ + if (id == -1 && fields > 3) + isint(field[3].start, field[3].end, &id); - /* - * Solaris extension: "defaultuser::rwx" is the - * default ACL corresponding to "user::rwx", etc. - */ - if (field[0].end - field[0].start > 7 - && memcmp(field[0].start, "default", 7) == 0) { - type = ARCHIVE_ENTRY_ACL_TYPE_DEFAULT; - field[0].start += 7; - } else - type = default_type; + /* + * Solaris extension: "defaultuser::rwx" is the + * default ACL corresponding to "user::rwx", etc. 
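/*
 * Illustrative aside (not part of this change): the Solaris "default"
 * prefix test above, reduced to a standalone check:
 */
#include <stdio.h>
#include <string.h>

int
main(void)
{
	const char *field = "defaultuser";
	const char *end = field + strlen(field);
	int is_default = 0;

	/* "defaultuser::rwx" is the default-ACL form of "user::rwx". */
	if (end - field > 7 && memcmp(field, "default", 7) == 0) {
		is_default = 1;
		field += 7;	/* Tag parsing continues at "user". */
	}
	printf("default=%d tag=%s\n", is_default, field);	/* default=1 tag=user */
	return (0);
}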
+ */ + if (field[0].end - field[0].start > 7 + && memcmp(field[0].start, "default", 7) == 0) { + type = ARCHIVE_ENTRY_ACL_TYPE_DEFAULT; + field[0].start += 7; + } else + type = default_type; - name.start = name.end = NULL; - if (prefix_c(field[0].start, field[0].end, "user")) { - if (!ismode(field[2].start, field[2].end, - &permset)) - return (ARCHIVE_WARN); - if (id != -1 || field[1].start < field[1].end) { - tag = ARCHIVE_ENTRY_ACL_USER; - name = field[1]; - } else - tag = ARCHIVE_ENTRY_ACL_USER_OBJ; - } else if (prefix_c(field[0].start, field[0].end, - "group")) { - if (!ismode(field[2].start, field[2].end, - &permset)) - return (ARCHIVE_WARN); - if (id != -1 || field[1].start < field[1].end) { - tag = ARCHIVE_ENTRY_ACL_GROUP; - name = field[1]; - } else - tag = ARCHIVE_ENTRY_ACL_GROUP_OBJ; - } else if (prefix_c(field[0].start, field[0].end, - "other")) { - if (fields == 2 - && field[1].start < field[1].end - && ismode(field[1].start, field[1].end, - &permset)) { - /* This is Solaris-style "other:rwx" */ - } else if (fields == 3 - && field[1].start == field[1].end - && field[2].start < field[2].end - && ismode(field[2].start, field[2].end, - &permset)) { - /* This is FreeBSD-style "other::rwx" */ - } else - return (ARCHIVE_WARN); - tag = ARCHIVE_ENTRY_ACL_OTHER; - } else if (prefix_c(field[0].start, field[0].end, - "mask")) { - if (fields == 2 - && field[1].start < field[1].end - && ismode(field[1].start, field[1].end, - &permset)) { - /* This is Solaris-style "mask:rwx" */ - } else if (fields == 3 - && field[1].start == field[1].end - && field[2].start < field[2].end - && ismode(field[2].start, field[2].end, - &permset)) { - /* This is FreeBSD-style "mask::rwx" */ - } else - return (ARCHIVE_WARN); - tag = ARCHIVE_ENTRY_ACL_MASK; - } else + name.start = name.end = NULL; + if (prefix_c(field[0].start, field[0].end, "user")) { + if (!ismode(field[2].start, field[2].end, &permset)) return (ARCHIVE_WARN); - } else { - /* NFSv4 ACLs */ - if (strncmp(field[0].start, "user", - field[0].end - field[0].start) == 0) + if (id != -1 || field[1].start < field[1].end) { tag = ARCHIVE_ENTRY_ACL_USER; - else if (strncmp(field[0].start, "group", - field[0].end - field[0].start) == 0) - tag = ARCHIVE_ENTRY_ACL_GROUP; - else if (strncmp(field[0].start, "owner@", - field[0].end - field[0].start) == 0) + name = field[1]; + } else tag = ARCHIVE_ENTRY_ACL_USER_OBJ; - else if (strncmp(field[0].start, "group@", - field[0].end - field[0].start) == 0) - tag = ARCHIVE_ENTRY_ACL_GROUP_OBJ; - else if (strncmp(field[0].start, "everyone@", - field[0].end - field[0].start) == 0) - tag = ARCHIVE_ENTRY_ACL_EVERYONE; - else { - /* Unknown entry */ + } else if (prefix_c(field[0].start, field[0].end, "group")) { + if (!ismode(field[2].start, field[2].end, &permset)) return (ARCHIVE_WARN); - } - - permset = 0; - name.start = name.end = NULL; - - if (tag == ARCHIVE_ENTRY_ACL_USER || - tag == ARCHIVE_ENTRY_ACL_GROUP) { - offset = 1; + if (id != -1 || field[1].start < field[1].end) { + tag = ARCHIVE_ENTRY_ACL_GROUP; name = field[1]; } else - offset = 0; - - if (parse_nfs4_perms(field[1 + offset].start, - field[1 + offset].end, &permset) != 0) { - /* NFS4 perms are invalid */ + tag = ARCHIVE_ENTRY_ACL_GROUP_OBJ; + } else if (prefix_c(field[0].start, field[0].end, "other")) { + if (fields == 2 + && field[1].start < field[1].end + && ismode(field[1].start, field[1].end, &permset)) { + /* This is Solaris-style "other:rwx" */ + } else if (fields == 3 + && field[1].start == field[1].end + && field[2].start < field[2].end + && 
ismode(field[2].start, field[2].end, &permset)) { + /* This is FreeBSD-style "other::rwx" */ + } else return (ARCHIVE_WARN); - } - if (parse_nfs4_flags(field[2 + offset].start, - field[2 + offset].end, &permset) != 0) { - /* NFS4 flags are invalid */ + tag = ARCHIVE_ENTRY_ACL_OTHER; + } else if (prefix_c(field[0].start, field[0].end, "mask")) { + if (fields == 2 + && field[1].start < field[1].end + && ismode(field[1].start, field[1].end, &permset)) { + /* This is Solaris-style "mask:rwx" */ + } else if (fields == 3 + && field[1].start == field[1].end + && field[2].start < field[2].end + && ismode(field[2].start, field[2].end, &permset)) { + /* This is FreeBSD-style "mask::rwx" */ + } else return (ARCHIVE_WARN); - } - if (strncmp(field[3 + offset].start, "allow", - field[3 + offset].end - field[3 + offset].start) - == 0) - type = ARCHIVE_ENTRY_ACL_TYPE_ALLOW; - else if (strncmp(field[3 + offset].start, "deny", - field[3 + offset].end - field[3 + offset].start) - == 0) - type = ARCHIVE_ENTRY_ACL_TYPE_DENY; - else if (strncmp(field[3 + offset].start, "audit", - field[3 + offset].end - field[3 + offset].start) - == 0) - type = ARCHIVE_ENTRY_ACL_TYPE_AUDIT; - else if (strncmp(field[3 + offset].start, "alarm", - field[3 + offset].end - field[3 + offset].start) - == 0) - type = ARCHIVE_ENTRY_ACL_TYPE_ALARM; - else { - /* Unknown type */ - return (ARCHIVE_WARN); - } - isint(field[4 + offset].start, field[4 + offset].end, - &id); - } + tag = ARCHIVE_ENTRY_ACL_MASK; + } else + return (ARCHIVE_WARN); /* Add entry to the internal list. */ r = archive_acl_add_entry_len_l(acl, type, permset, tag, id, name.start, name.end - name.start, sc); if (r < ARCHIVE_WARN) return (r); if (r != ARCHIVE_OK) ret = ARCHIVE_WARN; } return (ret); } /* * Parse a string to a positive decimal integer. Returns true if * the string is non-empty and consists only of decimal digits, * false otherwise. */ static int isint(const char *start, const char *end, int *result) { int n = 0; if (start >= end) return (0); while (start < end) { if (*start < '0' || *start > '9') return (0); if (n > (INT_MAX / 10) || (n == INT_MAX / 10 && (*start - '0') > INT_MAX % 10)) { n = INT_MAX; } else { n *= 10; n += *start - '0'; } start++; } *result = n; return (1); } /* * Parse a string as a mode field. Returns true if * the string is non-empty and consists only of mode characters, * false otherwise. */ static int ismode(const char *start, const char *end, int *permset) { const char *p; if (start >= end) return (0); p = start; *permset = 0; while (p < end) { switch (*p++) { case 'r': case 'R': *permset |= ARCHIVE_ENTRY_ACL_READ; break; case 'w': case 'W': *permset |= ARCHIVE_ENTRY_ACL_WRITE; break; case 'x': case 'X': *permset |= ARCHIVE_ENTRY_ACL_EXECUTE; break; case '-': break; default: return (0); } } return (1); -} - -/* Parse a string as a strict NFSv4 ACL permission field. 
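/*
 * Illustrative aside (not part of this change): ismode() folds the
 * "rwx-" characters into the POSIX.1e permset bits; assumes libarchive's
 * public archive_entry.h is installed for the bit definitions.
 */
#include <archive_entry.h>
#include <stdio.h>

int
main(void)
{
	const char *text = "rw-";
	const char *p;
	int permset = 0;

	for (p = text; *p != '\0'; p++) {
		switch (*p) {
		case 'r': case 'R':
			permset |= ARCHIVE_ENTRY_ACL_READ;
			break;
		case 'w': case 'W':
			permset |= ARCHIVE_ENTRY_ACL_WRITE;
			break;
		case 'x': case 'X':
			permset |= ARCHIVE_ENTRY_ACL_EXECUTE;
			break;
		case '-':
			break;
		}
	}
	printf("permset=%#x\n", permset);	/* READ|WRITE == 0x6 */
	return (0);
}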
*/ -static int -parse_nfs4_perms(const char *start, const char *end, int *permset) -{ - const char *p; - int pos; - const char *letter = "rwxpdDaARWcCos"; - const int perms[14] = { - ARCHIVE_ENTRY_ACL_READ_DATA, - ARCHIVE_ENTRY_ACL_WRITE_DATA, - ARCHIVE_ENTRY_ACL_EXECUTE, - ARCHIVE_ENTRY_ACL_APPEND_DATA, - ARCHIVE_ENTRY_ACL_DELETE, - ARCHIVE_ENTRY_ACL_DELETE_CHILD, - ARCHIVE_ENTRY_ACL_READ_ATTRIBUTES, - ARCHIVE_ENTRY_ACL_WRITE_ATTRIBUTES, - ARCHIVE_ENTRY_ACL_READ_NAMED_ATTRS, - ARCHIVE_ENTRY_ACL_WRITE_NAMED_ATTRS, - ARCHIVE_ENTRY_ACL_READ_ACL, - ARCHIVE_ENTRY_ACL_WRITE_ACL, - ARCHIVE_ENTRY_ACL_WRITE_OWNER, - ARCHIVE_ENTRY_ACL_SYNCHRONIZE - }; - - if (start >= end) - return (0); - p = start; - pos = 0; - while (p < end && pos < 14) { - if (*p == letter[pos]) - *permset |= perms[pos]; - else if (*p != '-') - return (-1); - p = p + sizeof(char); - pos++; - } - return (0); -} - -/* Parse a string as a strict NFSv4 ACL flags field. */ -static int -parse_nfs4_flags(const char *start, const char *end, int *permset) -{ - const char *p; - int pos; - const char *letter = "fdinSFI"; - const int perms[7] = { - ARCHIVE_ENTRY_ACL_ENTRY_FILE_INHERIT, - ARCHIVE_ENTRY_ACL_ENTRY_DIRECTORY_INHERIT, - ARCHIVE_ENTRY_ACL_ENTRY_INHERIT_ONLY, - ARCHIVE_ENTRY_ACL_ENTRY_NO_PROPAGATE_INHERIT, - ARCHIVE_ENTRY_ACL_ENTRY_SUCCESSFUL_ACCESS, - ARCHIVE_ENTRY_ACL_ENTRY_FAILED_ACCESS, - ARCHIVE_ENTRY_ACL_ENTRY_INHERITED - }; - - if (start >= end) - return (0); - p = start; - pos = 0; - while (p < end && pos < 7) { - if (*p == letter[pos]) - *permset |= perms[pos]; - else if (*p != '-') - return (-1); - p = p + sizeof(char); - pos++; - } - return (0); } /* * Match "[:whitespace:]*(.*)[:whitespace:]*[:,\n]". *wp is updated * to point to just after the separator. *start points to the first * character of the matched text and *end just after the last * character of the matched identifier. In particular *end - *start * is the length of the field body, not including leading or trailing * whitespace. */ static void next_field(const char **p, const char **start, const char **end, char *sep) { /* Skip leading whitespace to find start of field. */ while (**p == ' ' || **p == '\t' || **p == '\n') { (*p)++; } *start = *p; /* Scan for the separator. */ while (**p != '\0' && **p != ',' && **p != ':' && **p != '\n') { (*p)++; } *sep = **p; /* Trim trailing whitespace to locate end of field. */ *end = *p - 1; while (**end == ' ' || **end == '\t' || **end == '\n') { (*end)--; } (*end)++; /* Adjust scanner location. */ if (**p != '\0') (*p)++; } /* * Return true if the characters [start...end) are a prefix of 'test'. * This makes it easy to handle the obvious abbreviations: 'u' for 'user', etc. */ static int prefix_c(const char *start, const char *end, const char *test) { if (start == end) return (0); if (*start++ != *test++) return (0); while (start < end && *start++ == *test++) ; if (start < end) return (0); return (1); } Index: projects/clang390-import/contrib/libarchive/libarchive/archive_entry.h =================================================================== --- projects/clang390-import/contrib/libarchive/libarchive/archive_entry.h (revision 305016) +++ projects/clang390-import/contrib/libarchive/libarchive/archive_entry.h (revision 305017) @@ -1,643 +1,642 @@ /*- * Copyright (c) 2003-2008 Tim Kientzle * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ #ifndef ARCHIVE_ENTRY_H_INCLUDED #define ARCHIVE_ENTRY_H_INCLUDED /* Note: Compiler will complain if this does not match archive.h! */ #define ARCHIVE_VERSION_NUMBER 3002001 /* * Note: archive_entry.h is for use outside of libarchive; the * configuration headers (config.h, archive_platform.h, etc.) are * purely internal. Do NOT use HAVE_XXX configuration macros to * control the behavior of this header! If you must conditionalize, * use predefined compiler and/or platform macros. */ #include <sys/types.h> #include <stddef.h> /* for wchar_t */ #include <time.h> #if defined(_WIN32) && !defined(__CYGWIN__) #include <windows.h> #endif /* Get a suitable 64-bit integer type. */ #if !defined(__LA_INT64_T_DEFINED) # if ARCHIVE_VERSION_NUMBER < 4000000 #define __LA_INT64_T la_int64_t # endif #define __LA_INT64_T_DEFINED # if defined(_WIN32) && !defined(__CYGWIN__) && !defined(__WATCOMC__) typedef __int64 la_int64_t; # else #include <stdint.h> # if defined(_SCO_DS) || defined(__osf__) typedef long long la_int64_t; # else typedef int64_t la_int64_t; # endif # endif #endif /* Get a suitable definition for mode_t */ #if ARCHIVE_VERSION_NUMBER >= 3999000 /* Switch to plain 'int' for libarchive 4.0. It's less broken than 'mode_t' */ # define __LA_MODE_T int #elif defined(_WIN32) && !defined(__CYGWIN__) && !defined(__BORLANDC__) && !defined(__WATCOMC__) # define __LA_MODE_T unsigned short #else # define __LA_MODE_T mode_t #endif /* Large file support for Android */ #ifdef __ANDROID__ #include "android_lf.h" #endif /* * On Windows, define LIBARCHIVE_STATIC if you're building or using a * .lib. The default here assumes you're building a DLL. Only * libarchive source should ever define __LIBARCHIVE_BUILD. */ #if ((defined __WIN32__) || (defined _WIN32) || defined(__CYGWIN__)) && (!defined LIBARCHIVE_STATIC) # ifdef __LIBARCHIVE_BUILD # ifdef __GNUC__ # define __LA_DECL __attribute__((dllexport)) extern # else # define __LA_DECL __declspec(dllexport) # endif # else # ifdef __GNUC__ # define __LA_DECL # else # define __LA_DECL __declspec(dllimport) # endif # endif #else /* Static libraries on all platforms and shared libraries on non-Windows. */ # define __LA_DECL #endif #ifdef __cplusplus extern "C" { #endif /* * Description of an archive entry. * * You can think of this as "struct stat" with some text fields added in. * * TODO: Add "comment", "charset", and possibly other entries that are * supported by "pax interchange" format. 
However, GNU, ustar, cpio, * and other variants don't support these features, so they're not an * excruciatingly high priority right now. * * TODO: "pax interchange" format allows essentially arbitrary * key/value attributes to be attached to any entry. Supporting * such extensions may make this library useful for special * applications (e.g., a package manager could attach special * package-management attributes to each entry). */ struct archive; struct archive_entry; /* * File-type constants. These are returned from archive_entry_filetype() * and passed to archive_entry_set_filetype(). * * These values match S_XXX defines on every platform I've checked, * including Windows, AIX, Linux, Solaris, and BSD. They're * (re)defined here because platforms generally don't define the ones * they don't support. For example, Windows doesn't define S_IFLNK or * S_IFBLK. Instead of having a mass of conditional logic and system * checks to define any S_XXX values that aren't supported locally, * I've just defined a new set of such constants so that * libarchive-based applications can manipulate and identify archive * entries properly even if the hosting platform can't store them on * disk. * * These values are also used directly within some portable formats, * such as cpio. If you find a platform that varies from these, the * correct solution is to leave these alone and translate from these * portable values to platform-native values when entries are read from * or written to disk. */ /* * In libarchive 4.0, we can drop the casts here. * They're needed to work around Borland C's broken mode_t. */ #define AE_IFMT ((__LA_MODE_T)0170000) #define AE_IFREG ((__LA_MODE_T)0100000) #define AE_IFLNK ((__LA_MODE_T)0120000) #define AE_IFSOCK ((__LA_MODE_T)0140000) #define AE_IFCHR ((__LA_MODE_T)0020000) #define AE_IFBLK ((__LA_MODE_T)0060000) #define AE_IFDIR ((__LA_MODE_T)0040000) #define AE_IFIFO ((__LA_MODE_T)0010000) /* * Basic object manipulation */ __LA_DECL struct archive_entry *archive_entry_clear(struct archive_entry *); /* The 'clone' function does a deep copy; all of the strings are copied too. */ __LA_DECL struct archive_entry *archive_entry_clone(struct archive_entry *); __LA_DECL void archive_entry_free(struct archive_entry *); __LA_DECL struct archive_entry *archive_entry_new(void); /* * This form of archive_entry_new2() will pull character-set * conversion information from the specified archive handle. The * older archive_entry_new(void) form is equivalent to calling * archive_entry_new2(NULL) and will result in the use of an internal * default character-set conversion. */ __LA_DECL struct archive_entry *archive_entry_new2(struct archive *); /* * Retrieve fields from an archive_entry. * * There are a number of implicit conversions among these fields. For * example, if a regular string field is set and you read the _w wide * character field, the entry will implicitly convert narrow-to-wide * using the current locale. Similarly, dev values are automatically * updated when you write devmajor or devminor and vice versa. * * In addition, fields can be "set" or "unset." Unset string fields * return NULL, non-string fields have _is_set() functions to test * whether they've been set. You can "unset" a string field by * assigning NULL; non-string fields have _unset() functions to * unset them. * * Note: There is one ambiguity in the above; string fields will * also return NULL when implicit character set conversions fail. * This is usually what you want. 
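/*
 * Illustrative aside (not part of this change): the AE_IF* constants are
 * masked with AE_IFMT exactly like the S_IF* family.  Minimal sketch:
 */
#include <archive_entry.h>
#include <stdio.h>

static const char *
entry_kind(struct archive_entry *e)
{
	switch (archive_entry_filetype(e) & AE_IFMT) {
	case AE_IFREG:	return ("regular file");
	case AE_IFDIR:	return ("directory");
	case AE_IFLNK:	return ("symlink");
	default:	return ("other");
	}
}

int
main(void)
{
	struct archive_entry *e = archive_entry_new();

	archive_entry_set_filetype(e, AE_IFREG);
	printf("%s\n", entry_kind(e));	/* "regular file" */
	archive_entry_free(e);
	return (0);
}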
*/ __LA_DECL time_t archive_entry_atime(struct archive_entry *); __LA_DECL long archive_entry_atime_nsec(struct archive_entry *); __LA_DECL int archive_entry_atime_is_set(struct archive_entry *); __LA_DECL time_t archive_entry_birthtime(struct archive_entry *); __LA_DECL long archive_entry_birthtime_nsec(struct archive_entry *); __LA_DECL int archive_entry_birthtime_is_set(struct archive_entry *); __LA_DECL time_t archive_entry_ctime(struct archive_entry *); __LA_DECL long archive_entry_ctime_nsec(struct archive_entry *); __LA_DECL int archive_entry_ctime_is_set(struct archive_entry *); __LA_DECL dev_t archive_entry_dev(struct archive_entry *); __LA_DECL int archive_entry_dev_is_set(struct archive_entry *); __LA_DECL dev_t archive_entry_devmajor(struct archive_entry *); __LA_DECL dev_t archive_entry_devminor(struct archive_entry *); __LA_DECL __LA_MODE_T archive_entry_filetype(struct archive_entry *); __LA_DECL void archive_entry_fflags(struct archive_entry *, unsigned long * /* set */, unsigned long * /* clear */); __LA_DECL const char *archive_entry_fflags_text(struct archive_entry *); __LA_DECL la_int64_t archive_entry_gid(struct archive_entry *); __LA_DECL const char *archive_entry_gname(struct archive_entry *); __LA_DECL const char *archive_entry_gname_utf8(struct archive_entry *); __LA_DECL const wchar_t *archive_entry_gname_w(struct archive_entry *); __LA_DECL const char *archive_entry_hardlink(struct archive_entry *); __LA_DECL const char *archive_entry_hardlink_utf8(struct archive_entry *); __LA_DECL const wchar_t *archive_entry_hardlink_w(struct archive_entry *); __LA_DECL la_int64_t archive_entry_ino(struct archive_entry *); __LA_DECL la_int64_t archive_entry_ino64(struct archive_entry *); __LA_DECL int archive_entry_ino_is_set(struct archive_entry *); __LA_DECL __LA_MODE_T archive_entry_mode(struct archive_entry *); __LA_DECL time_t archive_entry_mtime(struct archive_entry *); __LA_DECL long archive_entry_mtime_nsec(struct archive_entry *); __LA_DECL int archive_entry_mtime_is_set(struct archive_entry *); __LA_DECL unsigned int archive_entry_nlink(struct archive_entry *); __LA_DECL const char *archive_entry_pathname(struct archive_entry *); __LA_DECL const char *archive_entry_pathname_utf8(struct archive_entry *); __LA_DECL const wchar_t *archive_entry_pathname_w(struct archive_entry *); __LA_DECL __LA_MODE_T archive_entry_perm(struct archive_entry *); __LA_DECL dev_t archive_entry_rdev(struct archive_entry *); __LA_DECL dev_t archive_entry_rdevmajor(struct archive_entry *); __LA_DECL dev_t archive_entry_rdevminor(struct archive_entry *); __LA_DECL const char *archive_entry_sourcepath(struct archive_entry *); __LA_DECL const wchar_t *archive_entry_sourcepath_w(struct archive_entry *); __LA_DECL la_int64_t archive_entry_size(struct archive_entry *); __LA_DECL int archive_entry_size_is_set(struct archive_entry *); __LA_DECL const char *archive_entry_strmode(struct archive_entry *); __LA_DECL const char *archive_entry_symlink(struct archive_entry *); __LA_DECL const char *archive_entry_symlink_utf8(struct archive_entry *); __LA_DECL const wchar_t *archive_entry_symlink_w(struct archive_entry *); __LA_DECL la_int64_t archive_entry_uid(struct archive_entry *); __LA_DECL const char *archive_entry_uname(struct archive_entry *); __LA_DECL const char *archive_entry_uname_utf8(struct archive_entry *); __LA_DECL const wchar_t *archive_entry_uname_w(struct archive_entry *); __LA_DECL int archive_entry_is_data_encrypted(struct archive_entry *); __LA_DECL int 
archive_entry_is_metadata_encrypted(struct archive_entry *); __LA_DECL int archive_entry_is_encrypted(struct archive_entry *); /* * Set fields in an archive_entry. * * Note: Before libarchive 2.4, there were 'set' and 'copy' versions * of the string setters. 'copy' copied the actual string, 'set' just * stored the pointer. In libarchive 2.4 and later, strings are * always copied. */ __LA_DECL void archive_entry_set_atime(struct archive_entry *, time_t, long); __LA_DECL void archive_entry_unset_atime(struct archive_entry *); #if defined(_WIN32) && !defined(__CYGWIN__) __LA_DECL void archive_entry_copy_bhfi(struct archive_entry *, BY_HANDLE_FILE_INFORMATION *); #endif __LA_DECL void archive_entry_set_birthtime(struct archive_entry *, time_t, long); __LA_DECL void archive_entry_unset_birthtime(struct archive_entry *); __LA_DECL void archive_entry_set_ctime(struct archive_entry *, time_t, long); __LA_DECL void archive_entry_unset_ctime(struct archive_entry *); __LA_DECL void archive_entry_set_dev(struct archive_entry *, dev_t); __LA_DECL void archive_entry_set_devmajor(struct archive_entry *, dev_t); __LA_DECL void archive_entry_set_devminor(struct archive_entry *, dev_t); __LA_DECL void archive_entry_set_filetype(struct archive_entry *, unsigned int); __LA_DECL void archive_entry_set_fflags(struct archive_entry *, unsigned long /* set */, unsigned long /* clear */); /* Returns pointer to start of first invalid token, or NULL if none. */ /* Note that all recognized tokens are processed, regardless. */ __LA_DECL const char *archive_entry_copy_fflags_text(struct archive_entry *, const char *); __LA_DECL const wchar_t *archive_entry_copy_fflags_text_w(struct archive_entry *, const wchar_t *); __LA_DECL void archive_entry_set_gid(struct archive_entry *, la_int64_t); __LA_DECL void archive_entry_set_gname(struct archive_entry *, const char *); __LA_DECL void archive_entry_set_gname_utf8(struct archive_entry *, const char *); __LA_DECL void archive_entry_copy_gname(struct archive_entry *, const char *); __LA_DECL void archive_entry_copy_gname_w(struct archive_entry *, const wchar_t *); __LA_DECL int archive_entry_update_gname_utf8(struct archive_entry *, const char *); __LA_DECL void archive_entry_set_hardlink(struct archive_entry *, const char *); __LA_DECL void archive_entry_set_hardlink_utf8(struct archive_entry *, const char *); __LA_DECL void archive_entry_copy_hardlink(struct archive_entry *, const char *); __LA_DECL void archive_entry_copy_hardlink_w(struct archive_entry *, const wchar_t *); __LA_DECL int archive_entry_update_hardlink_utf8(struct archive_entry *, const char *); __LA_DECL void archive_entry_set_ino(struct archive_entry *, la_int64_t); __LA_DECL void archive_entry_set_ino64(struct archive_entry *, la_int64_t); __LA_DECL void archive_entry_set_link(struct archive_entry *, const char *); __LA_DECL void archive_entry_set_link_utf8(struct archive_entry *, const char *); __LA_DECL void archive_entry_copy_link(struct archive_entry *, const char *); __LA_DECL void archive_entry_copy_link_w(struct archive_entry *, const wchar_t *); __LA_DECL int archive_entry_update_link_utf8(struct archive_entry *, const char *); __LA_DECL void archive_entry_set_mode(struct archive_entry *, __LA_MODE_T); __LA_DECL void archive_entry_set_mtime(struct archive_entry *, time_t, long); __LA_DECL void archive_entry_unset_mtime(struct archive_entry *); __LA_DECL void archive_entry_set_nlink(struct archive_entry *, unsigned int); __LA_DECL void archive_entry_set_pathname(struct archive_entry *, const char *); 
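/*
 * Illustrative aside (not part of this change): typical use of the
 * setters above when synthesizing an entry from scratch; the pathname
 * and values are arbitrary.
 */
#include <archive_entry.h>
#include <time.h>

int
main(void)
{
	struct archive_entry *e = archive_entry_new();

	archive_entry_set_pathname(e, "hello.txt");
	archive_entry_set_filetype(e, AE_IFREG);
	archive_entry_set_perm(e, 0644);
	archive_entry_set_size(e, 5);
	archive_entry_set_mtime(e, time(NULL), 0);	/* seconds, nanoseconds */
	archive_entry_free(e);
	return (0);
}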
__LA_DECL void archive_entry_set_pathname_utf8(struct archive_entry *, const char *); __LA_DECL void archive_entry_copy_pathname(struct archive_entry *, const char *); __LA_DECL void archive_entry_copy_pathname_w(struct archive_entry *, const wchar_t *); __LA_DECL int archive_entry_update_pathname_utf8(struct archive_entry *, const char *); __LA_DECL void archive_entry_set_perm(struct archive_entry *, __LA_MODE_T); __LA_DECL void archive_entry_set_rdev(struct archive_entry *, dev_t); __LA_DECL void archive_entry_set_rdevmajor(struct archive_entry *, dev_t); __LA_DECL void archive_entry_set_rdevminor(struct archive_entry *, dev_t); __LA_DECL void archive_entry_set_size(struct archive_entry *, la_int64_t); __LA_DECL void archive_entry_unset_size(struct archive_entry *); __LA_DECL void archive_entry_copy_sourcepath(struct archive_entry *, const char *); __LA_DECL void archive_entry_copy_sourcepath_w(struct archive_entry *, const wchar_t *); __LA_DECL void archive_entry_set_symlink(struct archive_entry *, const char *); __LA_DECL void archive_entry_set_symlink_utf8(struct archive_entry *, const char *); __LA_DECL void archive_entry_copy_symlink(struct archive_entry *, const char *); __LA_DECL void archive_entry_copy_symlink_w(struct archive_entry *, const wchar_t *); __LA_DECL int archive_entry_update_symlink_utf8(struct archive_entry *, const char *); __LA_DECL void archive_entry_set_uid(struct archive_entry *, la_int64_t); __LA_DECL void archive_entry_set_uname(struct archive_entry *, const char *); __LA_DECL void archive_entry_set_uname_utf8(struct archive_entry *, const char *); __LA_DECL void archive_entry_copy_uname(struct archive_entry *, const char *); __LA_DECL void archive_entry_copy_uname_w(struct archive_entry *, const wchar_t *); __LA_DECL int archive_entry_update_uname_utf8(struct archive_entry *, const char *); __LA_DECL void archive_entry_set_is_data_encrypted(struct archive_entry *, char is_encrypted); __LA_DECL void archive_entry_set_is_metadata_encrypted(struct archive_entry *, char is_encrypted); /* * Routines to bulk copy fields to/from a platform-native "struct * stat." Libarchive used to just store a struct stat inside of each * archive_entry object, but this created issues when trying to * manipulate archives on systems different than the ones they were * created on. * * TODO: On Linux and other LFS systems, provide both stat32 and * stat64 versions of these functions and all of the macro glue so * that archive_entry_stat is magically defined to * archive_entry_stat32 or archive_entry_stat64 as appropriate. */ __LA_DECL const struct stat *archive_entry_stat(struct archive_entry *); __LA_DECL void archive_entry_copy_stat(struct archive_entry *, const struct stat *); /* * Storage for Mac OS-specific AppleDouble metadata information. * Apple-format tar files store a separate binary blob containing * encoded metadata with ACL, extended attributes, etc. * This provides a place to store that blob. */ __LA_DECL const void * archive_entry_mac_metadata(struct archive_entry *, size_t *); __LA_DECL void archive_entry_copy_mac_metadata(struct archive_entry *, const void *, size_t); /* * ACL routines. 
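/*
 * Illustrative aside (not part of this change): archive_entry_copy_stat()
 * bulk-loads an entry from a native struct stat; a minimal sketch, the
 * path is arbitrary.
 */
#include <archive_entry.h>
#include <sys/stat.h>

int
main(void)
{
	struct stat st;
	struct archive_entry *e = archive_entry_new();

	if (stat("/etc/hosts", &st) == 0) {
		archive_entry_set_pathname(e, "hosts");
		archive_entry_copy_stat(e, &st);	/* mode, times, ids, size */
	}
	archive_entry_free(e);
	return (0);
}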
This used to simply store and return text-format ACL * strings, but that proved insufficient for a number of reasons: * = clients need control over uname/uid and gname/gid mappings * = there are many different ACL text formats * = would like to be able to read/convert archives containing ACLs * on platforms that lack ACL libraries * * This last point, in particular, forces me to implement a reasonably * complete set of ACL support routines. */ /* * Permission bits. */ #define ARCHIVE_ENTRY_ACL_EXECUTE 0x00000001 #define ARCHIVE_ENTRY_ACL_WRITE 0x00000002 #define ARCHIVE_ENTRY_ACL_READ 0x00000004 #define ARCHIVE_ENTRY_ACL_READ_DATA 0x00000008 #define ARCHIVE_ENTRY_ACL_LIST_DIRECTORY 0x00000008 #define ARCHIVE_ENTRY_ACL_WRITE_DATA 0x00000010 #define ARCHIVE_ENTRY_ACL_ADD_FILE 0x00000010 #define ARCHIVE_ENTRY_ACL_APPEND_DATA 0x00000020 #define ARCHIVE_ENTRY_ACL_ADD_SUBDIRECTORY 0x00000020 #define ARCHIVE_ENTRY_ACL_READ_NAMED_ATTRS 0x00000040 #define ARCHIVE_ENTRY_ACL_WRITE_NAMED_ATTRS 0x00000080 #define ARCHIVE_ENTRY_ACL_DELETE_CHILD 0x00000100 #define ARCHIVE_ENTRY_ACL_READ_ATTRIBUTES 0x00000200 #define ARCHIVE_ENTRY_ACL_WRITE_ATTRIBUTES 0x00000400 #define ARCHIVE_ENTRY_ACL_DELETE 0x00000800 #define ARCHIVE_ENTRY_ACL_READ_ACL 0x00001000 #define ARCHIVE_ENTRY_ACL_WRITE_ACL 0x00002000 #define ARCHIVE_ENTRY_ACL_WRITE_OWNER 0x00004000 #define ARCHIVE_ENTRY_ACL_SYNCHRONIZE 0x00008000 #define ARCHIVE_ENTRY_ACL_PERMS_POSIX1E \ (ARCHIVE_ENTRY_ACL_EXECUTE \ | ARCHIVE_ENTRY_ACL_WRITE \ | ARCHIVE_ENTRY_ACL_READ) #define ARCHIVE_ENTRY_ACL_PERMS_NFS4 \ (ARCHIVE_ENTRY_ACL_EXECUTE \ | ARCHIVE_ENTRY_ACL_READ_DATA \ | ARCHIVE_ENTRY_ACL_LIST_DIRECTORY \ | ARCHIVE_ENTRY_ACL_WRITE_DATA \ | ARCHIVE_ENTRY_ACL_ADD_FILE \ | ARCHIVE_ENTRY_ACL_APPEND_DATA \ | ARCHIVE_ENTRY_ACL_ADD_SUBDIRECTORY \ | ARCHIVE_ENTRY_ACL_READ_NAMED_ATTRS \ | ARCHIVE_ENTRY_ACL_WRITE_NAMED_ATTRS \ | ARCHIVE_ENTRY_ACL_DELETE_CHILD \ | ARCHIVE_ENTRY_ACL_READ_ATTRIBUTES \ | ARCHIVE_ENTRY_ACL_WRITE_ATTRIBUTES \ | ARCHIVE_ENTRY_ACL_DELETE \ | ARCHIVE_ENTRY_ACL_READ_ACL \ | ARCHIVE_ENTRY_ACL_WRITE_ACL \ | ARCHIVE_ENTRY_ACL_WRITE_OWNER \ | ARCHIVE_ENTRY_ACL_SYNCHRONIZE) /* * Inheritance values (NFS4 ACLs only); included in permset. */ #define ARCHIVE_ENTRY_ACL_ENTRY_FILE_INHERIT 0x02000000 #define ARCHIVE_ENTRY_ACL_ENTRY_DIRECTORY_INHERIT 0x04000000 #define ARCHIVE_ENTRY_ACL_ENTRY_NO_PROPAGATE_INHERIT 0x08000000 #define ARCHIVE_ENTRY_ACL_ENTRY_INHERIT_ONLY 0x10000000 #define ARCHIVE_ENTRY_ACL_ENTRY_SUCCESSFUL_ACCESS 0x20000000 #define ARCHIVE_ENTRY_ACL_ENTRY_FAILED_ACCESS 0x40000000 -#define ARCHIVE_ENTRY_ACL_ENTRY_INHERITED 0x80000000 #define ARCHIVE_ENTRY_ACL_INHERITANCE_NFS4 \ (ARCHIVE_ENTRY_ACL_ENTRY_FILE_INHERIT \ | ARCHIVE_ENTRY_ACL_ENTRY_DIRECTORY_INHERIT \ | ARCHIVE_ENTRY_ACL_ENTRY_NO_PROPAGATE_INHERIT \ | ARCHIVE_ENTRY_ACL_ENTRY_INHERIT_ONLY \ | ARCHIVE_ENTRY_ACL_ENTRY_SUCCESSFUL_ACCESS \ | ARCHIVE_ENTRY_ACL_ENTRY_FAILED_ACCESS) /* We need to be able to specify combinations of these. 
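/*
 * Illustrative aside (not part of this change): the POSIX.1e permset is
 * just the low three bits; mapping the owner class of a mode onto it:
 */
#include <archive_entry.h>

int
mode_owner_permset(unsigned int mode)
{
	int permset = 0;

	if (mode & 0400)
		permset |= ARCHIVE_ENTRY_ACL_READ;	/* 0x00000004 */
	if (mode & 0200)
		permset |= ARCHIVE_ENTRY_ACL_WRITE;	/* 0x00000002 */
	if (mode & 0100)
		permset |= ARCHIVE_ENTRY_ACL_EXECUTE;	/* 0x00000001 */
	return (permset & ARCHIVE_ENTRY_ACL_PERMS_POSIX1E);
}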
*/ #define ARCHIVE_ENTRY_ACL_TYPE_ACCESS 256 /* POSIX.1e only */ #define ARCHIVE_ENTRY_ACL_TYPE_DEFAULT 512 /* POSIX.1e only */ #define ARCHIVE_ENTRY_ACL_TYPE_ALLOW 1024 /* NFS4 only */ #define ARCHIVE_ENTRY_ACL_TYPE_DENY 2048 /* NFS4 only */ #define ARCHIVE_ENTRY_ACL_TYPE_AUDIT 4096 /* NFS4 only */ #define ARCHIVE_ENTRY_ACL_TYPE_ALARM 8192 /* NFS4 only */ #define ARCHIVE_ENTRY_ACL_TYPE_POSIX1E (ARCHIVE_ENTRY_ACL_TYPE_ACCESS \ | ARCHIVE_ENTRY_ACL_TYPE_DEFAULT) #define ARCHIVE_ENTRY_ACL_TYPE_NFS4 (ARCHIVE_ENTRY_ACL_TYPE_ALLOW \ | ARCHIVE_ENTRY_ACL_TYPE_DENY \ | ARCHIVE_ENTRY_ACL_TYPE_AUDIT \ | ARCHIVE_ENTRY_ACL_TYPE_ALARM) /* Tag values mimic POSIX.1e */ #define ARCHIVE_ENTRY_ACL_USER 10001 /* Specified user. */ #define ARCHIVE_ENTRY_ACL_USER_OBJ 10002 /* User who owns the file. */ #define ARCHIVE_ENTRY_ACL_GROUP 10003 /* Specified group. */ #define ARCHIVE_ENTRY_ACL_GROUP_OBJ 10004 /* Group who owns the file. */ #define ARCHIVE_ENTRY_ACL_MASK 10005 /* Modify group access (POSIX.1e only) */ #define ARCHIVE_ENTRY_ACL_OTHER 10006 /* Public (POSIX.1e only) */ #define ARCHIVE_ENTRY_ACL_EVERYONE 10107 /* Everyone (NFS4 only) */ /* * Set the ACL by clearing it and adding entries one at a time. * Unlike the POSIX.1e ACL routines, you must specify the type * (access/default) for each entry. Internally, the ACL data is just * a soup of entries. API calls here allow you to retrieve just the * entries of interest. This design (which goes against the spirit of * POSIX.1e) is useful for handling archive formats that combine * default and access information in a single ACL list. */ __LA_DECL void archive_entry_acl_clear(struct archive_entry *); __LA_DECL int archive_entry_acl_add_entry(struct archive_entry *, int /* type */, int /* permset */, int /* tag */, int /* qual */, const char * /* name */); __LA_DECL int archive_entry_acl_add_entry_w(struct archive_entry *, int /* type */, int /* permset */, int /* tag */, int /* qual */, const wchar_t * /* name */); /* * To retrieve the ACL, first "reset", then repeatedly ask for the * "next" entry. The want_type parameter allows you to request only * certain types of entries. */ __LA_DECL int archive_entry_acl_reset(struct archive_entry *, int /* want_type */); __LA_DECL int archive_entry_acl_next(struct archive_entry *, int /* want_type */, int * /* type */, int * /* permset */, int * /* tag */, int * /* qual */, const char ** /* name */); __LA_DECL int archive_entry_acl_next_w(struct archive_entry *, int /* want_type */, int * /* type */, int * /* permset */, int * /* tag */, int * /* qual */, const wchar_t ** /* name */); /* * Construct a text-format ACL. The flags argument is a bitmask that * can include any of the following: * * ARCHIVE_ENTRY_ACL_TYPE_ACCESS - Include POSIX.1e "access" entries. * ARCHIVE_ENTRY_ACL_TYPE_DEFAULT - Include POSIX.1e "default" entries. * ARCHIVE_ENTRY_ACL_TYPE_NFS4 - Include NFS4 entries. * ARCHIVE_ENTRY_ACL_STYLE_EXTRA_ID - Include extra numeric ID field in * each ACL entry. ('star' introduced this for POSIX.1e, this flag * also applies to NFS4.) * ARCHIVE_ENTRY_ACL_STYLE_MARK_DEFAULT - Include "default:" before each * default ACL entry, as used in old Solaris ACLs. 
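/*
 * Illustrative aside (not part of this change): building an access ACL
 * entry by entry with the API above and rendering it with the style
 * flags; assumes libarchive's public headers are installed.
 */
#include <archive_entry.h>
#include <stdio.h>

int
main(void)
{
	struct archive_entry *e = archive_entry_new();
	const char *txt;

	archive_entry_acl_add_entry(e, ARCHIVE_ENTRY_ACL_TYPE_ACCESS,
	    ARCHIVE_ENTRY_ACL_READ | ARCHIVE_ENTRY_ACL_WRITE,
	    ARCHIVE_ENTRY_ACL_USER, 1000, "operator");
	txt = archive_entry_acl_text(e,
	    ARCHIVE_ENTRY_ACL_TYPE_ACCESS | ARCHIVE_ENTRY_ACL_STYLE_EXTRA_ID);
	if (txt != NULL)
		printf("%s\n", txt);	/* e.g. "...,user:operator:rw-:1000,..." */
	archive_entry_free(e);
	return (0);
}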
*/ -#define ARCHIVE_ENTRY_ACL_STYLE_EXTRA_ID 16384 -#define ARCHIVE_ENTRY_ACL_STYLE_MARK_DEFAULT 32768 +#define ARCHIVE_ENTRY_ACL_STYLE_EXTRA_ID 1024 +#define ARCHIVE_ENTRY_ACL_STYLE_MARK_DEFAULT 2048 __LA_DECL const wchar_t *archive_entry_acl_text_w(struct archive_entry *, int /* flags */); __LA_DECL const char *archive_entry_acl_text(struct archive_entry *, int /* flags */); /* Return a count of entries matching 'want_type' */ __LA_DECL int archive_entry_acl_count(struct archive_entry *, int /* want_type */); /* Return an opaque ACL object. */ /* There's not yet anything clients can actually do with this... */ struct archive_acl; __LA_DECL struct archive_acl *archive_entry_acl(struct archive_entry *); /* * extended attributes */ __LA_DECL void archive_entry_xattr_clear(struct archive_entry *); __LA_DECL void archive_entry_xattr_add_entry(struct archive_entry *, const char * /* name */, const void * /* value */, size_t /* size */); /* * To retrieve the xattr list, first "reset", then repeatedly ask for the * "next" entry. */ __LA_DECL int archive_entry_xattr_count(struct archive_entry *); __LA_DECL int archive_entry_xattr_reset(struct archive_entry *); __LA_DECL int archive_entry_xattr_next(struct archive_entry *, const char ** /* name */, const void ** /* value */, size_t *); /* * sparse */ __LA_DECL void archive_entry_sparse_clear(struct archive_entry *); __LA_DECL void archive_entry_sparse_add_entry(struct archive_entry *, la_int64_t /* offset */, la_int64_t /* length */); /* * To retrieve the xattr list, first "reset", then repeatedly ask for the * "next" entry. */ __LA_DECL int archive_entry_sparse_count(struct archive_entry *); __LA_DECL int archive_entry_sparse_reset(struct archive_entry *); __LA_DECL int archive_entry_sparse_next(struct archive_entry *, la_int64_t * /* offset */, la_int64_t * /* length */); /* * Utility to match up hardlinks. * * The 'struct archive_entry_linkresolver' is a cache of archive entries * for files with multiple links. Here's how to use it: * 1. Create a lookup object with archive_entry_linkresolver_new() * 2. Tell it the archive format you're using. * 3. Hand each archive_entry to archive_entry_linkify(). * That function will return 0, 1, or 2 entries that should * be written. * 4. Call archive_entry_linkify(resolver, NULL) until * no more entries are returned. * 5. Call archive_entry_linkresolver_free(resolver) to free resources. * * The entries returned have their hardlink and size fields updated * appropriately. If an entry is passed in that does not refer to * a file with multiple links, it is returned unchanged. The intention * is that you should be able to simply filter all entries through * this machine. * * To make things more efficient, be sure that each entry has a valid * nlinks value. The hardlink cache uses this to track when all links * have been found. If the nlinks value is zero, it will keep every * name in the cache indefinitely, which can use a lot of memory. * * Note that archive_entry_size() is reset to zero if the file * body should not be written to the archive. Pay attention! */ struct archive_entry_linkresolver; /* * There are three different strategies for marking hardlinks. * The descriptions below name them after the best-known * formats that rely on each strategy: * * "Old cpio" is the simplest, it always returns any entry unmodified. * As far as I know, only cpio formats use this. 
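/*
 * Illustrative aside (not part of this change): the five-step resolver
 * workflow described above, as a skeleton.  write_entry() is a
 * placeholder and the pax-restricted strategy is just one plausible
 * format code.
 */
#include <archive.h>
#include <archive_entry.h>

static void
write_entry(struct archive_entry *e)
{
	(void)e;	/* ...archive_write_header() etc. would go here... */
}

static void
linkify_all(struct archive_entry **entries, int n)
{
	struct archive_entry_linkresolver *res;
	struct archive_entry *e, *sparse;
	int i;

	res = archive_entry_linkresolver_new();		/* step 1 */
	archive_entry_linkresolver_set_strategy(res,
	    ARCHIVE_FORMAT_TAR_PAX_RESTRICTED);		/* step 2 */
	for (i = 0; i < n; i++) {
		e = entries[i];
		archive_entry_linkify(res, &e, &sparse);	/* step 3 */
		if (e != NULL)
			write_entry(e);
		if (sparse != NULL)
			write_entry(sparse);
	}
	for (;;) {					/* step 4: drain deferred links */
		e = NULL;
		archive_entry_linkify(res, &e, &sparse);
		if (e == NULL)
			break;
		write_entry(e);
	}
	archive_entry_linkresolver_free(res);		/* step 5 */
}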
Old cpio archives * store every link with the full body; the onus is on the dearchiver * to detect and properly link the files as they are restored. * "tar" is also pretty simple; it caches a copy the first time it sees * any link. Subsequent appearances are modified to be hardlink * references to the first one without any body. Used by all tar * formats, although the newest tar formats permit the "old cpio" strategy * as well. This strategy is very simple for the dearchiver, * and reasonably straightforward for the archiver. * "new cpio" is trickier. It stores the body only with the last * occurrence. The complication is that we might not * see every link to a particular file in a single session, so * there's no easy way to know when we've seen the last occurrence. * The solution here is to queue one link until we see the next. * At the end of the session, you can enumerate any remaining * entries by calling archive_entry_linkify(NULL) and store those * bodies. If you have a file with three links l1, l2, and l3, * you'll get the following behavior if you see all three links: * linkify(l1) => NULL (the resolver stores l1 internally) * linkify(l2) => l1 (resolver stores l2, you write l1) * linkify(l3) => l2, l3 (all links seen, you can write both). * If you only see l1 and l2, you'll get this behavior: * linkify(l1) => NULL * linkify(l2) => l1 * linkify(NULL) => l2 (at end, you retrieve remaining links) * As the name suggests, this strategy is used by newer cpio variants. * It's noticeably more complex for the archiver, slightly more complex * for the dearchiver than the tar strategy, but makes it straightforward * to restore a file using any link by simply continuing to scan until * you see a link that is stored with a body. In contrast, the tar * strategy requires you to rescan the archive from the beginning to * correctly extract an arbitrary link. */ __LA_DECL struct archive_entry_linkresolver *archive_entry_linkresolver_new(void); __LA_DECL void archive_entry_linkresolver_set_strategy( struct archive_entry_linkresolver *, int /* format_code */); __LA_DECL void archive_entry_linkresolver_free(struct archive_entry_linkresolver *); __LA_DECL void archive_entry_linkify(struct archive_entry_linkresolver *, struct archive_entry **, struct archive_entry **); __LA_DECL struct archive_entry *archive_entry_partial_links( struct archive_entry_linkresolver *res, unsigned int *links); #ifdef __cplusplus } #endif /* This is meaningless outside of this header. */ #undef __LA_DECL #endif /* !ARCHIVE_ENTRY_H_INCLUDED */ Index: projects/clang390-import/contrib/libarchive/libarchive/archive_read_disk_entry_from_file.c =================================================================== --- projects/clang390-import/contrib/libarchive/libarchive/archive_read_disk_entry_from_file.c (revision 305016) +++ projects/clang390-import/contrib/libarchive/libarchive/archive_read_disk_entry_from_file.c (revision 305017) @@ -1,1349 +1,1264 @@ /*- * Copyright (c) 2003-2009 Tim Kientzle * Copyright (c) 2010-2012 Michihiro NAKAJIMA * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "archive_platform.h" __FBSDID("$FreeBSD$"); /* This is the tree-walking code for POSIX systems. */ #if !defined(_WIN32) || defined(__CYGWIN__) #ifdef HAVE_SYS_TYPES_H /* Mac OSX requires sys/types.h before sys/acl.h. */ #include <sys/types.h> #endif #ifdef HAVE_SYS_ACL_H #include <sys/acl.h> #endif #ifdef HAVE_SYS_EXTATTR_H #include <sys/extattr.h> #endif #ifdef HAVE_SYS_IOCTL_H #include <sys/ioctl.h> #endif #ifdef HAVE_SYS_PARAM_H #include <sys/param.h> #endif #ifdef HAVE_SYS_STAT_H #include <sys/stat.h> #endif #if defined(HAVE_SYS_XATTR_H) #include <sys/xattr.h> #elif defined(HAVE_ATTR_XATTR_H) #include <attr/xattr.h> #endif #ifdef HAVE_SYS_EA_H #include <sys/ea.h> #endif #ifdef HAVE_ACL_LIBACL_H #include <acl/libacl.h> #endif #ifdef HAVE_COPYFILE_H #include <copyfile.h> #endif #ifdef HAVE_ERRNO_H #include <errno.h> #endif #ifdef HAVE_FCNTL_H #include <fcntl.h> #endif #ifdef HAVE_LIMITS_H #include <limits.h> #endif #ifdef HAVE_LINUX_TYPES_H #include <linux/types.h> #endif #ifdef HAVE_LINUX_FIEMAP_H #include <linux/fiemap.h> #endif #ifdef HAVE_LINUX_FS_H #include <linux/fs.h> #endif /* * Some Linux distributions have both linux/ext2_fs.h and ext2fs/ext2_fs.h. * As the include guards don't agree, the order of include is important. */ #ifdef HAVE_LINUX_EXT2_FS_H #include <linux/ext2_fs.h> /* for Linux file flags */ #endif #if defined(HAVE_EXT2FS_EXT2_FS_H) && !defined(__CYGWIN__) #include <ext2fs/ext2_fs.h> /* Linux file flags, broken on Cygwin */ #endif #ifdef HAVE_PATHS_H #include <paths.h> #endif #ifdef HAVE_UNISTD_H #include <unistd.h> #endif #include "archive.h" #include "archive_entry.h" #include "archive_private.h" #include "archive_read_disk_private.h" #ifndef O_CLOEXEC #define O_CLOEXEC 0 #endif /* * Linux and FreeBSD plug this obvious hole in POSIX.1e in * different ways. 
*/ #if HAVE_ACL_GET_PERM #define ACL_GET_PERM acl_get_perm #elif HAVE_ACL_GET_PERM_NP #define ACL_GET_PERM acl_get_perm_np #endif static int setup_acls(struct archive_read_disk *, struct archive_entry *, int *fd); static int setup_mac_metadata(struct archive_read_disk *, struct archive_entry *, int *fd); static int setup_xattrs(struct archive_read_disk *, struct archive_entry *, int *fd); static int setup_sparse(struct archive_read_disk *, struct archive_entry *, int *fd); int archive_read_disk_entry_from_file(struct archive *_a, struct archive_entry *entry, int fd, const struct stat *st) { struct archive_read_disk *a = (struct archive_read_disk *)_a; const char *path, *name; struct stat s; int initial_fd = fd; int r, r1; archive_clear_error(_a); path = archive_entry_sourcepath(entry); if (path == NULL) path = archive_entry_pathname(entry); if (a->tree == NULL) { if (st == NULL) { #if HAVE_FSTAT if (fd >= 0) { if (fstat(fd, &s) != 0) { archive_set_error(&a->archive, errno, "Can't fstat"); return (ARCHIVE_FAILED); } } else #endif #if HAVE_LSTAT if (!a->follow_symlinks) { if (lstat(path, &s) != 0) { archive_set_error(&a->archive, errno, "Can't lstat %s", path); return (ARCHIVE_FAILED); } } else #endif if (stat(path, &s) != 0) { archive_set_error(&a->archive, errno, "Can't stat %s", path); return (ARCHIVE_FAILED); } st = &s; } archive_entry_copy_stat(entry, st); } /* Lookup uname/gname */ name = archive_read_disk_uname(_a, archive_entry_uid(entry)); if (name != NULL) archive_entry_copy_uname(entry, name); name = archive_read_disk_gname(_a, archive_entry_gid(entry)); if (name != NULL) archive_entry_copy_gname(entry, name); #ifdef HAVE_STRUCT_STAT_ST_FLAGS /* On FreeBSD, we get flags for free with the stat. */ /* TODO: Does this belong in copy_stat()? */ if (st->st_flags != 0) archive_entry_set_fflags(entry, st->st_flags, 0); #endif #if defined(EXT2_IOC_GETFLAGS) && defined(HAVE_WORKING_EXT2_IOC_GETFLAGS) /* Linux requires an extra ioctl to pull the flags. Although * this is an extra step, it has a nice side-effect: We get an * open file descriptor which we can use in the subsequent lookups. 
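/*
 * Illustrative aside (not part of this change): the caller-side pattern
 * for archive_read_disk_entry_from_file(); with fd == -1 and st == NULL
 * the function stats the path itself.  The path is arbitrary.
 */
#include <archive.h>
#include <archive_entry.h>
#include <stdio.h>

int
main(void)
{
	struct archive *a = archive_read_disk_new();
	struct archive_entry *e = archive_entry_new();

	archive_read_disk_set_standard_lookup(a);	/* uname/gname lookups */
	archive_entry_set_pathname(e, "/etc/hosts");
	if (archive_read_disk_entry_from_file(a, e, -1, NULL) == ARCHIVE_OK)
		printf("%s: %lld bytes\n", archive_entry_pathname(e),
		    (long long)archive_entry_size(e));
	archive_entry_free(e);
	archive_read_disk_free(a);
	return (0);
}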
*/ if ((S_ISREG(st->st_mode) || S_ISDIR(st->st_mode))) { if (fd < 0) { if (a->tree != NULL) fd = a->open_on_current_dir(a->tree, path, O_RDONLY | O_NONBLOCK | O_CLOEXEC); else fd = open(path, O_RDONLY | O_NONBLOCK | O_CLOEXEC); __archive_ensure_cloexec_flag(fd); } if (fd >= 0) { int stflags; r = ioctl(fd, EXT2_IOC_GETFLAGS, &stflags); if (r == 0 && stflags != 0) archive_entry_set_fflags(entry, stflags, 0); } } #endif #if defined(HAVE_READLINK) || defined(HAVE_READLINKAT) if (S_ISLNK(st->st_mode)) { size_t linkbuffer_len = st->st_size + 1; char *linkbuffer; int lnklen; linkbuffer = malloc(linkbuffer_len); if (linkbuffer == NULL) { archive_set_error(&a->archive, ENOMEM, "Couldn't read link data"); return (ARCHIVE_FAILED); } if (a->tree != NULL) { #ifdef HAVE_READLINKAT lnklen = readlinkat(a->tree_current_dir_fd(a->tree), path, linkbuffer, linkbuffer_len); #else if (a->tree_enter_working_dir(a->tree) != 0) { archive_set_error(&a->archive, errno, "Couldn't read link data"); free(linkbuffer); return (ARCHIVE_FAILED); } lnklen = readlink(path, linkbuffer, linkbuffer_len); #endif /* HAVE_READLINKAT */ } else lnklen = readlink(path, linkbuffer, linkbuffer_len); if (lnklen < 0) { archive_set_error(&a->archive, errno, "Couldn't read link data"); free(linkbuffer); return (ARCHIVE_FAILED); } linkbuffer[lnklen] = 0; archive_entry_set_symlink(entry, linkbuffer); free(linkbuffer); } #endif /* HAVE_READLINK || HAVE_READLINKAT */ r = setup_acls(a, entry, &fd); if (!a->suppress_xattr) { r1 = setup_xattrs(a, entry, &fd); if (r1 < r) r = r1; } if (a->enable_copyfile) { r1 = setup_mac_metadata(a, entry, &fd); if (r1 < r) r = r1; } r1 = setup_sparse(a, entry, &fd); if (r1 < r) r = r1; /* If we opened the file earlier in this function, close it. */ if (initial_fd != fd) close(fd); return (r); } #if defined(__APPLE__) && defined(HAVE_COPYFILE_H) /* * The Mac OS "copyfile()" API copies the extended metadata for a * file into a separate file in AppleDouble format (see RFC 1740). * * Mac OS tar and cpio implementations store this extended * metadata as a separate entry just before the regular entry * with a "._" prefix added to the filename. * * Note that this is currently done unconditionally; the tar program has * an option to discard this information before the archive is written. * * TODO: If there's a failure, report it and return ARCHIVE_WARN. */ static int setup_mac_metadata(struct archive_read_disk *a, struct archive_entry *entry, int *fd) { int tempfd = -1; int copyfile_flags = COPYFILE_NOFOLLOW | COPYFILE_ACL | COPYFILE_XATTR; struct stat copyfile_stat; int ret = ARCHIVE_OK; void *buff = NULL; int have_attrs; const char *name, *tempdir; struct archive_string tempfile; (void)fd; /* UNUSED */ name = archive_entry_sourcepath(entry); if (name == NULL) name = archive_entry_pathname(entry); if (name == NULL) { archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, "Can't open file to read extended attributes: No name"); return (ARCHIVE_WARN); } if (a->tree != NULL) { if (a->tree_enter_working_dir(a->tree) != 0) { archive_set_error(&a->archive, errno, "Couldn't change dir"); return (ARCHIVE_FAILED); } } /* Short-circuit if there's nothing to do. 
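 *
 * (With COPYFILE_CHECK, copyfile(3) performs a dry run: it returns a
 * bitmask of the requested flag bits that would copy any data, 0 if
 * there is nothing to copy, and -1 on error.  That is why have_attrs
 * below is compared against -1 and 0 rather than used as a count.)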
*/
	have_attrs = copyfile(name, NULL, 0,
	    copyfile_flags | COPYFILE_CHECK);
	if (have_attrs == -1) {
		archive_set_error(&a->archive, errno,
			"Could not check extended attributes");
		return (ARCHIVE_WARN);
	}
	if (have_attrs == 0)
		return (ARCHIVE_OK);

	tempdir = NULL;
	if (issetugid() == 0)
		tempdir = getenv("TMPDIR");
	if (tempdir == NULL)
		tempdir = _PATH_TMP;
	archive_string_init(&tempfile);
	archive_strcpy(&tempfile, tempdir);
	archive_strcat(&tempfile, "tar.md.XXXXXX");
	tempfd = mkstemp(tempfile.s);
	if (tempfd < 0) {
		archive_set_error(&a->archive, errno,
		    "Could not open extended attribute file");
		ret = ARCHIVE_WARN;
		goto cleanup;
	}
	__archive_ensure_cloexec_flag(tempfd);

	/* XXX I wish copyfile() could pack directly to a memory
	 * buffer; that would avoid the temp file here.  For that
	 * matter, it would be nice if fcopyfile() actually worked,
	 * that would reduce the many open/close races here. */
	if (copyfile(name, tempfile.s, 0, copyfile_flags | COPYFILE_PACK)) {
		archive_set_error(&a->archive, errno,
		    "Could not pack extended attributes");
		ret = ARCHIVE_WARN;
		goto cleanup;
	}
	if (fstat(tempfd, &copyfile_stat)) {
		archive_set_error(&a->archive, errno,
		    "Could not check size of extended attributes");
		ret = ARCHIVE_WARN;
		goto cleanup;
	}
	buff = malloc(copyfile_stat.st_size);
	if (buff == NULL) {
		archive_set_error(&a->archive, errno,
		    "Could not allocate memory for extended attributes");
		ret = ARCHIVE_WARN;
		goto cleanup;
	}
	if (copyfile_stat.st_size != read(tempfd, buff, copyfile_stat.st_size)) {
		archive_set_error(&a->archive, errno,
		    "Could not read extended attributes into memory");
		ret = ARCHIVE_WARN;
		goto cleanup;
	}
	archive_entry_copy_mac_metadata(entry, buff, copyfile_stat.st_size);

cleanup:
	if (tempfd >= 0) {
		close(tempfd);
		unlink(tempfile.s);
	}
	archive_string_free(&tempfile);
	free(buff);
	return (ret);
}

#else

/*
 * Stub implementation for non-Mac systems.
 */
static int
setup_mac_metadata(struct archive_read_disk *a,
    struct archive_entry *entry, int *fd)
{
	(void)a;	/* UNUSED */
	(void)entry;	/* UNUSED */
	(void)fd;	/* UNUSED */
	return (ARCHIVE_OK);
}
#endif

#ifdef HAVE_POSIX_ACL
static int translate_acl(struct archive_read_disk *a,
    struct archive_entry *entry, acl_t acl, int archive_entry_acl_type);

static int
setup_acls(struct archive_read_disk *a,
    struct archive_entry *entry, int *fd)
{
	const char	*accpath;
	acl_t		 acl;
+#if HAVE_ACL_IS_TRIVIAL_NP
	int		r;
+#endif

	accpath = archive_entry_sourcepath(entry);
	if (accpath == NULL)
		accpath = archive_entry_pathname(entry);

-	if (*fd < 0 && a->tree != NULL) {
-		if (a->follow_symlinks ||
-		    archive_entry_filetype(entry) != AE_IFLNK)
-			*fd = a->open_on_current_dir(a->tree,
-			    accpath, O_RDONLY | O_NONBLOCK);
-		if (*fd < 0) {
-			if (a->tree_enter_working_dir(a->tree) != 0) {
-				archive_set_error(&a->archive, errno,
-				    "Couldn't access %s", accpath);
-				return (ARCHIVE_FAILED);
-			}
-		}
-	}
-	archive_entry_acl_clear(entry);
-	acl = NULL;
-
#ifdef ACL_TYPE_NFS4
	/* Try NFS4 ACL first. */
-#if HAVE_ACL_GET_FD_NP
	if (*fd >= 0)
-		acl = acl_get_fd_np(*fd, ACL_TYPE_NFS4);
-#endif
-	if (acl == NULL) {
+		acl = acl_get_fd(*fd);
#if HAVE_ACL_GET_LINK_NP
-	if (!a->follow_symlinks)
-		acl = acl_get_link_np(accpath, ACL_TYPE_NFS4);
+	else if (!a->follow_symlinks)
+		acl = acl_get_link_np(accpath, ACL_TYPE_NFS4);
#else
-	if ((!a->follow_symlinks)
-	    && (archive_entry_filetype(entry) == AE_IFLNK)) {
-		/* We can't get the ACL of a symlink, so we assume
-		   it can't have one.
*/ - acl = NULL; - } + else if ((!a->follow_symlinks) + && (archive_entry_filetype(entry) == AE_IFLNK)) + /* We can't get the ACL of a symlink, so we assume it can't + have one. */ + acl = NULL; #endif - } - if (acl == NULL) + else acl = acl_get_file(accpath, ACL_TYPE_NFS4); - - if (acl != NULL) { #if HAVE_ACL_IS_TRIVIAL_NP - /* Ignore "trivial" ACLs that just mirror the file mode. */ - if (acl_is_trivial_np(acl, &r) == 0) { - if (r) { - acl_free(acl); - acl = NULL; - return (ARCHIVE_OK); - } - } + /* Ignore "trivial" ACLs that just mirror the file mode. */ + acl_is_trivial_np(acl, &r); + if (r) { + acl_free(acl); + acl = NULL; + } #endif - r = translate_acl(a, entry, acl, ARCHIVE_ENTRY_ACL_TYPE_NFS4); + if (acl != NULL) { + translate_acl(a, entry, acl, ARCHIVE_ENTRY_ACL_TYPE_NFS4); acl_free(acl); - return (r); + return (ARCHIVE_OK); } #endif /* Retrieve access ACL from file. */ if (*fd >= 0) acl = acl_get_fd(*fd); - if (acl == NULL) { #if HAVE_ACL_GET_LINK_NP - if (!a->follow_symlinks) - acl = acl_get_link_np(accpath, ACL_TYPE_ACCESS); + else if (!a->follow_symlinks) + acl = acl_get_link_np(accpath, ACL_TYPE_ACCESS); #else - if ((!a->follow_symlinks) - && (archive_entry_filetype(entry) == AE_IFLNK)) { - /* We can't get the ACL of a symlink, so we assume it - can't have one. */ - acl = NULL; - } + else if ((!a->follow_symlinks) + && (archive_entry_filetype(entry) == AE_IFLNK)) + /* We can't get the ACL of a symlink, so we assume it can't + have one. */ + acl = NULL; #endif - } - if (acl == NULL) + else acl = acl_get_file(accpath, ACL_TYPE_ACCESS); - if (acl != NULL) { -#if HAVE_ACL_IS_TRIVIAL_NP - /* Ignore "trivial" ACLs that just mirror the file mode. */ - if (acl_is_trivial_np(acl, &r) == 0) { - if (r) { - acl_free(acl); - acl = NULL; - return (ARCHIVE_OK); - } - } -#endif - r = translate_acl(a, entry, acl, + translate_acl(a, entry, acl, ARCHIVE_ENTRY_ACL_TYPE_ACCESS); acl_free(acl); - acl = NULL; - if (r != 0) - return (r); } /* Only directories can have default ACLs. */ if (S_ISDIR(archive_entry_mode(entry))) { acl = acl_get_file(accpath, ACL_TYPE_DEFAULT); if (acl != NULL) { - r = translate_acl(a, entry, acl, + translate_acl(a, entry, acl, ARCHIVE_ENTRY_ACL_TYPE_DEFAULT); acl_free(acl); - if (r != 0) - return (r); } } return (ARCHIVE_OK); } /* * Translate system ACL into libarchive internal structure. 
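 *
 * The translation is table-driven: each platform permission or
 * inheritance bit is paired with its libarchive counterpart in the
 * maps below, and translate_acl() simply walks those maps.  For
 * example, a permset holding ACL_READ | ACL_WRITE comes out as
 * ARCHIVE_ENTRY_ACL_READ | ARCHIVE_ENTRY_ACL_WRITE in ae_perm.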
*/ static struct { int archive_perm; int platform_perm; } acl_perm_map[] = { {ARCHIVE_ENTRY_ACL_EXECUTE, ACL_EXECUTE}, {ARCHIVE_ENTRY_ACL_WRITE, ACL_WRITE}, {ARCHIVE_ENTRY_ACL_READ, ACL_READ}, #ifdef ACL_TYPE_NFS4 {ARCHIVE_ENTRY_ACL_READ_DATA, ACL_READ_DATA}, {ARCHIVE_ENTRY_ACL_LIST_DIRECTORY, ACL_LIST_DIRECTORY}, {ARCHIVE_ENTRY_ACL_WRITE_DATA, ACL_WRITE_DATA}, {ARCHIVE_ENTRY_ACL_ADD_FILE, ACL_ADD_FILE}, {ARCHIVE_ENTRY_ACL_APPEND_DATA, ACL_APPEND_DATA}, {ARCHIVE_ENTRY_ACL_ADD_SUBDIRECTORY, ACL_ADD_SUBDIRECTORY}, {ARCHIVE_ENTRY_ACL_READ_NAMED_ATTRS, ACL_READ_NAMED_ATTRS}, {ARCHIVE_ENTRY_ACL_WRITE_NAMED_ATTRS, ACL_WRITE_NAMED_ATTRS}, {ARCHIVE_ENTRY_ACL_DELETE_CHILD, ACL_DELETE_CHILD}, {ARCHIVE_ENTRY_ACL_READ_ATTRIBUTES, ACL_READ_ATTRIBUTES}, {ARCHIVE_ENTRY_ACL_WRITE_ATTRIBUTES, ACL_WRITE_ATTRIBUTES}, {ARCHIVE_ENTRY_ACL_DELETE, ACL_DELETE}, {ARCHIVE_ENTRY_ACL_READ_ACL, ACL_READ_ACL}, {ARCHIVE_ENTRY_ACL_WRITE_ACL, ACL_WRITE_ACL}, {ARCHIVE_ENTRY_ACL_WRITE_OWNER, ACL_WRITE_OWNER}, {ARCHIVE_ENTRY_ACL_SYNCHRONIZE, ACL_SYNCHRONIZE} #endif }; #ifdef ACL_TYPE_NFS4 static struct { int archive_inherit; int platform_inherit; } acl_inherit_map[] = { {ARCHIVE_ENTRY_ACL_ENTRY_FILE_INHERIT, ACL_ENTRY_FILE_INHERIT}, {ARCHIVE_ENTRY_ACL_ENTRY_DIRECTORY_INHERIT, ACL_ENTRY_DIRECTORY_INHERIT}, {ARCHIVE_ENTRY_ACL_ENTRY_NO_PROPAGATE_INHERIT, ACL_ENTRY_NO_PROPAGATE_INHERIT}, {ARCHIVE_ENTRY_ACL_ENTRY_INHERIT_ONLY, ACL_ENTRY_INHERIT_ONLY} }; #endif static int translate_acl(struct archive_read_disk *a, struct archive_entry *entry, acl_t acl, int default_entry_acl_type) { acl_tag_t acl_tag; #ifdef ACL_TYPE_NFS4 acl_entry_type_t acl_type; acl_flagset_t acl_flagset; - int brand; + int brand, r; #endif acl_entry_t acl_entry; acl_permset_t acl_permset; int i, entry_acl_type; - int r, s, ae_id, ae_tag, ae_perm; + int s, ae_id, ae_tag, ae_perm; const char *ae_name; #ifdef ACL_TYPE_NFS4 // FreeBSD "brands" ACLs as POSIX.1e or NFSv4 // Make sure the "brand" on this ACL is consistent // with the default_entry_acl_type bits provided. - if (acl_get_brand_np(acl, &brand) != 0) { - archive_set_error(&a->archive, errno, - "Failed to read ACL brand"); - return (ARCHIVE_FAILED); - } + acl_get_brand_np(acl, &brand); switch (brand) { case ACL_BRAND_POSIX: switch (default_entry_acl_type) { case ARCHIVE_ENTRY_ACL_TYPE_ACCESS: case ARCHIVE_ENTRY_ACL_TYPE_DEFAULT: break; default: - archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, - "Invalid ACL entry type for POSIX.1e ACL"); - return (ARCHIVE_FAILED); + // XXX set warning message? + return ARCHIVE_FAILED; } break; case ACL_BRAND_NFS4: if (default_entry_acl_type & ~ARCHIVE_ENTRY_ACL_TYPE_NFS4) { - archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, - "ACL brand mismatch"); - return (ARCHIVE_FAILED); + // XXX set warning message? + return ARCHIVE_FAILED; } break; default: - archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, - "Invalid ACL brand"); - return (ARCHIVE_FAILED); + // XXX set warning message? 
+ return ARCHIVE_FAILED; break; } #endif s = acl_get_entry(acl, ACL_FIRST_ENTRY, &acl_entry); - if (s == -1) { - archive_set_error(&a->archive, errno, - "Failed to get ACL entry"); - return (ARCHIVE_FAILED); - } while (s == 1) { ae_id = -1; ae_name = NULL; ae_perm = 0; - if (acl_get_tag_type(acl_entry, &acl_tag) != 0) { - archive_set_error(&a->archive, errno, - "Failed to get ACL tag type"); - return (ARCHIVE_FAILED); - } + acl_get_tag_type(acl_entry, &acl_tag); switch (acl_tag) { case ACL_USER: ae_id = (int)*(uid_t *)acl_get_qualifier(acl_entry); ae_name = archive_read_disk_uname(&a->archive, ae_id); ae_tag = ARCHIVE_ENTRY_ACL_USER; break; case ACL_GROUP: ae_id = (int)*(gid_t *)acl_get_qualifier(acl_entry); ae_name = archive_read_disk_gname(&a->archive, ae_id); ae_tag = ARCHIVE_ENTRY_ACL_GROUP; break; case ACL_MASK: ae_tag = ARCHIVE_ENTRY_ACL_MASK; break; case ACL_USER_OBJ: ae_tag = ARCHIVE_ENTRY_ACL_USER_OBJ; break; case ACL_GROUP_OBJ: ae_tag = ARCHIVE_ENTRY_ACL_GROUP_OBJ; break; case ACL_OTHER: ae_tag = ARCHIVE_ENTRY_ACL_OTHER; break; #ifdef ACL_TYPE_NFS4 case ACL_EVERYONE: ae_tag = ARCHIVE_ENTRY_ACL_EVERYONE; break; #endif default: /* Skip types that libarchive can't support. */ s = acl_get_entry(acl, ACL_NEXT_ENTRY, &acl_entry); continue; } // XXX acl type maps to allow/deny/audit/YYYY bits // XXX acl_get_entry_type_np on FreeBSD returns EINVAL for // non-NFSv4 ACLs entry_acl_type = default_entry_acl_type; #ifdef ACL_TYPE_NFS4 - if (default_entry_acl_type == ARCHIVE_ENTRY_ACL_TYPE_NFS4) { - if (acl_get_entry_type_np(acl_entry, &acl_type) != 0) { - archive_set_error(&a->archive, errno, - "Failed to get ACL type from an NFSv4 ACL entry"); - return (ARCHIVE_FAILED); - } + r = acl_get_entry_type_np(acl_entry, &acl_type); + if (r == 0) { switch (acl_type) { case ACL_ENTRY_TYPE_ALLOW: entry_acl_type = ARCHIVE_ENTRY_ACL_TYPE_ALLOW; break; case ACL_ENTRY_TYPE_DENY: entry_acl_type = ARCHIVE_ENTRY_ACL_TYPE_DENY; break; case ACL_ENTRY_TYPE_AUDIT: entry_acl_type = ARCHIVE_ENTRY_ACL_TYPE_AUDIT; break; case ACL_ENTRY_TYPE_ALARM: entry_acl_type = ARCHIVE_ENTRY_ACL_TYPE_ALARM; break; - default: - archive_set_error(&a->archive, errno, - "Unknown NFSv4 ACL entry type: %d", acl_type); - return (ARCHIVE_FAILED); } + } - /* - * Libarchive stores "flag" (NFSv4 inheritance bits) - * in the ae_perm bitmap. - */ - if (acl_get_flagset_np(acl_entry, &acl_flagset) != 0) { - archive_set_error(&a->archive, errno, - "Failed to get flagset from an NFSv4 ACL entry"); - return (ARCHIVE_FAILED); - } - for (i = 0; i < (int)(sizeof(acl_inherit_map) / sizeof(acl_inherit_map[0])); ++i) { - r = acl_get_flag_np(acl_flagset, acl_inherit_map[i].platform_inherit); - if (r == -1) { - archive_set_error(&a->archive, errno, - "Failed to check flag in a NFSv4 ACL flagset"); - return (ARCHIVE_FAILED); - } else if (r) + /* + * Libarchive stores "flag" (NFSv4 inheritance bits) + * in the ae_perm bitmap. 
+ */ + // XXX acl_get_flagset_np on FreeBSD returns EINVAL for + // non-NFSv4 ACLs + r = acl_get_flagset_np(acl_entry, &acl_flagset); + if (r == 0) { + for (i = 0; i < (int)(sizeof(acl_inherit_map) / sizeof(acl_inherit_map[0])); ++i) { + if (acl_get_flag_np(acl_flagset, + acl_inherit_map[i].platform_inherit)) ae_perm |= acl_inherit_map[i].archive_inherit; } } #endif - if (acl_get_permset(acl_entry, &acl_permset) != 0) { - archive_set_error(&a->archive, errno, - "Failed to get ACL permission set"); - return (ARCHIVE_FAILED); - } + acl_get_permset(acl_entry, &acl_permset); for (i = 0; i < (int)(sizeof(acl_perm_map) / sizeof(acl_perm_map[0])); ++i) { /* * acl_get_perm() is spelled differently on different * platforms; see above. */ - r = ACL_GET_PERM(acl_permset, acl_perm_map[i].platform_perm); - if (r == -1) { - archive_set_error(&a->archive, errno, - "Failed to check permission in an ACL permission set"); - return (ARCHIVE_FAILED); - } else if (r) + if (ACL_GET_PERM(acl_permset, acl_perm_map[i].platform_perm)) ae_perm |= acl_perm_map[i].archive_perm; } - r = archive_entry_acl_add_entry(entry, entry_acl_type, + archive_entry_acl_add_entry(entry, entry_acl_type, ae_perm, ae_tag, ae_id, ae_name); - if (r != 0) - return (r); s = acl_get_entry(acl, ACL_NEXT_ENTRY, &acl_entry); - if (s == -1) { - archive_set_error(&a->archive, errno, - "Failed to get ACL entry"); - return (ARCHIVE_FAILED); - } } return (ARCHIVE_OK); } #else static int setup_acls(struct archive_read_disk *a, struct archive_entry *entry, int *fd) { (void)a; /* UNUSED */ (void)entry; /* UNUSED */ (void)fd; /* UNUSED */ return (ARCHIVE_OK); } #endif #if (HAVE_FGETXATTR && HAVE_FLISTXATTR && HAVE_LISTXATTR && \ HAVE_LLISTXATTR && HAVE_GETXATTR && HAVE_LGETXATTR) || \ (HAVE_FGETEA && HAVE_FLISTEA && HAVE_LISTEA) /* * Linux and AIX extended attribute support. * * TODO: By using a stack-allocated buffer for the first * call to getxattr(), we might be able to avoid the second * call entirely. We only need the second call if the * stack-allocated buffer is too small. But a modest buffer * of 1024 bytes or so will often be big enough. Same applies * to listxattr(). 
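 *
 * A minimal sketch of that TODO (an untested illustration against the
 * Linux getxattr(2) family; the fall-back into the existing two-call
 * malloc() path is elided):
 *
 *	char stackbuff[1024];
 *	ssize_t size = fgetxattr(fd, name, stackbuff, sizeof(stackbuff));
 *	if (size >= 0)
 *		archive_entry_xattr_add_entry(entry, name,
 *		    stackbuff, size);
 *	else if (errno == ERANGE)
 *		... retry through the malloc() path below ...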
*/ static int setup_xattr(struct archive_read_disk *a, struct archive_entry *entry, const char *name, int fd) { ssize_t size; void *value = NULL; const char *accpath; accpath = archive_entry_sourcepath(entry); if (accpath == NULL) accpath = archive_entry_pathname(entry); #if HAVE_FGETXATTR if (fd >= 0) size = fgetxattr(fd, name, NULL, 0); else if (!a->follow_symlinks) size = lgetxattr(accpath, name, NULL, 0); else size = getxattr(accpath, name, NULL, 0); #elif HAVE_FGETEA if (fd >= 0) size = fgetea(fd, name, NULL, 0); else if (!a->follow_symlinks) size = lgetea(accpath, name, NULL, 0); else size = getea(accpath, name, NULL, 0); #endif if (size == -1) { archive_set_error(&a->archive, errno, "Couldn't query extended attribute"); return (ARCHIVE_WARN); } if (size > 0 && (value = malloc(size)) == NULL) { archive_set_error(&a->archive, errno, "Out of memory"); return (ARCHIVE_FATAL); } #if HAVE_FGETXATTR if (fd >= 0) size = fgetxattr(fd, name, value, size); else if (!a->follow_symlinks) size = lgetxattr(accpath, name, value, size); else size = getxattr(accpath, name, value, size); #elif HAVE_FGETEA if (fd >= 0) size = fgetea(fd, name, value, size); else if (!a->follow_symlinks) size = lgetea(accpath, name, value, size); else size = getea(accpath, name, value, size); #endif if (size == -1) { archive_set_error(&a->archive, errno, "Couldn't read extended attribute"); return (ARCHIVE_WARN); } archive_entry_xattr_add_entry(entry, name, value, size); free(value); return (ARCHIVE_OK); } static int setup_xattrs(struct archive_read_disk *a, struct archive_entry *entry, int *fd) { char *list, *p; const char *path; ssize_t list_size; path = archive_entry_sourcepath(entry); if (path == NULL) path = archive_entry_pathname(entry); if (*fd < 0 && a->tree != NULL) { if (a->follow_symlinks || archive_entry_filetype(entry) != AE_IFLNK) *fd = a->open_on_current_dir(a->tree, path, O_RDONLY | O_NONBLOCK); if (*fd < 0) { if (a->tree_enter_working_dir(a->tree) != 0) { archive_set_error(&a->archive, errno, "Couldn't access %s", path); return (ARCHIVE_FAILED); } } } #if HAVE_FLISTXATTR if (*fd >= 0) list_size = flistxattr(*fd, NULL, 0); else if (!a->follow_symlinks) list_size = llistxattr(path, NULL, 0); else list_size = listxattr(path, NULL, 0); #elif HAVE_FLISTEA if (*fd >= 0) list_size = flistea(*fd, NULL, 0); else if (!a->follow_symlinks) list_size = llistea(path, NULL, 0); else list_size = listea(path, NULL, 0); #endif if (list_size == -1) { if (errno == ENOTSUP || errno == ENOSYS) return (ARCHIVE_OK); archive_set_error(&a->archive, errno, "Couldn't list extended attributes"); return (ARCHIVE_WARN); } if (list_size == 0) return (ARCHIVE_OK); if ((list = malloc(list_size)) == NULL) { archive_set_error(&a->archive, errno, "Out of memory"); return (ARCHIVE_FATAL); } #if HAVE_FLISTXATTR if (*fd >= 0) list_size = flistxattr(*fd, list, list_size); else if (!a->follow_symlinks) list_size = llistxattr(path, list, list_size); else list_size = listxattr(path, list, list_size); #elif HAVE_FLISTEA if (*fd >= 0) list_size = flistea(*fd, list, list_size); else if (!a->follow_symlinks) list_size = llistea(path, list, list_size); else list_size = listea(path, list, list_size); #endif if (list_size == -1) { archive_set_error(&a->archive, errno, "Couldn't retrieve extended attributes"); free(list); return (ARCHIVE_WARN); } for (p = list; (p - list) < list_size; p += strlen(p) + 1) { if (strncmp(p, "system.", 7) == 0 || strncmp(p, "xfsroot.", 8) == 0) continue; setup_xattr(a, entry, p, *fd); } free(list); return (ARCHIVE_OK); } 
#elif HAVE_EXTATTR_GET_FILE && HAVE_EXTATTR_LIST_FILE && \ HAVE_DECL_EXTATTR_NAMESPACE_USER /* * FreeBSD extattr interface. */ /* TODO: Implement this. Follow the Linux model above, but * with FreeBSD-specific system calls, of course. Be careful * to not include the system extattrs that hold ACLs; we handle * those separately. */ static int setup_xattr(struct archive_read_disk *a, struct archive_entry *entry, int namespace, const char *name, const char *fullname, int fd); static int setup_xattr(struct archive_read_disk *a, struct archive_entry *entry, int namespace, const char *name, const char *fullname, int fd) { ssize_t size; void *value = NULL; const char *accpath; accpath = archive_entry_sourcepath(entry); if (accpath == NULL) accpath = archive_entry_pathname(entry); if (fd >= 0) size = extattr_get_fd(fd, namespace, name, NULL, 0); else if (!a->follow_symlinks) size = extattr_get_link(accpath, namespace, name, NULL, 0); else size = extattr_get_file(accpath, namespace, name, NULL, 0); if (size == -1) { archive_set_error(&a->archive, errno, "Couldn't query extended attribute"); return (ARCHIVE_WARN); } if (size > 0 && (value = malloc(size)) == NULL) { archive_set_error(&a->archive, errno, "Out of memory"); return (ARCHIVE_FATAL); } if (fd >= 0) size = extattr_get_fd(fd, namespace, name, value, size); else if (!a->follow_symlinks) size = extattr_get_link(accpath, namespace, name, value, size); else size = extattr_get_file(accpath, namespace, name, value, size); if (size == -1) { free(value); archive_set_error(&a->archive, errno, "Couldn't read extended attribute"); return (ARCHIVE_WARN); } archive_entry_xattr_add_entry(entry, fullname, value, size); free(value); return (ARCHIVE_OK); } static int setup_xattrs(struct archive_read_disk *a, struct archive_entry *entry, int *fd) { char buff[512]; char *list, *p; ssize_t list_size; const char *path; int namespace = EXTATTR_NAMESPACE_USER; path = archive_entry_sourcepath(entry); if (path == NULL) path = archive_entry_pathname(entry); if (*fd < 0 && a->tree != NULL) { if (a->follow_symlinks || archive_entry_filetype(entry) != AE_IFLNK) *fd = a->open_on_current_dir(a->tree, path, O_RDONLY | O_NONBLOCK); if (*fd < 0) { if (a->tree_enter_working_dir(a->tree) != 0) { archive_set_error(&a->archive, errno, "Couldn't access %s", path); return (ARCHIVE_FAILED); } } } if (*fd >= 0) list_size = extattr_list_fd(*fd, namespace, NULL, 0); else if (!a->follow_symlinks) list_size = extattr_list_link(path, namespace, NULL, 0); else list_size = extattr_list_file(path, namespace, NULL, 0); if (list_size == -1 && errno == EOPNOTSUPP) return (ARCHIVE_OK); if (list_size == -1) { archive_set_error(&a->archive, errno, "Couldn't list extended attributes"); return (ARCHIVE_WARN); } if (list_size == 0) return (ARCHIVE_OK); if ((list = malloc(list_size)) == NULL) { archive_set_error(&a->archive, errno, "Out of memory"); return (ARCHIVE_FATAL); } if (*fd >= 0) list_size = extattr_list_fd(*fd, namespace, list, list_size); else if (!a->follow_symlinks) list_size = extattr_list_link(path, namespace, list, list_size); else list_size = extattr_list_file(path, namespace, list, list_size); if (list_size == -1) { archive_set_error(&a->archive, errno, "Couldn't retrieve extended attributes"); free(list); return (ARCHIVE_WARN); } p = list; while ((p - list) < list_size) { size_t len = 255 & (int)*p; char *name; strcpy(buff, "user."); name = buff + strlen(buff); memcpy(name, p + 1, len); name[len] = '\0'; setup_xattr(a, entry, namespace, name, buff, *fd); p += 1 + len; } free(list); 
	return (ARCHIVE_OK);
}

#else

/*
 * Generic (stub) extended attribute support.
 */
static int
setup_xattrs(struct archive_read_disk *a,
    struct archive_entry *entry, int *fd)
{
	(void)a;     /* UNUSED */
	(void)entry; /* UNUSED */
	(void)fd;    /* UNUSED */
	return (ARCHIVE_OK);
}

#endif

#if defined(HAVE_LINUX_FIEMAP_H)

/*
 * Linux sparse interface.
 *
 * The FIEMAP ioctl returns an "extent" for each physical allocation
 * on disk.  We need to process those to generate a more compact list
 * of logical file blocks.  We also need to be very careful to use
 * FIEMAP_FLAG_SYNC here, since there are reports that Linux sometimes
 * does not report allocations for newly-written data that hasn't
 * been synced to disk.
 *
 * It's important to return a minimal sparse file list because we want
 * to not trigger sparse file extensions if we don't have to, since
 * not all readers support them.
 */

static int
setup_sparse(struct archive_read_disk *a,
    struct archive_entry *entry, int *fd)
{
	char buff[4096];
	struct fiemap *fm;
	struct fiemap_extent *fe;
	int64_t size;
	int count, do_fiemap, iters;
	int exit_sts = ARCHIVE_OK;

	if (archive_entry_filetype(entry) != AE_IFREG
	    || archive_entry_size(entry) <= 0
	    || archive_entry_hardlink(entry) != NULL)
		return (ARCHIVE_OK);

	if (*fd < 0) {
		const char *path;

		path = archive_entry_sourcepath(entry);
		if (path == NULL)
			path = archive_entry_pathname(entry);
		if (a->tree != NULL)
			*fd = a->open_on_current_dir(a->tree, path,
				O_RDONLY | O_NONBLOCK | O_CLOEXEC);
		else
			*fd = open(path, O_RDONLY | O_NONBLOCK | O_CLOEXEC);
		if (*fd < 0) {
			archive_set_error(&a->archive, errno,
			    "Can't open `%s'", path);
			return (ARCHIVE_FAILED);
		}
		__archive_ensure_cloexec_flag(*fd);
	}

	/* Initialize buffer to avoid the error valgrind complains about. */
	memset(buff, 0, sizeof(buff));
	count = (sizeof(buff) - sizeof(*fm))/sizeof(*fe);
	fm = (struct fiemap *)buff;
	fm->fm_start = 0;
	fm->fm_length = ~0ULL;
	fm->fm_flags = FIEMAP_FLAG_SYNC;
	fm->fm_extent_count = count;
	do_fiemap = 1;
	size = archive_entry_size(entry);
	for (iters = 0; ; ++iters) {
		int i, r;

		r = ioctl(*fd, FS_IOC_FIEMAP, fm);
		if (r < 0) {
			/* If an error occurs, it is better to return
			 * ARCHIVE_OK, because an earlier kernel
			 * version (< 2.6.28) cannot perform
			 * FS_IOC_FIEMAP. */
			goto exit_setup_sparse;
		}
		if (fm->fm_mapped_extents == 0) {
			if (iters == 0) {
				/* Fully sparse file; insert a zero-length "data" entry */
				archive_entry_sparse_add_entry(entry, 0, 0);
			}
			break;
		}
		fe = fm->fm_extents;
		for (i = 0; i < (int)fm->fm_mapped_extents; i++, fe++) {
			if (!(fe->fe_flags & FIEMAP_EXTENT_UNWRITTEN)) {
				/* The fe_length of the last block may
				 * extend past the end of the file;
				 * clamp it to the file size. */
				int64_t length = fe->fe_length;
				if (fe->fe_logical + length > (uint64_t)size)
					length -= fe->fe_logical + length - size;
				if (fe->fe_logical == 0 && length == size) {
					/* This is not sparse. */
					do_fiemap = 0;
					break;
				}
				if (length > 0)
					archive_entry_sparse_add_entry(entry,
					    fe->fe_logical, length);
			}
			if (fe->fe_flags & FIEMAP_EXTENT_LAST)
				do_fiemap = 0;
		}
		if (do_fiemap) {
			fe = fm->fm_extents + fm->fm_mapped_extents -1;
			fm->fm_start = fe->fe_logical + fe->fe_length;
		} else
			break;
	}
exit_setup_sparse:
	return (exit_sts);
}

#elif defined(SEEK_HOLE) && defined(SEEK_DATA) && defined(_PC_MIN_HOLE_SIZE)

/*
 * FreeBSD and Solaris sparse interface.
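 *
 * The loop below alternates lseek(SEEK_DATA) and lseek(SEEK_HOLE) to
 * enumerate the data regions.  For example, a 1 MiB file whose first
 * 4 KiB is data and whose remainder is a hole yields: SEEK_DATA from
 * 0 returns 0, SEEK_HOLE from 0 returns 4096, so one sparse entry
 * (offset 0, length 4096) is recorded; the next SEEK_DATA then fails
 * with ENXIO and the loop ends.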
*/
static int
setup_sparse(struct archive_read_disk *a,
    struct archive_entry *entry, int *fd)
{
	int64_t size;
	off_t initial_off; /* FreeBSD/Solaris only, so off_t okay here */
	off_t off_s, off_e; /* FreeBSD/Solaris only, so off_t okay here */
	int exit_sts = ARCHIVE_OK;
	int check_fully_sparse = 0;

	if (archive_entry_filetype(entry) != AE_IFREG
	    || archive_entry_size(entry) <= 0
	    || archive_entry_hardlink(entry) != NULL)
		return (ARCHIVE_OK);

	/* Does the filesystem support the reporting of holes? */
	if (*fd < 0 && a->tree != NULL) {
		const char *path;

		path = archive_entry_sourcepath(entry);
		if (path == NULL)
			path = archive_entry_pathname(entry);
		*fd = a->open_on_current_dir(a->tree, path,
		    O_RDONLY | O_NONBLOCK);
		if (*fd < 0) {
			archive_set_error(&a->archive, errno,
			    "Can't open `%s'", path);
			return (ARCHIVE_FAILED);
		}
	}

	if (*fd >= 0) {
		if (fpathconf(*fd, _PC_MIN_HOLE_SIZE) <= 0)
			return (ARCHIVE_OK);
		initial_off = lseek(*fd, 0, SEEK_CUR);
		if (initial_off != 0)
			lseek(*fd, 0, SEEK_SET);
	} else {
		const char *path;

		path = archive_entry_sourcepath(entry);
		if (path == NULL)
			path = archive_entry_pathname(entry);
		if (pathconf(path, _PC_MIN_HOLE_SIZE) <= 0)
			return (ARCHIVE_OK);
		*fd = open(path, O_RDONLY | O_NONBLOCK | O_CLOEXEC);
		if (*fd < 0) {
			archive_set_error(&a->archive, errno,
			    "Can't open `%s'", path);
			return (ARCHIVE_FAILED);
		}
		__archive_ensure_cloexec_flag(*fd);
		initial_off = 0;
	}

	off_s = 0;
	size = archive_entry_size(entry);
	while (off_s < size) {
		off_s = lseek(*fd, off_s, SEEK_DATA);
		if (off_s == (off_t)-1) {
			if (errno == ENXIO) {
				/* no more holes */
				if (archive_entry_sparse_count(entry) == 0) {
					/* Potentially a fully-sparse file. */
					check_fully_sparse = 1;
				}
				break;
			}
			archive_set_error(&a->archive, errno,
			    "lseek(SEEK_HOLE) failed");
			exit_sts = ARCHIVE_FAILED;
			goto exit_setup_sparse;
		}
		off_e = lseek(*fd, off_s, SEEK_HOLE);
		if (off_e == (off_t)-1) {
			if (errno == ENXIO) {
				off_e = lseek(*fd, 0, SEEK_END);
				if (off_e != (off_t)-1)
					break;/* no more data */
			}
			archive_set_error(&a->archive, errno,
			    "lseek(SEEK_DATA) failed");
			exit_sts = ARCHIVE_FAILED;
			goto exit_setup_sparse;
		}
		if (off_s == 0 && off_e == size)
			break;/* This is not sparse. */
		archive_entry_sparse_add_entry(entry, off_s,
			off_e - off_s);
		off_s = off_e;
	}

	if (check_fully_sparse) {
		if (lseek(*fd, 0, SEEK_HOLE) == 0 &&
			lseek(*fd, 0, SEEK_END) == size) {
			/* Fully sparse file; insert a zero-length "data" entry */
			archive_entry_sparse_add_entry(entry, 0, 0);
		}
	}
exit_setup_sparse:
	lseek(*fd, initial_off, SEEK_SET);
	return (exit_sts);
}

#else

/*
 * Generic (stub) sparse support.
 */
static int
setup_sparse(struct archive_read_disk *a,
    struct archive_entry *entry, int *fd)
{
	(void)a;     /* UNUSED */
	(void)entry; /* UNUSED */
	(void)fd;    /* UNUSED */
	return (ARCHIVE_OK);
}

#endif

#endif /* !defined(_WIN32) || defined(__CYGWIN__) */

Index: projects/clang390-import/contrib/libarchive/libarchive/archive_read_support_format_tar.c
===================================================================
--- projects/clang390-import/contrib/libarchive/libarchive/archive_read_support_format_tar.c (revision 305016)
+++ projects/clang390-import/contrib/libarchive/libarchive/archive_read_support_format_tar.c (revision 305017)
@@ -1,2792 +1,2775 @@
/*-
 * Copyright (c) 2003-2007 Tim Kientzle
 * Copyright (c) 2011-2012 Michihiro NAKAJIMA
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1.
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "archive_platform.h" __FBSDID("$FreeBSD$"); #ifdef HAVE_ERRNO_H #include #endif #include #ifdef HAVE_STDLIB_H #include #endif #ifdef HAVE_STRING_H #include #endif #include "archive.h" #include "archive_acl_private.h" /* For ACL parsing routines. */ #include "archive_entry.h" #include "archive_entry_locale.h" #include "archive_private.h" #include "archive_read_private.h" #define tar_min(a,b) ((a) < (b) ? (a) : (b)) /* * Layout of POSIX 'ustar' tar header. */ struct archive_entry_header_ustar { char name[100]; char mode[8]; char uid[8]; char gid[8]; char size[12]; char mtime[12]; char checksum[8]; char typeflag[1]; char linkname[100]; /* "old format" header ends here */ char magic[6]; /* For POSIX: "ustar\0" */ char version[2]; /* For POSIX: "00" */ char uname[32]; char gname[32]; char rdevmajor[8]; char rdevminor[8]; char prefix[155]; }; /* * Structure of GNU tar header */ struct gnu_sparse { char offset[12]; char numbytes[12]; }; struct archive_entry_header_gnutar { char name[100]; char mode[8]; char uid[8]; char gid[8]; char size[12]; char mtime[12]; char checksum[8]; char typeflag[1]; char linkname[100]; char magic[8]; /* "ustar \0" (note blank/blank/null at end) */ char uname[32]; char gname[32]; char rdevmajor[8]; char rdevminor[8]; char atime[12]; char ctime[12]; char offset[12]; char longnames[4]; char unused[1]; struct gnu_sparse sparse[4]; char isextended[1]; char realsize[12]; /* * Old GNU format doesn't use POSIX 'prefix' field; they use * the 'L' (longname) entry instead. */ }; /* * Data specific to this format. */ struct sparse_block { struct sparse_block *next; int64_t offset; int64_t remaining; int hole; }; struct tar { struct archive_string acl_text; struct archive_string entry_pathname; /* For "GNU.sparse.name" and other similar path extensions. 
*/ struct archive_string entry_pathname_override; struct archive_string entry_linkpath; struct archive_string entry_uname; struct archive_string entry_gname; struct archive_string longlink; struct archive_string longname; struct archive_string pax_header; struct archive_string pax_global; struct archive_string line; int pax_hdrcharset_binary; int header_recursion_depth; int64_t entry_bytes_remaining; int64_t entry_offset; int64_t entry_padding; int64_t entry_bytes_unconsumed; int64_t realsize; struct sparse_block *sparse_list; struct sparse_block *sparse_last; int64_t sparse_offset; int64_t sparse_numbytes; int sparse_gnu_major; int sparse_gnu_minor; char sparse_gnu_pending; struct archive_string localname; struct archive_string_conv *opt_sconv; struct archive_string_conv *sconv; struct archive_string_conv *sconv_acl; struct archive_string_conv *sconv_default; int init_default_conversion; int compat_2x; int process_mac_extensions; int read_concatenated_archives; }; static int archive_block_is_null(const char *p); static char *base64_decode(const char *, size_t, size_t *); static int gnu_add_sparse_entry(struct archive_read *, struct tar *, int64_t offset, int64_t remaining); static void gnu_clear_sparse_list(struct tar *); static int gnu_sparse_old_read(struct archive_read *, struct tar *, const struct archive_entry_header_gnutar *header, size_t *); static int gnu_sparse_old_parse(struct archive_read *, struct tar *, const struct gnu_sparse *sparse, int length); static int gnu_sparse_01_parse(struct archive_read *, struct tar *, const char *); static ssize_t gnu_sparse_10_read(struct archive_read *, struct tar *, size_t *); static int header_Solaris_ACL(struct archive_read *, struct tar *, struct archive_entry *, const void *, size_t *); static int header_common(struct archive_read *, struct tar *, struct archive_entry *, const void *); static int header_old_tar(struct archive_read *, struct tar *, struct archive_entry *, const void *); static int header_pax_extensions(struct archive_read *, struct tar *, struct archive_entry *, const void *, size_t *); static int header_pax_global(struct archive_read *, struct tar *, struct archive_entry *, const void *h, size_t *); static int header_longlink(struct archive_read *, struct tar *, struct archive_entry *, const void *h, size_t *); static int header_longname(struct archive_read *, struct tar *, struct archive_entry *, const void *h, size_t *); static int read_mac_metadata_blob(struct archive_read *, struct tar *, struct archive_entry *, const void *h, size_t *); static int header_volume(struct archive_read *, struct tar *, struct archive_entry *, const void *h, size_t *); static int header_ustar(struct archive_read *, struct tar *, struct archive_entry *, const void *h); static int header_gnutar(struct archive_read *, struct tar *, struct archive_entry *, const void *h, size_t *); static int archive_read_format_tar_bid(struct archive_read *, int); static int archive_read_format_tar_options(struct archive_read *, const char *, const char *); static int archive_read_format_tar_cleanup(struct archive_read *); static int archive_read_format_tar_read_data(struct archive_read *a, const void **buff, size_t *size, int64_t *offset); static int archive_read_format_tar_skip(struct archive_read *a); static int archive_read_format_tar_read_header(struct archive_read *, struct archive_entry *); static int checksum(struct archive_read *, const void *); static int pax_attribute(struct archive_read *, struct tar *, struct archive_entry *, const char *key, 
const char *value); -static int pax_attribute_acl(struct archive_read *, struct tar *, - struct archive_entry *, const char *, int); -static int pax_attribute_xattr(struct archive_entry *, const char *, - const char *); static int pax_header(struct archive_read *, struct tar *, struct archive_entry *, char *attr); static void pax_time(const char *, int64_t *sec, long *nanos); static ssize_t readline(struct archive_read *, struct tar *, const char **, ssize_t limit, size_t *); static int read_body_to_string(struct archive_read *, struct tar *, struct archive_string *, const void *h, size_t *); static int solaris_sparse_parse(struct archive_read *, struct tar *, struct archive_entry *, const char *); static int64_t tar_atol(const char *, size_t); static int64_t tar_atol10(const char *, size_t); static int64_t tar_atol256(const char *, size_t); static int64_t tar_atol8(const char *, size_t); static int tar_read_header(struct archive_read *, struct tar *, struct archive_entry *, size_t *); static int tohex(int c); static char *url_decode(const char *); static void tar_flush_unconsumed(struct archive_read *, size_t *); int archive_read_support_format_gnutar(struct archive *a) { archive_check_magic(a, ARCHIVE_READ_MAGIC, ARCHIVE_STATE_NEW, "archive_read_support_format_gnutar"); return (archive_read_support_format_tar(a)); } int archive_read_support_format_tar(struct archive *_a) { struct archive_read *a = (struct archive_read *)_a; struct tar *tar; int r; archive_check_magic(_a, ARCHIVE_READ_MAGIC, ARCHIVE_STATE_NEW, "archive_read_support_format_tar"); tar = (struct tar *)calloc(1, sizeof(*tar)); #ifdef HAVE_COPYFILE_H /* Set this by default on Mac OS. */ tar->process_mac_extensions = 1; #endif if (tar == NULL) { archive_set_error(&a->archive, ENOMEM, "Can't allocate tar data"); return (ARCHIVE_FATAL); } r = __archive_read_register_format(a, tar, "tar", archive_read_format_tar_bid, archive_read_format_tar_options, archive_read_format_tar_read_header, archive_read_format_tar_read_data, archive_read_format_tar_skip, NULL, archive_read_format_tar_cleanup, NULL, NULL); if (r != ARCHIVE_OK) free(tar); return (ARCHIVE_OK); } static int archive_read_format_tar_cleanup(struct archive_read *a) { struct tar *tar; tar = (struct tar *)(a->format->data); gnu_clear_sparse_list(tar); archive_string_free(&tar->acl_text); archive_string_free(&tar->entry_pathname); archive_string_free(&tar->entry_pathname_override); archive_string_free(&tar->entry_linkpath); archive_string_free(&tar->entry_uname); archive_string_free(&tar->entry_gname); archive_string_free(&tar->line); archive_string_free(&tar->pax_global); archive_string_free(&tar->pax_header); archive_string_free(&tar->longname); archive_string_free(&tar->longlink); archive_string_free(&tar->localname); free(tar); (a->format->data) = NULL; return (ARCHIVE_OK); } static int archive_read_format_tar_bid(struct archive_read *a, int best_bid) { int bid; const char *h; const struct archive_entry_header_ustar *header; (void)best_bid; /* UNUSED */ bid = 0; /* Now let's look at the actual header and see if it matches. */ h = __archive_read_ahead(a, 512, NULL); if (h == NULL) return (-1); /* If it's an end-of-archive mark, we can handle it. */ if (h[0] == 0 && archive_block_is_null(h)) { /* * Usually, I bid the number of bits verified, but * in this case, 4096 seems excessive so I picked 10 as * an arbitrary but reasonable-seeming value. 
*/
		return (10);
	}

	/* If it's not an end-of-archive mark, it must have a valid checksum.*/
	if (!checksum(a, h))
		return (0);
	bid += 48;  /* Checksum is usually 6 octal digits. */

	header = (const struct archive_entry_header_ustar *)h;

	/* Recognize POSIX formats. */
	if ((memcmp(header->magic, "ustar\0", 6) == 0)
	    && (memcmp(header->version, "00", 2) == 0))
		bid += 56;

	/* Recognize GNU tar format. */
	if ((memcmp(header->magic, "ustar ", 6) == 0)
	    && (memcmp(header->version, " \0", 2) == 0))
		bid += 56;

	/* Type flag must be null, digit or A-Z, a-z. */
	if (header->typeflag[0] != 0
	    && !( header->typeflag[0] >= '0' && header->typeflag[0] <= '9')
	    && !( header->typeflag[0] >= 'A' && header->typeflag[0] <= 'Z')
	    && !( header->typeflag[0] >= 'a' && header->typeflag[0] <= 'z') )
		return (0);
	bid += 2;  /* 6 bits of variation in an 8-bit field leaves 2 bits. */

	/* Sanity check: Look at first byte of mode field. */
	switch (255 & (unsigned)header->mode[0]) {
	case 0: case 255:
		/* Base-256 value: No further verification possible! */
		break;
	case ' ': /* Not recommended, but not illegal, either. */
		break;
	case '0': case '1': case '2': case '3':
	case '4': case '5': case '6': case '7':
		/* Octal Value. */
		/* TODO: Check format of remainder of this field. */
		break;
	default:
		/* Not a valid mode; bail out here. */
		return (0);
	}
	/* TODO: Sanity test uid/gid/size/mtime/rdevmajor/rdevminor fields. */

	return (bid);
}

static int
archive_read_format_tar_options(struct archive_read *a,
    const char *key, const char *val)
{
	struct tar *tar;
	int ret = ARCHIVE_FAILED;

	tar = (struct tar *)(a->format->data);
	if (strcmp(key, "compat-2x") == 0) {
		/* Handle UTF-8 filenames as libarchive 2.x */
		tar->compat_2x = (val != NULL && val[0] != 0);
		tar->init_default_conversion = tar->compat_2x;
		return (ARCHIVE_OK);
	} else if (strcmp(key, "hdrcharset") == 0) {
		if (val == NULL || val[0] == 0)
			archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
			    "tar: hdrcharset option needs a character-set name");
		else {
			tar->opt_sconv =
			    archive_string_conversion_from_charset(
				&a->archive, val, 0);
			if (tar->opt_sconv != NULL)
				ret = ARCHIVE_OK;
			else
				ret = ARCHIVE_FATAL;
		}
		return (ret);
	} else if (strcmp(key, "mac-ext") == 0) {
		tar->process_mac_extensions = (val != NULL && val[0] != 0);
		return (ARCHIVE_OK);
	} else if (strcmp(key, "read_concatenated_archives") == 0) {
		tar->read_concatenated_archives = (val != NULL && val[0] != 0);
		return (ARCHIVE_OK);
	}

	/* Note: The "warn" return is just to inform the options
	 * supervisor that we didn't handle it.  It will generate
	 * a suitable error if no one used this option. */
	return (ARCHIVE_WARN);
}

/* utility function- this exists to centralize the logic of tracking
 * how much unconsumed data we have floating around, and to consume
 * anything outstanding since we're going to do read_aheads
 */
static void
tar_flush_unconsumed(struct archive_read *a, size_t *unconsumed)
{
	if (*unconsumed) {
/*
		void *data = (void *)__archive_read_ahead(a, *unconsumed, NULL);
		 * this block of code is to poison claimed unconsumed
		 * space, ensuring things break if it is in use still.
		 * currently it WILL break things, so enable it only
		 * for debugging this issue
		if (data) {
			memset(data, 0xff, *unconsumed);
		}
*/
		__archive_read_consume(a, *unconsumed);
		*unconsumed = 0;
	}
}

/*
 * The function invoked by archive_read_next_header().  This
 * just sets up a few things and then calls the internal
 * tar_read_header() function below.
*/ static int archive_read_format_tar_read_header(struct archive_read *a, struct archive_entry *entry) { /* * When converting tar archives to cpio archives, it is * essential that each distinct file have a distinct inode * number. To simplify this, we keep a static count here to * assign fake dev/inode numbers to each tar entry. Note that * pax format archives may overwrite this with something more * useful. * * Ideally, we would track every file read from the archive so * that we could assign the same dev/ino pair to hardlinks, * but the memory required to store a complete lookup table is * probably not worthwhile just to support the relatively * obscure tar->cpio conversion case. */ static int default_inode; static int default_dev; struct tar *tar; const char *p; const wchar_t *wp; int r; size_t l, unconsumed = 0; /* Assign default device/inode values. */ archive_entry_set_dev(entry, 1 + default_dev); /* Don't use zero. */ archive_entry_set_ino(entry, ++default_inode); /* Don't use zero. */ /* Limit generated st_ino number to 16 bits. */ if (default_inode >= 0xffff) { ++default_dev; default_inode = 0; } tar = (struct tar *)(a->format->data); tar->entry_offset = 0; gnu_clear_sparse_list(tar); tar->realsize = -1; /* Mark this as "unset" */ /* Setup default string conversion. */ tar->sconv = tar->opt_sconv; if (tar->sconv == NULL) { if (!tar->init_default_conversion) { tar->sconv_default = archive_string_default_conversion_for_read(&(a->archive)); tar->init_default_conversion = 1; } tar->sconv = tar->sconv_default; } r = tar_read_header(a, tar, entry, &unconsumed); tar_flush_unconsumed(a, &unconsumed); /* * "non-sparse" files are really just sparse files with * a single block. */ if (tar->sparse_list == NULL) { if (gnu_add_sparse_entry(a, tar, 0, tar->entry_bytes_remaining) != ARCHIVE_OK) return (ARCHIVE_FATAL); } else { struct sparse_block *sb; for (sb = tar->sparse_list; sb != NULL; sb = sb->next) { if (!sb->hole) archive_entry_sparse_add_entry(entry, sb->offset, sb->remaining); } } if (r == ARCHIVE_OK && archive_entry_filetype(entry) == AE_IFREG) { /* * "Regular" entry with trailing '/' is really * directory: This is needed for certain old tar * variants and even for some broken newer ones. */ if ((wp = archive_entry_pathname_w(entry)) != NULL) { l = wcslen(wp); if (l > 0 && wp[l - 1] == L'/') { archive_entry_set_filetype(entry, AE_IFDIR); } } else if ((p = archive_entry_pathname(entry)) != NULL) { l = strlen(p); if (l > 0 && p[l - 1] == '/') { archive_entry_set_filetype(entry, AE_IFDIR); } } } return (r); } static int archive_read_format_tar_read_data(struct archive_read *a, const void **buff, size_t *size, int64_t *offset) { ssize_t bytes_read; struct tar *tar; struct sparse_block *p; tar = (struct tar *)(a->format->data); for (;;) { /* Remove exhausted entries from sparse list. */ while (tar->sparse_list != NULL && tar->sparse_list->remaining == 0) { p = tar->sparse_list; tar->sparse_list = p->next; free(p); } if (tar->entry_bytes_unconsumed) { __archive_read_consume(a, tar->entry_bytes_unconsumed); tar->entry_bytes_unconsumed = 0; } /* If we're at end of file, return EOF. 
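 *
 * (On EOF the code below reports *buff = NULL, *size = 0 and
 * *offset = tar->realsize, so for a sparse entry the caller still
 * learns the file's full logical size even though fewer bytes were
 * actually delivered.)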
*/ if (tar->sparse_list == NULL || tar->entry_bytes_remaining == 0) { if (__archive_read_consume(a, tar->entry_padding) < 0) return (ARCHIVE_FATAL); tar->entry_padding = 0; *buff = NULL; *size = 0; *offset = tar->realsize; return (ARCHIVE_EOF); } *buff = __archive_read_ahead(a, 1, &bytes_read); if (bytes_read < 0) return (ARCHIVE_FATAL); if (*buff == NULL) { archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, "Truncated tar archive"); return (ARCHIVE_FATAL); } if (bytes_read > tar->entry_bytes_remaining) bytes_read = (ssize_t)tar->entry_bytes_remaining; /* Don't read more than is available in the * current sparse block. */ if (tar->sparse_list->remaining < bytes_read) bytes_read = (ssize_t)tar->sparse_list->remaining; *size = bytes_read; *offset = tar->sparse_list->offset; tar->sparse_list->remaining -= bytes_read; tar->sparse_list->offset += bytes_read; tar->entry_bytes_remaining -= bytes_read; tar->entry_bytes_unconsumed = bytes_read; if (!tar->sparse_list->hole) return (ARCHIVE_OK); /* Current is hole data and skip this. */ } } static int archive_read_format_tar_skip(struct archive_read *a) { int64_t bytes_skipped; int64_t request; struct sparse_block *p; struct tar* tar; tar = (struct tar *)(a->format->data); /* Do not consume the hole of a sparse file. */ request = 0; for (p = tar->sparse_list; p != NULL; p = p->next) { if (!p->hole) { if (p->remaining >= INT64_MAX - request) { return ARCHIVE_FATAL; } request += p->remaining; } } if (request > tar->entry_bytes_remaining) request = tar->entry_bytes_remaining; request += tar->entry_padding + tar->entry_bytes_unconsumed; bytes_skipped = __archive_read_consume(a, request); if (bytes_skipped < 0) return (ARCHIVE_FATAL); tar->entry_bytes_remaining = 0; tar->entry_bytes_unconsumed = 0; tar->entry_padding = 0; /* Free the sparse list. */ gnu_clear_sparse_list(tar); return (ARCHIVE_OK); } /* * This function recursively interprets all of the headers associated * with a single entry. */ static int tar_read_header(struct archive_read *a, struct tar *tar, struct archive_entry *entry, size_t *unconsumed) { ssize_t bytes; int err; const char *h; const struct archive_entry_header_ustar *header; const struct archive_entry_header_gnutar *gnuheader; /* Loop until we find a workable header record. */ for (;;) { tar_flush_unconsumed(a, unconsumed); /* Read 512-byte header record */ h = __archive_read_ahead(a, 512, &bytes); if (bytes < 0) return ((int)bytes); if (bytes == 0) { /* EOF at a block boundary. */ /* Some writers do omit the block of nulls. */ return (ARCHIVE_EOF); } if (bytes < 512) { /* Short block at EOF; this is bad. */ archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Truncated tar archive"); return (ARCHIVE_FATAL); } *unconsumed = 512; /* Header is workable if it's not an end-of-archive mark. */ if (h[0] != 0 || !archive_block_is_null(h)) break; /* Ensure format is set for archives with only null blocks. */ if (a->archive.archive_format_name == NULL) { a->archive.archive_format = ARCHIVE_FORMAT_TAR; a->archive.archive_format_name = "tar"; } if (!tar->read_concatenated_archives) { /* Try to consume a second all-null record, as well. */ tar_flush_unconsumed(a, unconsumed); h = __archive_read_ahead(a, 512, NULL); if (h != NULL && h[0] == 0 && archive_block_is_null(h)) __archive_read_consume(a, 512); archive_clear_error(&a->archive); return (ARCHIVE_EOF); } /* * We're reading concatenated archives, ignore this block and * loop to get the next. 
*/ } /* * Note: If the checksum fails and we return ARCHIVE_RETRY, * then the client is likely to just retry. This is a very * crude way to search for the next valid header! * * TODO: Improve this by implementing a real header scan. */ if (!checksum(a, h)) { tar_flush_unconsumed(a, unconsumed); archive_set_error(&a->archive, EINVAL, "Damaged tar archive"); return (ARCHIVE_RETRY); /* Retryable: Invalid header */ } if (++tar->header_recursion_depth > 32) { tar_flush_unconsumed(a, unconsumed); archive_set_error(&a->archive, EINVAL, "Too many special headers"); return (ARCHIVE_WARN); } /* Determine the format variant. */ header = (const struct archive_entry_header_ustar *)h; switch(header->typeflag[0]) { case 'A': /* Solaris tar ACL */ a->archive.archive_format = ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE; a->archive.archive_format_name = "Solaris tar"; err = header_Solaris_ACL(a, tar, entry, h, unconsumed); break; case 'g': /* POSIX-standard 'g' header. */ a->archive.archive_format = ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE; a->archive.archive_format_name = "POSIX pax interchange format"; err = header_pax_global(a, tar, entry, h, unconsumed); if (err == ARCHIVE_EOF) return (err); break; case 'K': /* Long link name (GNU tar, others) */ err = header_longlink(a, tar, entry, h, unconsumed); break; case 'L': /* Long filename (GNU tar, others) */ err = header_longname(a, tar, entry, h, unconsumed); break; case 'V': /* GNU volume header */ err = header_volume(a, tar, entry, h, unconsumed); break; case 'X': /* Used by SUN tar; same as 'x'. */ a->archive.archive_format = ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE; a->archive.archive_format_name = "POSIX pax interchange format (Sun variant)"; err = header_pax_extensions(a, tar, entry, h, unconsumed); break; case 'x': /* POSIX-standard 'x' header. */ a->archive.archive_format = ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE; a->archive.archive_format_name = "POSIX pax interchange format"; err = header_pax_extensions(a, tar, entry, h, unconsumed); break; default: gnuheader = (const struct archive_entry_header_gnutar *)h; if (memcmp(gnuheader->magic, "ustar \0", 8) == 0) { a->archive.archive_format = ARCHIVE_FORMAT_TAR_GNUTAR; a->archive.archive_format_name = "GNU tar format"; err = header_gnutar(a, tar, entry, h, unconsumed); } else if (memcmp(header->magic, "ustar", 5) == 0) { if (a->archive.archive_format != ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE) { a->archive.archive_format = ARCHIVE_FORMAT_TAR_USTAR; a->archive.archive_format_name = "POSIX ustar format"; } err = header_ustar(a, tar, entry, h); } else { a->archive.archive_format = ARCHIVE_FORMAT_TAR; a->archive.archive_format_name = "tar (non-POSIX)"; err = header_old_tar(a, tar, entry, h); } } if (err == ARCHIVE_FATAL) return (err); tar_flush_unconsumed(a, unconsumed); h = NULL; header = NULL; --tar->header_recursion_depth; /* Yuck. Apple's design here ends up storing long pathname * extensions for both the AppleDouble extension entry and the * regular entry. */ if ((err == ARCHIVE_WARN || err == ARCHIVE_OK) && tar->header_recursion_depth == 0 && tar->process_mac_extensions) { int err2 = read_mac_metadata_blob(a, tar, entry, h, unconsumed); if (err2 < err) err = err2; } /* We return warnings or success as-is. Anything else is fatal. */ if (err == ARCHIVE_WARN || err == ARCHIVE_OK) { if (tar->sparse_gnu_pending) { if (tar->sparse_gnu_major == 1 && tar->sparse_gnu_minor == 0) { ssize_t bytes_read; tar->sparse_gnu_pending = 0; /* Read initial sparse map. 
*/ bytes_read = gnu_sparse_10_read(a, tar, unconsumed); tar->entry_bytes_remaining -= bytes_read; if (bytes_read < 0) return ((int)bytes_read); } else { archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, "Unrecognized GNU sparse file format"); return (ARCHIVE_WARN); } tar->sparse_gnu_pending = 0; } return (err); } if (err == ARCHIVE_EOF) /* EOF when recursively reading a header is bad. */ archive_set_error(&a->archive, EINVAL, "Damaged tar archive"); return (ARCHIVE_FATAL); } /* * Return true if block checksum is correct. */ static int checksum(struct archive_read *a, const void *h) { const unsigned char *bytes; const struct archive_entry_header_ustar *header; int check, sum; size_t i; (void)a; /* UNUSED */ bytes = (const unsigned char *)h; header = (const struct archive_entry_header_ustar *)h; /* Checksum field must hold an octal number */ for (i = 0; i < sizeof(header->checksum); ++i) { char c = header->checksum[i]; if (c != ' ' && c != '\0' && (c < '0' || c > '7')) return 0; } /* * Test the checksum. Note that POSIX specifies _unsigned_ * bytes for this calculation. */ sum = (int)tar_atol(header->checksum, sizeof(header->checksum)); check = 0; for (i = 0; i < 148; i++) check += (unsigned char)bytes[i]; for (; i < 156; i++) check += 32; for (; i < 512; i++) check += (unsigned char)bytes[i]; if (sum == check) return (1); /* * Repeat test with _signed_ bytes, just in case this archive * was created by an old BSD, Solaris, or HP-UX tar with a * broken checksum calculation. */ check = 0; for (i = 0; i < 148; i++) check += (signed char)bytes[i]; for (; i < 156; i++) check += 32; for (; i < 512; i++) check += (signed char)bytes[i]; if (sum == check) return (1); return (0); } /* * Return true if this block contains only nulls. */ static int archive_block_is_null(const char *p) { unsigned i; for (i = 0; i < 512; i++) if (*p++) return (0); return (1); } /* * Interpret 'A' Solaris ACL header */ static int header_Solaris_ACL(struct archive_read *a, struct tar *tar, struct archive_entry *entry, const void *h, size_t *unconsumed) { const struct archive_entry_header_ustar *header; size_t size; int err; int64_t type; char *acl, *p; /* * read_body_to_string adds a NUL terminator, but we need a little * more to make sure that we don't overrun acl_text later. */ header = (const struct archive_entry_header_ustar *)h; size = (size_t)tar_atol(header->size, sizeof(header->size)); err = read_body_to_string(a, tar, &(tar->acl_text), h, unconsumed); if (err != ARCHIVE_OK) return (err); /* Recursively read next header */ err = tar_read_header(a, tar, entry, unconsumed); if ((err != ARCHIVE_OK) && (err != ARCHIVE_WARN)) return (err); /* TODO: Examine the first characters to see if this * is an AIX ACL descriptor. We'll likely never support * them, but it would be polite to recognize and warn when * we do see them. */ /* Leading octal number indicates ACL type and number of entries. 
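 *
 * For example, given the masks tested below, an attribute starting
 * "01000005" would decode as type 01000000 (a POSIX.1e ACL) with
 * 5 entries, since the low 0777777 bits accumulate the entry count
 * and the high bits select the type.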
*/ p = acl = tar->acl_text.s; type = 0; while (*p != '\0' && p < acl + size) { if (*p < '0' || *p > '7') { archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, "Malformed Solaris ACL attribute (invalid digit)"); return(ARCHIVE_WARN); } type <<= 3; type += *p - '0'; if (type > 077777777) { archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, "Malformed Solaris ACL attribute (count too large)"); return (ARCHIVE_WARN); } p++; } switch ((int)type & ~0777777) { case 01000000: /* POSIX.1e ACL */ break; case 03000000: archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, "Solaris NFSv4 ACLs not supported"); return (ARCHIVE_WARN); default: archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, "Malformed Solaris ACL attribute (unsupported type %o)", (int)type); return (ARCHIVE_WARN); } p++; if (p >= acl + size) { archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, "Malformed Solaris ACL attribute (body overflow)"); return(ARCHIVE_WARN); } /* ACL text is null-terminated; find the end. */ size -= (p - acl); acl = p; while (*p != '\0' && p < acl + size) p++; if (tar->sconv_acl == NULL) { tar->sconv_acl = archive_string_conversion_from_charset( &(a->archive), "UTF-8", 1); if (tar->sconv_acl == NULL) return (ARCHIVE_FATAL); } archive_strncpy(&(tar->localname), acl, p - acl); err = archive_acl_parse_l(archive_entry_acl(entry), tar->localname.s, ARCHIVE_ENTRY_ACL_TYPE_ACCESS, tar->sconv_acl); if (err != ARCHIVE_OK) { if (errno == ENOMEM) { archive_set_error(&a->archive, ENOMEM, "Can't allocate memory for ACL"); } else archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, "Malformed Solaris ACL attribute (unparsable)"); } return (err); } /* * Interpret 'K' long linkname header. */ static int header_longlink(struct archive_read *a, struct tar *tar, struct archive_entry *entry, const void *h, size_t *unconsumed) { int err; err = read_body_to_string(a, tar, &(tar->longlink), h, unconsumed); if (err != ARCHIVE_OK) return (err); err = tar_read_header(a, tar, entry, unconsumed); if ((err != ARCHIVE_OK) && (err != ARCHIVE_WARN)) return (err); /* Set symlink if symlink already set, else hardlink. */ archive_entry_copy_link(entry, tar->longlink.s); return (ARCHIVE_OK); } static int set_conversion_failed_error(struct archive_read *a, struct archive_string_conv *sconv, const char *name) { if (errno == ENOMEM) { archive_set_error(&a->archive, ENOMEM, "Can't allocate memory for %s", name); return (ARCHIVE_FATAL); } archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "%s can't be converted from %s to current locale.", name, archive_string_conversion_charset_name(sconv)); return (ARCHIVE_WARN); } /* * Interpret 'L' long filename header. */ static int header_longname(struct archive_read *a, struct tar *tar, struct archive_entry *entry, const void *h, size_t *unconsumed) { int err; err = read_body_to_string(a, tar, &(tar->longname), h, unconsumed); if (err != ARCHIVE_OK) return (err); /* Read and parse "real" header, then override name. */ err = tar_read_header(a, tar, entry, unconsumed); if ((err != ARCHIVE_OK) && (err != ARCHIVE_WARN)) return (err); if (archive_entry_copy_pathname_l(entry, tar->longname.s, archive_strlen(&(tar->longname)), tar->sconv) != 0) err = set_conversion_failed_error(a, tar->sconv, "Pathname"); return (err); } /* * Interpret 'V' GNU tar volume header. */ static int header_volume(struct archive_read *a, struct tar *tar, struct archive_entry *entry, const void *h, size_t *unconsumed) { (void)h; /* Just skip this and read the next header. 
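 * (A GNU volume header carries only a volume label in its
 * name field; there is nothing useful for libarchive to
 * record from it.)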
*/ return (tar_read_header(a, tar, entry, unconsumed)); } /* * Read body of an archive entry into an archive_string object. */ static int read_body_to_string(struct archive_read *a, struct tar *tar, struct archive_string *as, const void *h, size_t *unconsumed) { int64_t size; const struct archive_entry_header_ustar *header; const void *src; (void)tar; /* UNUSED */ header = (const struct archive_entry_header_ustar *)h; size = tar_atol(header->size, sizeof(header->size)); if ((size > 1048576) || (size < 0)) { archive_set_error(&a->archive, EINVAL, "Special header too large"); return (ARCHIVE_FATAL); } /* Fail if we can't make our buffer big enough. */ if (archive_string_ensure(as, (size_t)size+1) == NULL) { archive_set_error(&a->archive, ENOMEM, "No memory"); return (ARCHIVE_FATAL); } tar_flush_unconsumed(a, unconsumed); /* Read the body into the string. */ *unconsumed = (size_t)((size + 511) & ~ 511); src = __archive_read_ahead(a, *unconsumed, NULL); if (src == NULL) { *unconsumed = 0; return (ARCHIVE_FATAL); } memcpy(as->s, src, (size_t)size); as->s[size] = '\0'; as->length = (size_t)size; return (ARCHIVE_OK); } /* * Parse out common header elements. * * This would be the same as header_old_tar, except that the * filename is handled slightly differently for old and POSIX * entries (POSIX entries support a 'prefix'). This factoring * allows header_old_tar and header_ustar * to handle filenames differently, while still putting most of the * common parsing into one place. */ static int header_common(struct archive_read *a, struct tar *tar, struct archive_entry *entry, const void *h) { const struct archive_entry_header_ustar *header; char tartype; int err = ARCHIVE_OK; header = (const struct archive_entry_header_ustar *)h; if (header->linkname[0]) archive_strncpy(&(tar->entry_linkpath), header->linkname, sizeof(header->linkname)); else archive_string_empty(&(tar->entry_linkpath)); /* Parse out the numeric fields (all are octal) */ archive_entry_set_mode(entry, (mode_t)tar_atol(header->mode, sizeof(header->mode))); archive_entry_set_uid(entry, tar_atol(header->uid, sizeof(header->uid))); archive_entry_set_gid(entry, tar_atol(header->gid, sizeof(header->gid))); tar->entry_bytes_remaining = tar_atol(header->size, sizeof(header->size)); if (tar->entry_bytes_remaining < 0) { tar->entry_bytes_remaining = 0; archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, "Tar entry has negative size"); return (ARCHIVE_FATAL); } if (tar->entry_bytes_remaining == INT64_MAX) { /* Note: tar_atol returns INT64_MAX on overflow */ tar->entry_bytes_remaining = 0; archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, "Tar entry size overflow"); return (ARCHIVE_FATAL); } tar->realsize = tar->entry_bytes_remaining; archive_entry_set_size(entry, tar->entry_bytes_remaining); archive_entry_set_mtime(entry, tar_atol(header->mtime, sizeof(header->mtime)), 0); /* Handle the tar type flag appropriately. */ tartype = header->typeflag[0]; switch (tartype) { case '1': /* Hard link */ if (archive_entry_copy_hardlink_l(entry, tar->entry_linkpath.s, archive_strlen(&(tar->entry_linkpath)), tar->sconv) != 0) { err = set_conversion_failed_error(a, tar->sconv, "Linkname"); if (err == ARCHIVE_FATAL) return (err); } /* * The following may seem odd, but: Technically, tar * does not store the file type for a "hard link" * entry, only the fact that it is a hard link. So, I * leave the type zero normally. But, pax interchange * format allows hard links to have data, which * implies that the underlying entry is a regular * file. 
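 * (So a hardlink entry with a nonzero size field is either a
 * pax hardlink that really carries a body, or an old-style
 * archive whose size field must be ignored; the heuristic
 * below decides between the two.)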
*/ if (archive_entry_size(entry) > 0) archive_entry_set_filetype(entry, AE_IFREG); /* * A tricky point: Traditionally, tar readers have * ignored the size field when reading hardlink * entries, and some writers put non-zero sizes even * though the body is empty. POSIX blessed this * convention in the 1988 standard, but broke with * this tradition in 2001 by permitting hardlink * entries to store valid bodies in pax interchange * format, but not in ustar format. Since there is no * hard and fast way to distinguish pax interchange * from earlier archives (the 'x' and 'g' entries are * optional, after all), we need a heuristic. */ if (archive_entry_size(entry) == 0) { /* If the size is already zero, we're done. */ } else if (a->archive.archive_format == ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE) { /* Definitely pax extended; must obey hardlink size. */ } else if (a->archive.archive_format == ARCHIVE_FORMAT_TAR || a->archive.archive_format == ARCHIVE_FORMAT_TAR_GNUTAR) { /* Old-style or GNU tar: we must ignore the size. */ archive_entry_set_size(entry, 0); tar->entry_bytes_remaining = 0; } else if (archive_read_format_tar_bid(a, 50) > 50) { /* * We don't know if it's pax: If the bid * function sees a valid ustar header * immediately following, then let's ignore * the hardlink size. */ archive_entry_set_size(entry, 0); tar->entry_bytes_remaining = 0; } /* * TODO: There are still two cases I'd like to handle: * = a ustar non-pax archive with a hardlink entry at * end-of-archive. (Look for block of nulls following?) * = a pax archive that has not seen any pax headers * and has an entry which is a hardlink entry storing * a body containing an uncompressed tar archive. * The first is worth addressing; I don't see any reliable * way to deal with the second possibility. */ break; case '2': /* Symlink */ archive_entry_set_filetype(entry, AE_IFLNK); archive_entry_set_size(entry, 0); tar->entry_bytes_remaining = 0; if (archive_entry_copy_symlink_l(entry, tar->entry_linkpath.s, archive_strlen(&(tar->entry_linkpath)), tar->sconv) != 0) { err = set_conversion_failed_error(a, tar->sconv, "Linkname"); if (err == ARCHIVE_FATAL) return (err); } break; case '3': /* Character device */ archive_entry_set_filetype(entry, AE_IFCHR); archive_entry_set_size(entry, 0); tar->entry_bytes_remaining = 0; break; case '4': /* Block device */ archive_entry_set_filetype(entry, AE_IFBLK); archive_entry_set_size(entry, 0); tar->entry_bytes_remaining = 0; break; case '5': /* Dir */ archive_entry_set_filetype(entry, AE_IFDIR); archive_entry_set_size(entry, 0); tar->entry_bytes_remaining = 0; break; case '6': /* FIFO device */ archive_entry_set_filetype(entry, AE_IFIFO); archive_entry_set_size(entry, 0); tar->entry_bytes_remaining = 0; break; case 'D': /* GNU incremental directory type */ /* * No special handling is actually required here. * It might be nice someday to preprocess the file list and * provide it to the client, though. */ archive_entry_set_filetype(entry, AE_IFDIR); break; case 'M': /* GNU "Multi-volume" (remainder of file from last archive)*/ /* * As far as I can tell, this is just like a regular file * entry, except that the contents should be _appended_ to * the indicated file at the indicated offset. This may * require some API work to fully support. */ break; case 'N': /* Old GNU "long filename" entry. */ /* The body of this entry is a script for renaming * previously-extracted entries. Ugh. It will never * be supported by libarchive. 
*/ archive_entry_set_filetype(entry, AE_IFREG); break; case 'S': /* GNU sparse files */ /* * Sparse files are really just regular files with * sparse information in the extended area. */ /* FALLTHROUGH */ default: /* Regular file and non-standard types */ /* * Per POSIX: non-recognized types should always be * treated as regular files. */ archive_entry_set_filetype(entry, AE_IFREG); break; } return (err); } /* * Parse out header elements for "old-style" tar archives. */ static int header_old_tar(struct archive_read *a, struct tar *tar, struct archive_entry *entry, const void *h) { const struct archive_entry_header_ustar *header; int err = ARCHIVE_OK, err2; /* Copy filename over (to ensure null termination). */ header = (const struct archive_entry_header_ustar *)h; if (archive_entry_copy_pathname_l(entry, header->name, sizeof(header->name), tar->sconv) != 0) { err = set_conversion_failed_error(a, tar->sconv, "Pathname"); if (err == ARCHIVE_FATAL) return (err); } /* Grab rest of common fields */ err2 = header_common(a, tar, entry, h); if (err > err2) err = err2; tar->entry_padding = 0x1ff & (-tar->entry_bytes_remaining); return (err); } /* * Read a Mac AppleDouble-encoded blob of file metadata, * if there is one. */ static int read_mac_metadata_blob(struct archive_read *a, struct tar *tar, struct archive_entry *entry, const void *h, size_t *unconsumed) { int64_t size; const void *data; const char *p, *name; const wchar_t *wp, *wname; (void)h; /* UNUSED */ wname = wp = archive_entry_pathname_w(entry); if (wp != NULL) { /* Find the last path element. */ for (; *wp != L'\0'; ++wp) { if (wp[0] == '/' && wp[1] != L'\0') wname = wp + 1; } /* * If last path element starts with "._", then * this is a Mac extension. */ if (wname[0] != L'.' || wname[1] != L'_' || wname[2] == L'\0') return ARCHIVE_OK; } else { /* Find the last path element. */ name = p = archive_entry_pathname(entry); if (p == NULL) return (ARCHIVE_FAILED); for (; *p != '\0'; ++p) { if (p[0] == '/' && p[1] != '\0') name = p + 1; } /* * If last path element starts with "._", then * this is a Mac extension. */ if (name[0] != '.' || name[1] != '_' || name[2] == '\0') return ARCHIVE_OK; } /* Read the body as a Mac OS metadata blob. */ size = archive_entry_size(entry); /* * TODO: Look beyond the body here to peek at the next header. * If it's a regular header (not an extension header) * that has the wrong name, just return the current * entry as-is, without consuming the body here. * That would reduce the risk of us mis-identifying * an ordinary file that just happened to have * a name starting with "._". * * Q: Is the above idea really possible? Even * when there are GNU or pax extension entries? */ data = __archive_read_ahead(a, (size_t)size, NULL); if (data == NULL) { *unconsumed = 0; return (ARCHIVE_FATAL); } archive_entry_copy_mac_metadata(entry, data, (size_t)size); *unconsumed = (size_t)((size + 511) & ~ 511); tar_flush_unconsumed(a, unconsumed); return (tar_read_header(a, tar, entry, unconsumed)); } /* * Parse a file header for a pax extended archive entry. 
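 * ('g' global headers are read into tar->pax_global below,
 * but, per the TODO in header_pax_extensions(), their
 * defaults are not yet merged into subsequent entries.)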
*/ static int header_pax_global(struct archive_read *a, struct tar *tar, struct archive_entry *entry, const void *h, size_t *unconsumed) { int err; err = read_body_to_string(a, tar, &(tar->pax_global), h, unconsumed); if (err != ARCHIVE_OK) return (err); err = tar_read_header(a, tar, entry, unconsumed); return (err); } static int header_pax_extensions(struct archive_read *a, struct tar *tar, struct archive_entry *entry, const void *h, size_t *unconsumed) { int err, err2; err = read_body_to_string(a, tar, &(tar->pax_header), h, unconsumed); if (err != ARCHIVE_OK) return (err); /* Parse the next header. */ err = tar_read_header(a, tar, entry, unconsumed); if ((err != ARCHIVE_OK) && (err != ARCHIVE_WARN)) return (err); /* * TODO: Parse global/default options into 'entry' struct here * before handling file-specific options. * * This design (parse standard header, then overwrite with pax * extended attribute data) usually works well, but isn't ideal; * it would be better to parse the pax extended attributes first * and then skip any fields in the standard header that were * defined in the pax header. */ err2 = pax_header(a, tar, entry, tar->pax_header.s); err = err_combine(err, err2); tar->entry_padding = 0x1ff & (-tar->entry_bytes_remaining); return (err); } /* * Parse a file header for a Posix "ustar" archive entry. This also * handles "pax" or "extended ustar" entries. */ static int header_ustar(struct archive_read *a, struct tar *tar, struct archive_entry *entry, const void *h) { const struct archive_entry_header_ustar *header; struct archive_string *as; int err = ARCHIVE_OK, r; header = (const struct archive_entry_header_ustar *)h; /* Copy name into an internal buffer to ensure null-termination. */ as = &(tar->entry_pathname); if (header->prefix[0]) { archive_strncpy(as, header->prefix, sizeof(header->prefix)); if (as->s[archive_strlen(as) - 1] != '/') archive_strappend_char(as, '/'); archive_strncat(as, header->name, sizeof(header->name)); } else { archive_strncpy(as, header->name, sizeof(header->name)); } if (archive_entry_copy_pathname_l(entry, as->s, archive_strlen(as), tar->sconv) != 0) { err = set_conversion_failed_error(a, tar->sconv, "Pathname"); if (err == ARCHIVE_FATAL) return (err); } /* Handle rest of common fields. */ r = header_common(a, tar, entry, h); if (r == ARCHIVE_FATAL) return (r); if (r < err) err = r; /* Handle POSIX ustar fields. */ if (archive_entry_copy_uname_l(entry, header->uname, sizeof(header->uname), tar->sconv) != 0) { err = set_conversion_failed_error(a, tar->sconv, "Uname"); if (err == ARCHIVE_FATAL) return (err); } if (archive_entry_copy_gname_l(entry, header->gname, sizeof(header->gname), tar->sconv) != 0) { err = set_conversion_failed_error(a, tar->sconv, "Gname"); if (err == ARCHIVE_FATAL) return (err); } /* Parse out device numbers only for char and block specials. */ if (header->typeflag[0] == '3' || header->typeflag[0] == '4') { archive_entry_set_rdevmajor(entry, (dev_t) tar_atol(header->rdevmajor, sizeof(header->rdevmajor))); archive_entry_set_rdevminor(entry, (dev_t) tar_atol(header->rdevminor, sizeof(header->rdevminor))); } tar->entry_padding = 0x1ff & (-tar->entry_bytes_remaining); return (err); } /* * Parse the pax extended attributes record. * * Returns non-zero if there's an error in the data. 
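 * Each record in the extended header has the form
 *     "%d %s=%s\n", <length>, <keyword>, <value>
 * where <length> counts the entire record, including the
 * length digits themselves and the trailing newline; e.g.
 * "20 atime=1434085653\n" is exactly 20 bytes long.  The
 * loop below parses the decimal length, splits the line at
 * the first '=', and hands each key/value pair to
 * pax_attribute().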
*/ static int pax_header(struct archive_read *a, struct tar *tar, struct archive_entry *entry, char *attr) { size_t attr_length, l, line_length; char *p; char *key, *value; struct archive_string *as; struct archive_string_conv *sconv; int err, err2; attr_length = strlen(attr); tar->pax_hdrcharset_binary = 0; archive_string_empty(&(tar->entry_gname)); archive_string_empty(&(tar->entry_linkpath)); archive_string_empty(&(tar->entry_pathname)); archive_string_empty(&(tar->entry_pathname_override)); archive_string_empty(&(tar->entry_uname)); err = ARCHIVE_OK; while (attr_length > 0) { /* Parse decimal length field at start of line. */ line_length = 0; l = attr_length; p = attr; /* Record start of line. */ while (l>0) { if (*p == ' ') { p++; l--; break; } if (*p < '0' || *p > '9') { archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, "Ignoring malformed pax extended attributes"); return (ARCHIVE_WARN); } line_length *= 10; line_length += *p - '0'; if (line_length > 999999) { archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, "Rejecting pax extended attribute > 1MB"); return (ARCHIVE_WARN); } p++; l--; } /* * Parsed length must be no bigger than available data, * at least 1, and the last character of the line must * be '\n'. */ if (line_length > attr_length || line_length < 1 || attr[line_length - 1] != '\n') { archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, "Ignoring malformed pax extended attribute"); return (ARCHIVE_WARN); } /* Null-terminate the line. */ attr[line_length - 1] = '\0'; /* Find end of key and null terminate it. */ key = p; if (key[0] == '=') return (-1); while (*p && *p != '=') ++p; if (*p == '\0') { archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, "Invalid pax extended attributes"); return (ARCHIVE_WARN); } *p = '\0'; /* Identify null-terminated 'value' portion. */ value = p + 1; /* Identify this attribute and set it in the entry. */ err2 = pax_attribute(a, tar, entry, key, value); if (err2 == ARCHIVE_FATAL) return (err2); err = err_combine(err, err2); /* Skip to next line */ attr += line_length; attr_length -= line_length; } /* * PAX format uses UTF-8 as default charset for its metadata * unless hdrcharset=BINARY is present in its header. * We apply the charset specified by the hdrcharset option only * when the hdrcharset attribute(in PAX header) is BINARY because * we respect the charset described in PAX header and BINARY also * means that metadata(filename,uname and gname) character-set * is unknown. */ if (tar->pax_hdrcharset_binary) sconv = tar->opt_sconv; else { sconv = archive_string_conversion_from_charset( &(a->archive), "UTF-8", 1); if (sconv == NULL) return (ARCHIVE_FATAL); if (tar->compat_2x) archive_string_conversion_set_opt(sconv, SCONV_SET_OPT_UTF8_LIBARCHIVE2X); } if (archive_strlen(&(tar->entry_gname)) > 0) { if (archive_entry_copy_gname_l(entry, tar->entry_gname.s, archive_strlen(&(tar->entry_gname)), sconv) != 0) { err = set_conversion_failed_error(a, sconv, "Gname"); if (err == ARCHIVE_FATAL) return (err); /* Use a converted an original name. */ archive_entry_copy_gname(entry, tar->entry_gname.s); } } if (archive_strlen(&(tar->entry_linkpath)) > 0) { if (archive_entry_copy_link_l(entry, tar->entry_linkpath.s, archive_strlen(&(tar->entry_linkpath)), sconv) != 0) { err = set_conversion_failed_error(a, sconv, "Linkname"); if (err == ARCHIVE_FATAL) return (err); /* Use a converted an original name. 
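 * (That is, fall back to copying the raw linkname bytes
 * without charset conversion rather than dropping the name.)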
*/ archive_entry_copy_link(entry, tar->entry_linkpath.s); } } /* * Some extensions (such as the GNU sparse file extensions) * deliberately store a synthetic name under the regular 'path' * attribute and the real file name under a different attribute. * Since we're supposed to not care about the order, we * have no choice but to store all of the various filenames * we find and figure it all out afterwards. This is the * figuring out part. */ as = NULL; if (archive_strlen(&(tar->entry_pathname_override)) > 0) as = &(tar->entry_pathname_override); else if (archive_strlen(&(tar->entry_pathname)) > 0) as = &(tar->entry_pathname); if (as != NULL) { if (archive_entry_copy_pathname_l(entry, as->s, archive_strlen(as), sconv) != 0) { err = set_conversion_failed_error(a, sconv, "Pathname"); if (err == ARCHIVE_FATAL) return (err); /* Use a converted an original name. */ archive_entry_copy_pathname(entry, as->s); } } if (archive_strlen(&(tar->entry_uname)) > 0) { if (archive_entry_copy_uname_l(entry, tar->entry_uname.s, archive_strlen(&(tar->entry_uname)), sconv) != 0) { err = set_conversion_failed_error(a, sconv, "Uname"); if (err == ARCHIVE_FATAL) return (err); /* Use a converted an original name. */ archive_entry_copy_uname(entry, tar->entry_uname.s); } } return (err); } static int pax_attribute_xattr(struct archive_entry *entry, const char *name, const char *value) { char *name_decoded; void *value_decoded; size_t value_len; if (strlen(name) < 18 || (memcmp(name, "LIBARCHIVE.xattr.", 17)) != 0) return 3; name += 17; /* URL-decode name */ name_decoded = url_decode(name); if (name_decoded == NULL) return 2; /* Base-64 decode value */ value_decoded = base64_decode(value, strlen(value), &value_len); if (value_decoded == NULL) { free(name_decoded); return 1; } archive_entry_xattr_add_entry(entry, name_decoded, value_decoded, value_len); free(name_decoded); free(value_decoded); return 0; } -static int -pax_attribute_acl(struct archive_read *a, struct tar *tar, - struct archive_entry *entry, const char *value, int type) -{ - int r; - const char* errstr; - - switch (type) { - case ARCHIVE_ENTRY_ACL_TYPE_ACCESS: - errstr = "SCHILY.acl.access"; - break; - case ARCHIVE_ENTRY_ACL_TYPE_DEFAULT: - errstr = "SCHILY.acl.default"; - break; - case ARCHIVE_ENTRY_ACL_TYPE_NFS4: - errstr = "SCHILY.acl.ace"; - break; - default: - archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, - "Unknown ACL type: %d", type); - return(ARCHIVE_FATAL); - } - - if (tar->sconv_acl == NULL) { - tar->sconv_acl = - archive_string_conversion_from_charset( - &(a->archive), "UTF-8", 1); - if (tar->sconv_acl == NULL) - return (ARCHIVE_FATAL); - } - - r = archive_acl_parse_l(archive_entry_acl(entry), value, type, - tar->sconv_acl); - if (r != ARCHIVE_OK) { - if (r == ARCHIVE_FATAL) { - archive_set_error(&a->archive, ENOMEM, - "%s %s", "Can't allocate memory for ", - errstr); - return (r); - } - archive_set_error(&a->archive, - ARCHIVE_ERRNO_MISC, "%s %s", "Parse error: ", errstr); - } - return (r); -} - /* * Parse a single key=value attribute. key/value pointers are * assumed to point into reasonably long-lived storage. * * Note that POSIX reserves all-lowercase keywords. Vendor-specific * extensions should always have keywords of the form "VENDOR.attribute" * In particular, it's quite feasible to support many different * vendor extensions here. I'm using "LIBARCHIVE" for extensions * unique to this library. * * Investigate other vendor-specific extensions and see if * any of them look useful. 
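 * Keys currently recognized below include, for example:
 *     GNU.sparse.*        - GNU tar sparse-file maps
 *     SCHILY.*            - "star" ACLs, device numbers, etc.
 *     LIBARCHIVE.*        - creationtime and xattr storage
 *     atime, ctime, mtime, gid, uid, gname, uname,
 *     path, linkpath, size, hdrcharset - POSIX standard keys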
*/ static int pax_attribute(struct archive_read *a, struct tar *tar, struct archive_entry *entry, const char *key, const char *value) { int64_t s; long n; int err = ARCHIVE_OK, r; #ifndef __FreeBSD__ if (value == NULL) value = ""; /* Disable compiler warning; do not pass * NULL pointer to strlen(). */ #endif switch (key[0]) { case 'G': /* GNU "0.0" sparse pax format. */ if (strcmp(key, "GNU.sparse.numblocks") == 0) { tar->sparse_offset = -1; tar->sparse_numbytes = -1; tar->sparse_gnu_major = 0; tar->sparse_gnu_minor = 0; } if (strcmp(key, "GNU.sparse.offset") == 0) { tar->sparse_offset = tar_atol10(value, strlen(value)); if (tar->sparse_numbytes != -1) { if (gnu_add_sparse_entry(a, tar, tar->sparse_offset, tar->sparse_numbytes) != ARCHIVE_OK) return (ARCHIVE_FATAL); tar->sparse_offset = -1; tar->sparse_numbytes = -1; } } if (strcmp(key, "GNU.sparse.numbytes") == 0) { tar->sparse_numbytes = tar_atol10(value, strlen(value)); if (tar->sparse_numbytes != -1) { if (gnu_add_sparse_entry(a, tar, tar->sparse_offset, tar->sparse_numbytes) != ARCHIVE_OK) return (ARCHIVE_FATAL); tar->sparse_offset = -1; tar->sparse_numbytes = -1; } } if (strcmp(key, "GNU.sparse.size") == 0) { tar->realsize = tar_atol10(value, strlen(value)); archive_entry_set_size(entry, tar->realsize); } /* GNU "0.1" sparse pax format. */ if (strcmp(key, "GNU.sparse.map") == 0) { tar->sparse_gnu_major = 0; tar->sparse_gnu_minor = 1; if (gnu_sparse_01_parse(a, tar, value) != ARCHIVE_OK) return (ARCHIVE_WARN); } /* GNU "1.0" sparse pax format */ if (strcmp(key, "GNU.sparse.major") == 0) { tar->sparse_gnu_major = (int)tar_atol10(value, strlen(value)); tar->sparse_gnu_pending = 1; } if (strcmp(key, "GNU.sparse.minor") == 0) { tar->sparse_gnu_minor = (int)tar_atol10(value, strlen(value)); tar->sparse_gnu_pending = 1; } if (strcmp(key, "GNU.sparse.name") == 0) { /* * The real filename; when storing sparse * files, GNU tar puts a synthesized name into * the regular 'path' attribute in an attempt * to limit confusion. ;-) */ archive_strcpy(&(tar->entry_pathname_override), value); } if (strcmp(key, "GNU.sparse.realsize") == 0) { tar->realsize = tar_atol10(value, strlen(value)); archive_entry_set_size(entry, tar->realsize); } break; case 'L': /* Our extensions */ /* TODO: Handle arbitrary extended attributes... 
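 * The LIBARCHIVE.xattr.* keys handled just below store the
 * attribute name with URL-style %XX escapes and the value in
 * base64; e.g.
 *     LIBARCHIVE.xattr.user.mime%5Ftype=dGV4dC9wbGFpbg==
 * carries the xattr "user.mime_type" with value "text/plain".
 * pax_attribute_xattr() above reverses both encodings.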
*/ /* if (strcmp(key, "LIBARCHIVE.xxxxxxx") == 0) archive_entry_set_xxxxxx(entry, value); */ if (strcmp(key, "LIBARCHIVE.creationtime") == 0) { pax_time(value, &s, &n); archive_entry_set_birthtime(entry, s, n); } if (memcmp(key, "LIBARCHIVE.xattr.", 17) == 0) pax_attribute_xattr(entry, key, value); break; case 'S': /* We support some keys used by the "star" archiver */ if (strcmp(key, "SCHILY.acl.access") == 0) { - r = pax_attribute_acl(a, tar, entry, value, - ARCHIVE_ENTRY_ACL_TYPE_ACCESS); - if (r == ARCHIVE_FATAL) - return (r); + if (tar->sconv_acl == NULL) { + tar->sconv_acl = + archive_string_conversion_from_charset( + &(a->archive), "UTF-8", 1); + if (tar->sconv_acl == NULL) + return (ARCHIVE_FATAL); + } + + r = archive_acl_parse_l(archive_entry_acl(entry), + value, ARCHIVE_ENTRY_ACL_TYPE_ACCESS, + tar->sconv_acl); + if (r != ARCHIVE_OK) { + err = r; + if (err == ARCHIVE_FATAL) { + archive_set_error(&a->archive, ENOMEM, + "Can't allocate memory for " + "SCHILY.acl.access"); + return (err); + } + archive_set_error(&a->archive, + ARCHIVE_ERRNO_MISC, + "Parse error: SCHILY.acl.access"); + } } else if (strcmp(key, "SCHILY.acl.default") == 0) { - r = pax_attribute_acl(a, tar, entry, value, - ARCHIVE_ENTRY_ACL_TYPE_DEFAULT); - if (r == ARCHIVE_FATAL) - return (r); - } else if (strcmp(key, "SCHILY.acl.ace") == 0) { - r = pax_attribute_acl(a, tar, entry, value, - ARCHIVE_ENTRY_ACL_TYPE_NFS4); - if (r == ARCHIVE_FATAL) - return (r); + if (tar->sconv_acl == NULL) { + tar->sconv_acl = + archive_string_conversion_from_charset( + &(a->archive), "UTF-8", 1); + if (tar->sconv_acl == NULL) + return (ARCHIVE_FATAL); + } + + r = archive_acl_parse_l(archive_entry_acl(entry), + value, ARCHIVE_ENTRY_ACL_TYPE_DEFAULT, + tar->sconv_acl); + if (r != ARCHIVE_OK) { + err = r; + if (err == ARCHIVE_FATAL) { + archive_set_error(&a->archive, ENOMEM, + "Can't allocate memory for " + "SCHILY.acl.default"); + return (err); + } + archive_set_error(&a->archive, + ARCHIVE_ERRNO_MISC, + "Parse error: SCHILY.acl.default"); + } } else if (strcmp(key, "SCHILY.devmajor") == 0) { archive_entry_set_rdevmajor(entry, (dev_t)tar_atol10(value, strlen(value))); } else if (strcmp(key, "SCHILY.devminor") == 0) { archive_entry_set_rdevminor(entry, (dev_t)tar_atol10(value, strlen(value))); } else if (strcmp(key, "SCHILY.fflags") == 0) { archive_entry_copy_fflags_text(entry, value); } else if (strcmp(key, "SCHILY.dev") == 0) { archive_entry_set_dev(entry, (dev_t)tar_atol10(value, strlen(value))); } else if (strcmp(key, "SCHILY.ino") == 0) { archive_entry_set_ino(entry, tar_atol10(value, strlen(value))); } else if (strcmp(key, "SCHILY.nlink") == 0) { archive_entry_set_nlink(entry, (unsigned) tar_atol10(value, strlen(value))); } else if (strcmp(key, "SCHILY.realsize") == 0) { tar->realsize = tar_atol10(value, strlen(value)); archive_entry_set_size(entry, tar->realsize); } else if (strcmp(key, "SUN.holesdata") == 0) { /* A Solaris extension for sparse. */ r = solaris_sparse_parse(a, tar, entry, value); if (r < err) { if (r == ARCHIVE_FATAL) return (r); err = r; archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, "Parse error: SUN.holesdata"); } } break; case 'a': if (strcmp(key, "atime") == 0) { pax_time(value, &s, &n); archive_entry_set_atime(entry, s, n); } break; case 'c': if (strcmp(key, "ctime") == 0) { pax_time(value, &s, &n); archive_entry_set_ctime(entry, s, n); } else if (strcmp(key, "charset") == 0) { /* TODO: Publish charset information in entry. */ } else if (strcmp(key, "comment") == 0) { /* TODO: Publish comment in entry. 
*/ } break; case 'g': if (strcmp(key, "gid") == 0) { archive_entry_set_gid(entry, tar_atol10(value, strlen(value))); } else if (strcmp(key, "gname") == 0) { archive_strcpy(&(tar->entry_gname), value); } break; case 'h': if (strcmp(key, "hdrcharset") == 0) { if (strcmp(value, "BINARY") == 0) /* Binary mode. */ tar->pax_hdrcharset_binary = 1; else if (strcmp(value, "ISO-IR 10646 2000 UTF-8") == 0) tar->pax_hdrcharset_binary = 0; } break; case 'l': /* pax interchange doesn't distinguish hardlink vs. symlink. */ if (strcmp(key, "linkpath") == 0) { archive_strcpy(&(tar->entry_linkpath), value); } break; case 'm': if (strcmp(key, "mtime") == 0) { pax_time(value, &s, &n); archive_entry_set_mtime(entry, s, n); } break; case 'p': if (strcmp(key, "path") == 0) { archive_strcpy(&(tar->entry_pathname), value); } break; case 'r': /* POSIX has reserved 'realtime.*' */ break; case 's': /* POSIX has reserved 'security.*' */ /* Someday: if (strcmp(key, "security.acl") == 0) { ... } */ if (strcmp(key, "size") == 0) { /* "size" is the size of the data in the entry. */ tar->entry_bytes_remaining = tar_atol10(value, strlen(value)); /* * But, "size" is not necessarily the size of * the file on disk; if this is a sparse file, * the disk size may have already been set from * GNU.sparse.realsize or GNU.sparse.size or * an old GNU header field or SCHILY.realsize * or .... */ if (tar->realsize < 0) { archive_entry_set_size(entry, tar->entry_bytes_remaining); tar->realsize = tar->entry_bytes_remaining; } } break; case 'u': if (strcmp(key, "uid") == 0) { archive_entry_set_uid(entry, tar_atol10(value, strlen(value))); } else if (strcmp(key, "uname") == 0) { archive_strcpy(&(tar->entry_uname), value); } break; } return (err); } /* * parse a decimal time value, which may include a fractional portion */ static void pax_time(const char *p, int64_t *ps, long *pn) { char digit; int64_t s; unsigned long l; int sign; int64_t limit, last_digit_limit; limit = INT64_MAX / 10; last_digit_limit = INT64_MAX % 10; s = 0; sign = 1; if (*p == '-') { sign = -1; p++; } while (*p >= '0' && *p <= '9') { digit = *p - '0'; if (s > limit || (s == limit && digit > last_digit_limit)) { s = INT64_MAX; break; } s = (s * 10) + digit; ++p; } *ps = s * sign; /* Calculate nanoseconds. */ *pn = 0; if (*p != '.') return; l = 100000000UL; do { ++p; if (*p >= '0' && *p <= '9') *pn += (*p - '0') * l; else break; } while (l /= 10); } /* * Parse GNU tar header */ static int header_gnutar(struct archive_read *a, struct tar *tar, struct archive_entry *entry, const void *h, size_t *unconsumed) { const struct archive_entry_header_gnutar *header; int64_t t; int err = ARCHIVE_OK; /* * GNU header is like POSIX ustar, except 'prefix' is * replaced with some other fields. This also means the * filename is stored as in old-style archives. */ /* Grab fields common to all tar variants. */ err = header_common(a, tar, entry, h); if (err == ARCHIVE_FATAL) return (err); /* Copy filename over (to ensure null termination). */ header = (const struct archive_entry_header_gnutar *)h; if (archive_entry_copy_pathname_l(entry, header->name, sizeof(header->name), tar->sconv) != 0) { err = set_conversion_failed_error(a, tar->sconv, "Pathname"); if (err == ARCHIVE_FATAL) return (err); } /* Fields common to ustar and GNU */ /* XXX Can the following be factored out since it's common * to ustar and gnu tar? Is it okay to move it down into * header_common, perhaps? 
*/ if (archive_entry_copy_uname_l(entry, header->uname, sizeof(header->uname), tar->sconv) != 0) { err = set_conversion_failed_error(a, tar->sconv, "Uname"); if (err == ARCHIVE_FATAL) return (err); } if (archive_entry_copy_gname_l(entry, header->gname, sizeof(header->gname), tar->sconv) != 0) { err = set_conversion_failed_error(a, tar->sconv, "Gname"); if (err == ARCHIVE_FATAL) return (err); } /* Parse out device numbers only for char and block specials */ if (header->typeflag[0] == '3' || header->typeflag[0] == '4') { archive_entry_set_rdevmajor(entry, (dev_t) tar_atol(header->rdevmajor, sizeof(header->rdevmajor))); archive_entry_set_rdevminor(entry, (dev_t) tar_atol(header->rdevminor, sizeof(header->rdevminor))); } else archive_entry_set_rdev(entry, 0); tar->entry_padding = 0x1ff & (-tar->entry_bytes_remaining); /* Grab GNU-specific fields. */ t = tar_atol(header->atime, sizeof(header->atime)); if (t > 0) archive_entry_set_atime(entry, t, 0); t = tar_atol(header->ctime, sizeof(header->ctime)); if (t > 0) archive_entry_set_ctime(entry, t, 0); if (header->realsize[0] != 0) { tar->realsize = tar_atol(header->realsize, sizeof(header->realsize)); archive_entry_set_size(entry, tar->realsize); } if (header->sparse[0].offset[0] != 0) { if (gnu_sparse_old_read(a, tar, header, unconsumed) != ARCHIVE_OK) return (ARCHIVE_FATAL); } else { if (header->isextended[0] != 0) { /* XXX WTF? XXX */ } } return (err); } static int gnu_add_sparse_entry(struct archive_read *a, struct tar *tar, int64_t offset, int64_t remaining) { struct sparse_block *p; p = (struct sparse_block *)malloc(sizeof(*p)); if (p == NULL) { archive_set_error(&a->archive, ENOMEM, "Out of memory"); return (ARCHIVE_FATAL); } memset(p, 0, sizeof(*p)); if (tar->sparse_last != NULL) tar->sparse_last->next = p; else tar->sparse_list = p; tar->sparse_last = p; if (remaining < 0 || offset < 0) { archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, "Malformed sparse map data"); return (ARCHIVE_FATAL); } p->offset = offset; p->remaining = remaining; return (ARCHIVE_OK); } static void gnu_clear_sparse_list(struct tar *tar) { struct sparse_block *p; while (tar->sparse_list != NULL) { p = tar->sparse_list; tar->sparse_list = p->next; free(p); } tar->sparse_last = NULL; } /* * GNU tar old-format sparse data. * * GNU old-format sparse data is stored in a fixed-field * format. Offset/size values are 11-byte octal fields (same * format as 'size' field in ustar header). These are * stored in the header, allocating subsequent header blocks * as needed. Extending the header in this way is a pretty * severe POSIX violation; this design has earned GNU tar a * lot of criticism. 
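 * Layout: the base header holds four offset/numbytes pairs
 * plus an 'isextended' flag; when that flag is set, extra
 * 512-byte blocks follow, each carrying up to 21 more pairs
 * and its own 'isextended' flag (see the local struct
 * extended in gnu_sparse_old_read() below).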
*/ static int gnu_sparse_old_read(struct archive_read *a, struct tar *tar, const struct archive_entry_header_gnutar *header, size_t *unconsumed) { ssize_t bytes_read; const void *data; struct extended { struct gnu_sparse sparse[21]; char isextended[1]; char padding[7]; }; const struct extended *ext; if (gnu_sparse_old_parse(a, tar, header->sparse, 4) != ARCHIVE_OK) return (ARCHIVE_FATAL); if (header->isextended[0] == 0) return (ARCHIVE_OK); do { tar_flush_unconsumed(a, unconsumed); data = __archive_read_ahead(a, 512, &bytes_read); if (bytes_read < 0) return (ARCHIVE_FATAL); if (bytes_read < 512) { archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Truncated tar archive " "detected while reading sparse file data"); return (ARCHIVE_FATAL); } *unconsumed = 512; ext = (const struct extended *)data; if (gnu_sparse_old_parse(a, tar, ext->sparse, 21) != ARCHIVE_OK) return (ARCHIVE_FATAL); } while (ext->isextended[0] != 0); if (tar->sparse_list != NULL) tar->entry_offset = tar->sparse_list->offset; return (ARCHIVE_OK); } static int gnu_sparse_old_parse(struct archive_read *a, struct tar *tar, const struct gnu_sparse *sparse, int length) { while (length > 0 && sparse->offset[0] != 0) { if (gnu_add_sparse_entry(a, tar, tar_atol(sparse->offset, sizeof(sparse->offset)), tar_atol(sparse->numbytes, sizeof(sparse->numbytes))) != ARCHIVE_OK) return (ARCHIVE_FATAL); sparse++; length--; } return (ARCHIVE_OK); } /* * GNU tar sparse format 0.0 * * Beginning with GNU tar 1.15, sparse files are stored using * information in the pax extended header. The GNU tar maintainers * have gone through a number of variations in the process of working * out this scheme; fortunately, they're all numbered. * * Sparse format 0.0 uses attribute GNU.sparse.numblocks to store the * number of blocks, and GNU.sparse.offset/GNU.sparse.numbytes to * store offset/size for each block. The repeated instances of these * latter fields violate the pax specification (which frowns on * duplicate keys), so this format was quickly replaced. */ /* * GNU tar sparse format 0.1 * * This version replaced the offset/numbytes attributes with * a single "map" attribute that stored a list of integers. This * format had two problems: First, the "map" attribute could be very * long, which caused problems for some implementations. More * importantly, the sparse data was lost when extracted by archivers * that didn't recognize this extension. */ static int gnu_sparse_01_parse(struct archive_read *a, struct tar *tar, const char *p) { const char *e; int64_t offset = -1, size = -1; for (;;) { e = p; while (*e != '\0' && *e != ',') { if (*e < '0' || *e > '9') return (ARCHIVE_WARN); e++; } if (offset < 0) { offset = tar_atol10(p, e - p); if (offset < 0) return (ARCHIVE_WARN); } else { size = tar_atol10(p, e - p); if (size < 0) return (ARCHIVE_WARN); if (gnu_add_sparse_entry(a, tar, offset, size) != ARCHIVE_OK) return (ARCHIVE_FATAL); offset = -1; } if (*e == '\0') return (ARCHIVE_OK); p = e + 1; } } /* * GNU tar sparse format 1.0 * * The idea: The offset/size data is stored as a series of base-10 * ASCII numbers prepended to the file data, so that dearchivers that * don't support this format will extract the block map along with the * data and a separate post-process can restore the sparseness. * * Unfortunately, GNU tar 1.16 had a bug that added unnecessary * padding to the body of the file when using this format. GNU tar * 1.17 corrected this bug without bumping the version number, so * it's not possible to support both variants. 
This code supports * the later variant at the expense of not supporting the former. * * This variant also replaced GNU.sparse.size with GNU.sparse.realsize * and introduced the GNU.sparse.major/GNU.sparse.minor attributes. */ /* * Read the next line from the input, and parse it as a decimal * integer followed by '\n'. Returns positive integer value or * negative on error. */ static int64_t gnu_sparse_10_atol(struct archive_read *a, struct tar *tar, int64_t *remaining, size_t *unconsumed) { int64_t l, limit, last_digit_limit; const char *p; ssize_t bytes_read; int base, digit; base = 10; limit = INT64_MAX / base; last_digit_limit = INT64_MAX % base; /* * Skip any lines starting with '#'; GNU tar specs * don't require this, but they should. */ do { bytes_read = readline(a, tar, &p, (ssize_t)tar_min(*remaining, 100), unconsumed); if (bytes_read <= 0) return (ARCHIVE_FATAL); *remaining -= bytes_read; } while (p[0] == '#'); l = 0; while (bytes_read > 0) { if (*p == '\n') return (l); if (*p < '0' || *p >= '0' + base) return (ARCHIVE_WARN); digit = *p - '0'; if (l > limit || (l == limit && digit > last_digit_limit)) l = INT64_MAX; /* Truncate on overflow. */ else l = (l * base) + digit; p++; bytes_read--; } /* TODO: Error message. */ return (ARCHIVE_WARN); } /* * Returns length (in bytes) of the sparse data description * that was read. */ static ssize_t gnu_sparse_10_read(struct archive_read *a, struct tar *tar, size_t *unconsumed) { ssize_t bytes_read; int entries; int64_t offset, size, to_skip, remaining; /* Clear out the existing sparse list. */ gnu_clear_sparse_list(tar); remaining = tar->entry_bytes_remaining; /* Parse entries. */ entries = (int)gnu_sparse_10_atol(a, tar, &remaining, unconsumed); if (entries < 0) return (ARCHIVE_FATAL); /* Parse the individual entries. */ while (entries-- > 0) { /* Parse offset/size */ offset = gnu_sparse_10_atol(a, tar, &remaining, unconsumed); if (offset < 0) return (ARCHIVE_FATAL); size = gnu_sparse_10_atol(a, tar, &remaining, unconsumed); if (size < 0) return (ARCHIVE_FATAL); /* Add a new sparse entry. */ if (gnu_add_sparse_entry(a, tar, offset, size) != ARCHIVE_OK) return (ARCHIVE_FATAL); } /* Skip rest of block... */ tar_flush_unconsumed(a, unconsumed); bytes_read = (ssize_t)(tar->entry_bytes_remaining - remaining); to_skip = 0x1ff & -bytes_read; if (to_skip != __archive_read_consume(a, to_skip)) return (ARCHIVE_FATAL); return ((ssize_t)(bytes_read + to_skip)); } /* * Solaris pax extension for a sparse file. This is recorded with the * data and hole pairs. The way recording sparse information by Solaris' * pax simply indicates where data and sparse are, so the stored contents * consist of both data and hole. */ static int solaris_sparse_parse(struct archive_read *a, struct tar *tar, struct archive_entry *entry, const char *p) { const char *e; int64_t start, end; int hole = 1; (void)entry; /* UNUSED */ end = 0; if (*p == ' ') p++; else return (ARCHIVE_WARN); for (;;) { e = p; while (*e != '\0' && *e != ' ') { if (*e < '0' || *e > '9') return (ARCHIVE_WARN); e++; } start = end; end = tar_atol10(p, e - p); if (end < 0) return (ARCHIVE_WARN); if (start < end) { if (gnu_add_sparse_entry(a, tar, start, end - start) != ARCHIVE_OK) return (ARCHIVE_FATAL); tar->sparse_last->hole = hole; } if (*e == '\0') return (ARCHIVE_OK); p = e + 1; hole = hole == 0; } } /*- * Convert text->integer. * * Traditional tar formats (including POSIX) specify base-8 for * all of the standard numeric fields. 
This is a significant limitation * in practice: * = file size is limited to 8GB * = rdevmajor and rdevminor are limited to 21 bits * = uid/gid are limited to 21 bits * * There are two workarounds for this: * = pax extended headers, which use variable-length string fields * = GNU tar and STAR both allow either base-8 or base-256 in * most fields. The high bit is set to indicate base-256. * * On read, this implementation supports both extensions. */ static int64_t tar_atol(const char *p, size_t char_cnt) { /* * Technically, GNU tar considers a field to be in base-256 * only if the first byte is 0xff or 0x80. */ if (*p & 0x80) return (tar_atol256(p, char_cnt)); return (tar_atol8(p, char_cnt)); } /* * Note that this implementation does not (and should not!) obey * locale settings; you cannot simply substitute strtol here, since * it does obey locale. */ static int64_t tar_atol_base_n(const char *p, size_t char_cnt, int base) { int64_t l, maxval, limit, last_digit_limit; int digit, sign; maxval = INT64_MAX; limit = INT64_MAX / base; last_digit_limit = INT64_MAX % base; /* the pointer will not be dereferenced if char_cnt is zero * due to the way the && operator is evaulated. */ while (char_cnt != 0 && (*p == ' ' || *p == '\t')) { p++; char_cnt--; } sign = 1; if (char_cnt != 0 && *p == '-') { sign = -1; p++; char_cnt--; maxval = INT64_MIN; limit = -(INT64_MIN / base); last_digit_limit = INT64_MIN % base; } l = 0; if (char_cnt != 0) { digit = *p - '0'; while (digit >= 0 && digit < base && char_cnt != 0) { if (l>limit || (l == limit && digit > last_digit_limit)) { return maxval; /* Truncate on overflow. */ } l = (l * base) + digit; digit = *++p - '0'; char_cnt--; } } return (sign < 0) ? -l : l; } static int64_t tar_atol8(const char *p, size_t char_cnt) { return tar_atol_base_n(p, char_cnt, 8); } static int64_t tar_atol10(const char *p, size_t char_cnt) { return tar_atol_base_n(p, char_cnt, 10); } /* * Parse a base-256 integer. This is just a variable-length * twos-complement signed binary value in big-endian order, except * that the high-order bit is ignored. The values here can be up to * 12 bytes, so we need to be careful about overflowing 64-bit * (8-byte) integers. * * This code unashamedly assumes that the local machine uses 8-bit * bytes and twos-complement arithmetic. */ static int64_t tar_atol256(const char *_p, size_t char_cnt) { uint64_t l; const unsigned char *p = (const unsigned char *)_p; unsigned char c, neg; /* Extend 7-bit 2s-comp to 8-bit 2s-comp, decide sign. */ c = *p; if (c & 0x40) { neg = 0xff; c |= 0x80; l = ~ARCHIVE_LITERAL_ULL(0); } else { neg = 0; c &= 0x7f; l = 0; } /* If more than 8 bytes, check that we can ignore * high-order bits without overflow. */ while (char_cnt > sizeof(int64_t)) { --char_cnt; if (c != neg) return neg ? INT64_MIN : INT64_MAX; c = *++p; } /* c is first byte that fits; if sign mismatch, return overflow */ if ((c ^ neg) & 0x80) { return neg ? INT64_MIN : INT64_MAX; } /* Accumulate remaining bytes. */ while (--char_cnt > 0) { l = (l << 8) | c; c = *++p; } l = (l << 8) | c; /* Return signed twos-complement value. */ return (int64_t)(l); } /* * Returns length of line (including trailing newline) * or negative on error. 'start' argument is updated to * point to first character of line. This avoids copying * when possible. 
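 * (If the newline falls inside the first read-ahead chunk,
 * 'start' is pointed directly into the I/O buffer and nothing
 * is copied; otherwise the partial chunks are accumulated
 * into tar->line and 'start' points there instead.)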
*/ static ssize_t readline(struct archive_read *a, struct tar *tar, const char **start, ssize_t limit, size_t *unconsumed) { ssize_t bytes_read; ssize_t total_size = 0; const void *t; const char *s; void *p; tar_flush_unconsumed(a, unconsumed); t = __archive_read_ahead(a, 1, &bytes_read); if (bytes_read <= 0) return (ARCHIVE_FATAL); s = t; /* Start of line? */ p = memchr(t, '\n', bytes_read); /* If we found '\n' in the read buffer, return pointer to that. */ if (p != NULL) { bytes_read = 1 + ((const char *)p) - s; if (bytes_read > limit) { archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Line too long"); return (ARCHIVE_FATAL); } *unconsumed = bytes_read; *start = s; return (bytes_read); } *unconsumed = bytes_read; /* Otherwise, we need to accumulate in a line buffer. */ for (;;) { if (total_size + bytes_read > limit) { archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Line too long"); return (ARCHIVE_FATAL); } if (archive_string_ensure(&tar->line, total_size + bytes_read) == NULL) { archive_set_error(&a->archive, ENOMEM, "Can't allocate working buffer"); return (ARCHIVE_FATAL); } memcpy(tar->line.s + total_size, t, bytes_read); tar_flush_unconsumed(a, unconsumed); total_size += bytes_read; /* If we found '\n', clean up and return. */ if (p != NULL) { *start = tar->line.s; return (total_size); } /* Read some more. */ t = __archive_read_ahead(a, 1, &bytes_read); if (bytes_read <= 0) return (ARCHIVE_FATAL); s = t; /* Start of line? */ p = memchr(t, '\n', bytes_read); /* If we found '\n', trim the read. */ if (p != NULL) { bytes_read = 1 + ((const char *)p) - s; } *unconsumed = bytes_read; } } /* * base64_decode - Base64 decode * * This accepts most variations of base-64 encoding, including: * * with or without line breaks * * with or without the final group padded with '=' or '_' characters * (The most economical Base-64 variant does not pad the last group and * omits line breaks; RFC1341 used for MIME requires both.) */ static char * base64_decode(const char *s, size_t len, size_t *out_len) { static const unsigned char digits[64] = { 'A','B','C','D','E','F','G','H','I','J','K','L','M','N', 'O','P','Q','R','S','T','U','V','W','X','Y','Z','a','b', 'c','d','e','f','g','h','i','j','k','l','m','n','o','p', 'q','r','s','t','u','v','w','x','y','z','0','1','2','3', '4','5','6','7','8','9','+','/' }; static unsigned char decode_table[128]; char *out, *d; const unsigned char *src = (const unsigned char *)s; /* If the decode table is not yet initialized, prepare it. */ if (decode_table[digits[1]] != 1) { unsigned i; memset(decode_table, 0xff, sizeof(decode_table)); for (i = 0; i < sizeof(digits); i++) decode_table[digits[i]] = i; } /* Allocate enough space to hold the entire output. */ /* Note that we may not use all of this... */ out = (char *)malloc(len - len / 4 + 1); if (out == NULL) { *out_len = 0; return (NULL); } d = out; while (len > 0) { /* Collect the next group of (up to) four characters. */ int v = 0; int group_size = 0; while (group_size < 4 && len > 0) { /* '=' or '_' padding indicates final group. */ if (*src == '=' || *src == '_') { len = 0; break; } /* Skip illegal characters (including line breaks) */ if (*src > 127 || *src < 32 || decode_table[*src] == 0xff) { len--; src++; continue; } v <<= 6; v |= decode_table[*src++]; len --; group_size++; } /* Align a short group properly. */ v <<= 6 * (4 - group_size); /* Unpack the group we just collected. 
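 * Each full group of four 6-bit digits yields three bytes;
 * e.g. "TWFu" gives v = 0x4D616E, which unpacks to 'M', 'a',
 * 'n'.  A short final group yields proportionally fewer
 * bytes, which is why d is advanced by group_size * 3 / 4
 * below.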
*/ switch (group_size) { case 4: d[2] = v & 0xff; /* FALLTHROUGH */ case 3: d[1] = (v >> 8) & 0xff; /* FALLTHROUGH */ case 2: d[0] = (v >> 16) & 0xff; break; case 1: /* this is invalid! */ break; } d += group_size * 3 / 4; } *out_len = d - out; return (out); } static char * url_decode(const char *in) { char *out, *d; const char *s; out = (char *)malloc(strlen(in) + 1); if (out == NULL) return (NULL); for (s = in, d = out; *s != '\0'; ) { if (s[0] == '%' && s[1] != '\0' && s[2] != '\0') { /* Try to convert % escape */ int digit1 = tohex(s[1]); int digit2 = tohex(s[2]); if (digit1 >= 0 && digit2 >= 0) { /* Looks good, consume three chars */ s += 3; /* Convert output */ *d++ = ((digit1 << 4) | digit2); continue; } /* Else fall through and treat '%' as normal char */ } *d++ = *s++; } *d = '\0'; return (out); } static int tohex(int c) { if (c >= '0' && c <= '9') return (c - '0'); else if (c >= 'A' && c <= 'F') return (c - 'A' + 10); else if (c >= 'a' && c <= 'f') return (c - 'a' + 10); else return (-1); } Index: projects/clang390-import/contrib/libarchive/libarchive/archive_write_disk_acl.c =================================================================== --- projects/clang390-import/contrib/libarchive/libarchive/archive_write_disk_acl.c (revision 305016) +++ projects/clang390-import/contrib/libarchive/libarchive/archive_write_disk_acl.c (revision 305017) @@ -1,308 +1,268 @@ /*- * Copyright (c) 2003-2010 Tim Kientzle * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer * in this position and unchanged. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "archive_platform.h" __FBSDID("$FreeBSD: head/lib/libarchive/archive_write_disk.c 201159 2009-12-29 05:35:40Z kientzle $"); #ifdef HAVE_SYS_TYPES_H #include #endif #ifdef HAVE_SYS_ACL_H #define _ACL_PRIVATE /* For debugging */ #include #endif #ifdef HAVE_ERRNO_H #include #endif #include "archive.h" #include "archive_entry.h" #include "archive_acl_private.h" #include "archive_write_disk_private.h" #ifndef HAVE_POSIX_ACL /* Default empty function body to satisfy mainline code. 
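 * (On platforms built without POSIX ACL support this stub is
 * compiled instead, so restoring ACLs quietly becomes a no-op
 * that reports ARCHIVE_OK.)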
*/ int archive_write_disk_set_acls(struct archive *a, int fd, const char *name, struct archive_acl *abstract_acl) { (void)a; /* UNUSED */ (void)fd; /* UNUSED */ (void)name; /* UNUSED */ (void)abstract_acl; /* UNUSED */ return (ARCHIVE_OK); } #else static int set_acl(struct archive *, int fd, const char *, struct archive_acl *, acl_type_t, int archive_entry_acl_type, const char *tn); /* * XXX TODO: What about ACL types other than ACCESS and DEFAULT? */ int archive_write_disk_set_acls(struct archive *a, int fd, const char *name, struct archive_acl *abstract_acl) { int ret; if (archive_acl_count(abstract_acl, ARCHIVE_ENTRY_ACL_TYPE_POSIX1E) > 0) { ret = set_acl(a, fd, name, abstract_acl, ACL_TYPE_ACCESS, ARCHIVE_ENTRY_ACL_TYPE_ACCESS, "access"); if (ret != ARCHIVE_OK) return (ret); ret = set_acl(a, fd, name, abstract_acl, ACL_TYPE_DEFAULT, ARCHIVE_ENTRY_ACL_TYPE_DEFAULT, "default"); return (ret); #ifdef ACL_TYPE_NFS4 } else if (archive_acl_count(abstract_acl, ARCHIVE_ENTRY_ACL_TYPE_NFS4) > 0) { ret = set_acl(a, fd, name, abstract_acl, ACL_TYPE_NFS4, ARCHIVE_ENTRY_ACL_TYPE_NFS4, "nfs4"); return (ret); #endif } else return ARCHIVE_OK; } static struct { int archive_perm; int platform_perm; } acl_perm_map[] = { {ARCHIVE_ENTRY_ACL_EXECUTE, ACL_EXECUTE}, {ARCHIVE_ENTRY_ACL_WRITE, ACL_WRITE}, {ARCHIVE_ENTRY_ACL_READ, ACL_READ}, #ifdef ACL_TYPE_NFS4 {ARCHIVE_ENTRY_ACL_READ_DATA, ACL_READ_DATA}, {ARCHIVE_ENTRY_ACL_LIST_DIRECTORY, ACL_LIST_DIRECTORY}, {ARCHIVE_ENTRY_ACL_WRITE_DATA, ACL_WRITE_DATA}, {ARCHIVE_ENTRY_ACL_ADD_FILE, ACL_ADD_FILE}, {ARCHIVE_ENTRY_ACL_APPEND_DATA, ACL_APPEND_DATA}, {ARCHIVE_ENTRY_ACL_ADD_SUBDIRECTORY, ACL_ADD_SUBDIRECTORY}, {ARCHIVE_ENTRY_ACL_READ_NAMED_ATTRS, ACL_READ_NAMED_ATTRS}, {ARCHIVE_ENTRY_ACL_WRITE_NAMED_ATTRS, ACL_WRITE_NAMED_ATTRS}, {ARCHIVE_ENTRY_ACL_DELETE_CHILD, ACL_DELETE_CHILD}, {ARCHIVE_ENTRY_ACL_READ_ATTRIBUTES, ACL_READ_ATTRIBUTES}, {ARCHIVE_ENTRY_ACL_WRITE_ATTRIBUTES, ACL_WRITE_ATTRIBUTES}, {ARCHIVE_ENTRY_ACL_DELETE, ACL_DELETE}, {ARCHIVE_ENTRY_ACL_READ_ACL, ACL_READ_ACL}, {ARCHIVE_ENTRY_ACL_WRITE_ACL, ACL_WRITE_ACL}, {ARCHIVE_ENTRY_ACL_WRITE_OWNER, ACL_WRITE_OWNER}, {ARCHIVE_ENTRY_ACL_SYNCHRONIZE, ACL_SYNCHRONIZE} #endif }; #ifdef ACL_TYPE_NFS4 static struct { int archive_inherit; int platform_inherit; } acl_inherit_map[] = { {ARCHIVE_ENTRY_ACL_ENTRY_FILE_INHERIT, ACL_ENTRY_FILE_INHERIT}, {ARCHIVE_ENTRY_ACL_ENTRY_DIRECTORY_INHERIT, ACL_ENTRY_DIRECTORY_INHERIT}, {ARCHIVE_ENTRY_ACL_ENTRY_NO_PROPAGATE_INHERIT, ACL_ENTRY_NO_PROPAGATE_INHERIT}, {ARCHIVE_ENTRY_ACL_ENTRY_INHERIT_ONLY, ACL_ENTRY_INHERIT_ONLY} }; #endif static int set_acl(struct archive *a, int fd, const char *name, struct archive_acl *abstract_acl, acl_type_t acl_type, int ae_requested_type, const char *tname) { acl_t acl; acl_entry_t acl_entry; acl_permset_t acl_permset; #ifdef ACL_TYPE_NFS4 acl_flagset_t acl_flagset; int r; #endif int ret; int ae_type, ae_permset, ae_tag, ae_id; uid_t ae_uid; gid_t ae_gid; const char *ae_name; int entries; int i; ret = ARCHIVE_OK; entries = archive_acl_reset(abstract_acl, ae_requested_type); if (entries == 0) return (ARCHIVE_OK); acl = acl_init(entries); - if (acl == (acl_t)NULL) { - archive_set_error(a, errno, - "Failed to initialize ACL working storage"); - return (ARCHIVE_FAILED); - } while (archive_acl_next(a, abstract_acl, ae_requested_type, &ae_type, &ae_permset, &ae_tag, &ae_id, &ae_name) == ARCHIVE_OK) { - if (acl_create_entry(&acl, &acl_entry) != 0) { - archive_set_error(a, errno, - "Failed to create a new ACL entry"); - return (ARCHIVE_FAILED); - } + 
acl_create_entry(&acl, &acl_entry); switch (ae_tag) { case ARCHIVE_ENTRY_ACL_USER: acl_set_tag_type(acl_entry, ACL_USER); ae_uid = archive_write_disk_uid(a, ae_name, ae_id); acl_set_qualifier(acl_entry, &ae_uid); break; case ARCHIVE_ENTRY_ACL_GROUP: acl_set_tag_type(acl_entry, ACL_GROUP); ae_gid = archive_write_disk_gid(a, ae_name, ae_id); acl_set_qualifier(acl_entry, &ae_gid); break; case ARCHIVE_ENTRY_ACL_USER_OBJ: acl_set_tag_type(acl_entry, ACL_USER_OBJ); break; case ARCHIVE_ENTRY_ACL_GROUP_OBJ: acl_set_tag_type(acl_entry, ACL_GROUP_OBJ); break; case ARCHIVE_ENTRY_ACL_MASK: acl_set_tag_type(acl_entry, ACL_MASK); break; case ARCHIVE_ENTRY_ACL_OTHER: acl_set_tag_type(acl_entry, ACL_OTHER); break; #ifdef ACL_TYPE_NFS4 case ARCHIVE_ENTRY_ACL_EVERYONE: acl_set_tag_type(acl_entry, ACL_EVERYONE); break; #endif default: - archive_set_error(a, ARCHIVE_ERRNO_MISC, - "Unknown ACL tag: %d", ae_tag); - return (ARCHIVE_FAILED); + /* XXX */ + break; } #ifdef ACL_TYPE_NFS4 - r = 0; switch (ae_type) { case ARCHIVE_ENTRY_ACL_TYPE_ALLOW: - r = acl_set_entry_type_np(acl_entry, ACL_ENTRY_TYPE_ALLOW); + acl_set_entry_type_np(acl_entry, ACL_ENTRY_TYPE_ALLOW); break; case ARCHIVE_ENTRY_ACL_TYPE_DENY: - r = acl_set_entry_type_np(acl_entry, ACL_ENTRY_TYPE_DENY); + acl_set_entry_type_np(acl_entry, ACL_ENTRY_TYPE_DENY); break; case ARCHIVE_ENTRY_ACL_TYPE_AUDIT: - r = acl_set_entry_type_np(acl_entry, ACL_ENTRY_TYPE_AUDIT); + acl_set_entry_type_np(acl_entry, ACL_ENTRY_TYPE_AUDIT); break; case ARCHIVE_ENTRY_ACL_TYPE_ALARM: - r = acl_set_entry_type_np(acl_entry, ACL_ENTRY_TYPE_ALARM); + acl_set_entry_type_np(acl_entry, ACL_ENTRY_TYPE_ALARM); break; case ARCHIVE_ENTRY_ACL_TYPE_ACCESS: case ARCHIVE_ENTRY_ACL_TYPE_DEFAULT: // These don't translate directly into the system ACL. break; default: - archive_set_error(a, ARCHIVE_ERRNO_MISC, - "Unknown ACL entry type: %d", ae_type); - return (ARCHIVE_FAILED); + // XXX error handling here. 
+ break; } - if (r != 0) { - archive_set_error(a, errno, - "Failed to set ACL entry type"); - return (ARCHIVE_FAILED); - } #endif - if (acl_get_permset(acl_entry, &acl_permset) != 0) { - archive_set_error(a, errno, - "Failed to get ACL permission set"); - return (ARCHIVE_FAILED); - } - if (acl_clear_perms(acl_permset) != 0) { - archive_set_error(a, errno, - "Failed to clear ACL permissions"); - return (ARCHIVE_FAILED); - } + acl_get_permset(acl_entry, &acl_permset); + acl_clear_perms(acl_permset); for (i = 0; i < (int)(sizeof(acl_perm_map) / sizeof(acl_perm_map[0])); ++i) { if (ae_permset & acl_perm_map[i].archive_perm) - if (acl_add_perm(acl_permset, - acl_perm_map[i].platform_perm) != 0) { - archive_set_error(a, errno, - "Failed to add ACL permission"); - return (ARCHIVE_FAILED); - } + acl_add_perm(acl_permset, + acl_perm_map[i].platform_perm); } #ifdef ACL_TYPE_NFS4 - if (acl_type == ACL_TYPE_NFS4) { - if (acl_get_flagset_np(acl_entry, &acl_flagset) != 0) { - archive_set_error(a, errno, - "Failed to get flagset from an NFSv4 ACL entry"); - return (ARCHIVE_FAILED); - } - if (acl_clear_flags_np(acl_flagset) != 0) { - archive_set_error(a, errno, - "Failed to clear flags from an NFSv4 ACL flagset"); - return (ARCHIVE_FAILED); - } + // XXX acl_get_flagset_np on FreeBSD returns EINVAL for + // non-NFSv4 ACLs + r = acl_get_flagset_np(acl_entry, &acl_flagset); + if (r == 0) { + acl_clear_flags_np(acl_flagset); for (i = 0; i < (int)(sizeof(acl_inherit_map) / sizeof(acl_inherit_map[0])); ++i) { - if (ae_permset & acl_inherit_map[i].archive_inherit) { - if (acl_add_flag_np(acl_flagset, - acl_inherit_map[i].platform_inherit) != 0) { - archive_set_error(a, errno, - "Failed to add flag to NFSv4 ACL flagset"); - return (ARCHIVE_FAILED); - } - } + if (ae_permset & acl_inherit_map[i].archive_inherit) + acl_add_flag_np(acl_flagset, + acl_inherit_map[i].platform_inherit); } } #endif } /* Try restoring the ACL through 'fd' if we can. */ #if HAVE_ACL_SET_FD if (fd >= 0 && acl_type == ACL_TYPE_ACCESS && acl_set_fd(fd, acl) == 0) ret = ARCHIVE_OK; else #else #if HAVE_ACL_SET_FD_NP if (fd >= 0 && acl_set_fd_np(fd, acl, acl_type) == 0) ret = ARCHIVE_OK; else #endif #endif #if HAVE_ACL_SET_LINK_NP if (acl_set_link_np(name, acl_type, acl) != 0) { archive_set_error(a, errno, "Failed to set %s acl", tname); ret = ARCHIVE_WARN; } #else /* TODO: Skip this if 'name' is a symlink. */ if (acl_set_file(name, acl_type, acl) != 0) { archive_set_error(a, errno, "Failed to set %s acl", tname); ret = ARCHIVE_WARN; } #endif acl_free(acl); return (ret); } #endif Index: projects/clang390-import/contrib/libarchive/libarchive/archive_write_set_format_pax.c =================================================================== --- projects/clang390-import/contrib/libarchive/libarchive/archive_write_set_format_pax.c (revision 305016) +++ projects/clang390-import/contrib/libarchive/libarchive/archive_write_set_format_pax.c (revision 305017) @@ -1,1931 +1,1907 @@ /*- * Copyright (c) 2003-2007 Tim Kientzle * Copyright (c) 2010-2012 Michihiro NAKAJIMA * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "archive_platform.h" __FBSDID("$FreeBSD$"); #ifdef HAVE_ERRNO_H #include <errno.h> #endif #ifdef HAVE_STDLIB_H #include <stdlib.h> #endif #ifdef HAVE_STRING_H #include <string.h> #endif #include "archive.h" #include "archive_entry.h" #include "archive_entry_locale.h" #include "archive_private.h" #include "archive_write_private.h" struct sparse_block { struct sparse_block *next; int is_hole; uint64_t offset; uint64_t remaining; }; struct pax { uint64_t entry_bytes_remaining; uint64_t entry_padding; struct archive_string l_url_encoded_name; struct archive_string pax_header; struct archive_string sparse_map; size_t sparse_map_padding; struct sparse_block *sparse_list; struct sparse_block *sparse_tail; struct archive_string_conv *sconv_utf8; int opt_binary; }; static void add_pax_attr(struct archive_string *, const char *key, const char *value); static void add_pax_attr_int(struct archive_string *, const char *key, int64_t value); static void add_pax_attr_time(struct archive_string *, const char *key, int64_t sec, unsigned long nanos); -static int add_pax_acl(struct archive_write *, - struct archive_entry *, struct pax *, int); static ssize_t archive_write_pax_data(struct archive_write *, const void *, size_t); static int archive_write_pax_close(struct archive_write *); static int archive_write_pax_free(struct archive_write *); static int archive_write_pax_finish_entry(struct archive_write *); static int archive_write_pax_header(struct archive_write *, struct archive_entry *); static int archive_write_pax_options(struct archive_write *, const char *, const char *); static char *base64_encode(const char *src, size_t len); static char *build_gnu_sparse_name(char *dest, const char *src); static char *build_pax_attribute_name(char *dest, const char *src); static char *build_ustar_entry_name(char *dest, const char *src, size_t src_length, const char *insert); static char *format_int(char *dest, int64_t); static int has_non_ASCII(const char *); static void sparse_list_clear(struct pax *); static int sparse_list_add(struct pax *, int64_t, int64_t); static char *url_encode(const char *in); /* * Set output format to 'restricted pax' format. * * This is the same as normal 'pax', but tries to suppress * the pax header whenever possible. This is the default for * bsdtar, for instance.
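 *
 * A minimal usage sketch (an editor's illustration, not part of this
 * change; these are the standard libarchive entry points, and
 * "out.tar" is a placeholder name):
 *
 *	struct archive *a = archive_write_new();
 *	archive_write_set_format_pax_restricted(a);
 *	archive_write_open_filename(a, "out.tar");
 *	... write entries, then archive_write_free(a) ...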
*/ int archive_write_set_format_pax_restricted(struct archive *_a) { struct archive_write *a = (struct archive_write *)_a; int r; archive_check_magic(_a, ARCHIVE_WRITE_MAGIC, ARCHIVE_STATE_NEW, "archive_write_set_format_pax_restricted"); r = archive_write_set_format_pax(&a->archive); a->archive.archive_format = ARCHIVE_FORMAT_TAR_PAX_RESTRICTED; a->archive.archive_format_name = "restricted POSIX pax interchange"; return (r); } /* * Set output format to 'pax' format. */ int archive_write_set_format_pax(struct archive *_a) { struct archive_write *a = (struct archive_write *)_a; struct pax *pax; archive_check_magic(_a, ARCHIVE_WRITE_MAGIC, ARCHIVE_STATE_NEW, "archive_write_set_format_pax"); if (a->format_free != NULL) (a->format_free)(a); pax = (struct pax *)malloc(sizeof(*pax)); if (pax == NULL) { archive_set_error(&a->archive, ENOMEM, "Can't allocate pax data"); return (ARCHIVE_FATAL); } memset(pax, 0, sizeof(*pax)); a->format_data = pax; a->format_name = "pax"; a->format_options = archive_write_pax_options; a->format_write_header = archive_write_pax_header; a->format_write_data = archive_write_pax_data; a->format_close = archive_write_pax_close; a->format_free = archive_write_pax_free; a->format_finish_entry = archive_write_pax_finish_entry; a->archive.archive_format = ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE; a->archive.archive_format_name = "POSIX pax interchange"; return (ARCHIVE_OK); } static int archive_write_pax_options(struct archive_write *a, const char *key, const char *val) { struct pax *pax = (struct pax *)a->format_data; int ret = ARCHIVE_FAILED; if (strcmp(key, "hdrcharset") == 0) { /* * The character sets we can use are defined in * IEEE Std 1003.1-2001. */ if (val == NULL || val[0] == 0) archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, "pax: hdrcharset option needs a character-set name"); else if (strcmp(val, "BINARY") == 0 || strcmp(val, "binary") == 0) { /* * Specify binary mode. We will not convert * filenames, uname and gname to any charsets. */ pax->opt_binary = 1; ret = ARCHIVE_OK; } else if (strcmp(val, "UTF-8") == 0) { /* * Specify the UTF-8 character set to be used for * filenames. This also serves as a check that the * running platform supports the string conversion; * libarchive_test in particular relies on this. */ pax->sconv_utf8 = archive_string_conversion_to_charset( &(a->archive), "UTF-8", 0); if (pax->sconv_utf8 == NULL) ret = ARCHIVE_FATAL; else ret = ARCHIVE_OK; } else archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, "pax: invalid charset name"); return (ret); } /* Note: The "warn" return is just to inform the options * supervisor that we didn't handle it. It will generate * a suitable error if no one used this option. */ return (ARCHIVE_WARN); } /* * Note: This code assumes that 'nanos' has the same sign as 'sec', * which implies that sec=-1, nanos=200000000 represents -1.2 seconds * and not -0.8 seconds. This is a pretty pedantic point, as we're * unlikely to encounter many real files created before Jan 1, 1970, * much less ones with timestamps recorded to sub-second resolution. */ static void add_pax_attr_time(struct archive_string *as, const char *key, int64_t sec, unsigned long nanos) { int digit, i; char *t; /* * Note that each byte contributes fewer than 3 base-10 * digits, so this will always be big enough. */ char tmp[1 + 3*sizeof(sec) + 1 + 3*sizeof(nanos)]; tmp[sizeof(tmp) - 1] = 0; t = tmp + sizeof(tmp) - 1; /* Skip trailing zeros in the fractional part.
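 * For example (an editor's illustration): sec=1, nanos=123000000 is
 * emitted as "1.123", while sec=1, nanos=0 is emitted as just "1".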
*/ for (digit = 0, i = 10; i > 0 && digit == 0; i--) { digit = nanos % 10; nanos /= 10; } /* Only format the fraction if it's non-zero. */ if (i > 0) { while (i > 0) { *--t = "0123456789"[digit]; digit = nanos % 10; nanos /= 10; i--; } *--t = '.'; } t = format_int(t, sec); add_pax_attr(as, key, t); } static char * format_int(char *t, int64_t i) { uint64_t ui; if (i < 0) ui = (i == INT64_MIN) ? (uint64_t)(INT64_MAX) + 1 : (uint64_t)(-i); else ui = i; do { *--t = "0123456789"[ui % 10]; } while (ui /= 10); if (i < 0) *--t = '-'; return (t); } static void add_pax_attr_int(struct archive_string *as, const char *key, int64_t value) { char tmp[1 + 3 * sizeof(value)]; tmp[sizeof(tmp) - 1] = 0; add_pax_attr(as, key, format_int(tmp + sizeof(tmp) - 1, value)); } /* * Add a key/value attribute to the pax header. This function handles * the length field and various other syntactic requirements. */ static void add_pax_attr(struct archive_string *as, const char *key, const char *value) { int digits, i, len, next_ten; char tmp[1 + 3 * sizeof(int)]; /* < 3 base-10 digits per byte */ /*- * PAX attributes have the following layout: * <len> <space> <key> <=> <value> <nl> */ len = 1 + (int)strlen(key) + 1 + (int)strlen(value) + 1; /* * The <len> field includes the length of the <len> field, so * computing the correct length is tricky. I start by * counting the number of base-10 digits in 'len' and * computing the next higher power of 10. */ next_ten = 1; digits = 0; i = len; while (i > 0) { i = i / 10; digits++; next_ten = next_ten * 10; } /* * For example, if the string without the length field is 99 * chars, then adding the 2 digit length "99" will force the * total length past 100, requiring an extra digit. The next * statement adjusts for this effect. */ if (len + digits >= next_ten) digits++; /* Now, we have the right length so we can build the line. */ tmp[sizeof(tmp) - 1] = 0; /* Null-terminate the work area. */ archive_strcat(as, format_int(tmp + sizeof(tmp) - 1, len + digits)); archive_strappend_char(as, ' '); archive_strcat(as, key); archive_strappend_char(as, '='); archive_strcat(as, value); archive_strappend_char(as, '\n'); } static int archive_write_pax_header_xattrs(struct archive_write *a, struct pax *pax, struct archive_entry *entry) { struct archive_string s; int i = archive_entry_xattr_reset(entry); while (i--) { const char *name; const void *value; char *encoded_value; char *url_encoded_name = NULL, *encoded_name = NULL; size_t size; int r; archive_entry_xattr_next(entry, &name, &value, &size); url_encoded_name = url_encode(name); if (url_encoded_name != NULL) { /* Convert narrow-character to UTF-8. */ r = archive_strcpy_l(&(pax->l_url_encoded_name), url_encoded_name, pax->sconv_utf8); free(url_encoded_name); /* Done with this.
*/ if (r == 0) encoded_name = pax->l_url_encoded_name.s; else if (errno == ENOMEM) { archive_set_error(&a->archive, ENOMEM, "Can't allocate memory for Linkname"); return (ARCHIVE_FATAL); } } encoded_value = base64_encode((const char *)value, size); if (encoded_name != NULL && encoded_value != NULL) { archive_string_init(&s); archive_strcpy(&s, "LIBARCHIVE.xattr."); archive_strcat(&s, encoded_name); add_pax_attr(&(pax->pax_header), s.s, encoded_value); archive_string_free(&s); } free(encoded_value); } return (ARCHIVE_OK); } static int get_entry_hardlink(struct archive_write *a, struct archive_entry *entry, const char **name, size_t *length, struct archive_string_conv *sc) { int r; r = archive_entry_hardlink_l(entry, name, length, sc); if (r != 0) { if (errno == ENOMEM) { archive_set_error(&a->archive, ENOMEM, "Can't allocate memory for Linkname"); return (ARCHIVE_FATAL); } return (ARCHIVE_WARN); } return (ARCHIVE_OK); } static int get_entry_pathname(struct archive_write *a, struct archive_entry *entry, const char **name, size_t *length, struct archive_string_conv *sc) { int r; r = archive_entry_pathname_l(entry, name, length, sc); if (r != 0) { if (errno == ENOMEM) { archive_set_error(&a->archive, ENOMEM, "Can't allocate memory for Pathname"); return (ARCHIVE_FATAL); } return (ARCHIVE_WARN); } return (ARCHIVE_OK); } static int get_entry_uname(struct archive_write *a, struct archive_entry *entry, const char **name, size_t *length, struct archive_string_conv *sc) { int r; r = archive_entry_uname_l(entry, name, length, sc); if (r != 0) { if (errno == ENOMEM) { archive_set_error(&a->archive, ENOMEM, "Can't allocate memory for Uname"); return (ARCHIVE_FATAL); } return (ARCHIVE_WARN); } return (ARCHIVE_OK); } static int get_entry_gname(struct archive_write *a, struct archive_entry *entry, const char **name, size_t *length, struct archive_string_conv *sc) { int r; r = archive_entry_gname_l(entry, name, length, sc); if (r != 0) { if (errno == ENOMEM) { archive_set_error(&a->archive, ENOMEM, "Can't allocate memory for Gname"); return (ARCHIVE_FATAL); } return (ARCHIVE_WARN); } return (ARCHIVE_OK); } static int get_entry_symlink(struct archive_write *a, struct archive_entry *entry, const char **name, size_t *length, struct archive_string_conv *sc) { int r; r = archive_entry_symlink_l(entry, name, length, sc); if (r != 0) { if (errno == ENOMEM) { archive_set_error(&a->archive, ENOMEM, "Can't allocate memory for Linkname"); return (ARCHIVE_FATAL); } return (ARCHIVE_WARN); } return (ARCHIVE_OK); } -/* Add ACL to pax header */ -static int -add_pax_acl(struct archive_write *a, - struct archive_entry *entry, struct pax *pax, int flags) -{ - const char *p; - const char *attr; - int r; - - if (flags & ARCHIVE_ENTRY_ACL_TYPE_ACCESS) - attr = "SCHILY.acl.access"; - else if (flags & ARCHIVE_ENTRY_ACL_TYPE_DEFAULT) - attr = "SCHILY.acl.default"; - else if (flags & ARCHIVE_ENTRY_ACL_TYPE_NFS4) - attr = "SCHILY.acl.ace"; - else - return(ARCHIVE_FATAL); - - r = archive_entry_acl_text_l(entry, flags, &p, NULL, - pax->sconv_utf8); - if (r != 0) { - if (errno == ENOMEM) { - archive_set_error(&a->archive, ENOMEM, "%s %s", - "Can't allocate memory for ", attr); - return (ARCHIVE_FATAL); - } - archive_set_error(&a->archive, - ARCHIVE_ERRNO_FILE_FORMAT, "%s %s %s", - "Can't translate ", attr, " to UTF-8"); - return(ARCHIVE_WARN); - } else if (p != NULL && *p != '\0') { - add_pax_attr(&(pax->pax_header), - attr, p); - } - return(ARCHIVE_OK); -} - /* * TODO: Consider adding 'comment' and 'charset' fields to * archive_entry 
so that clients can specify them. Also, consider * adding generic key/value tags so clients can add arbitrary * key/value data. * * TODO: Break up this 700-line function!!!! Yowza! */ static int archive_write_pax_header(struct archive_write *a, struct archive_entry *entry_original) { struct archive_entry *entry_main; const char *p; const char *suffix; int need_extension, r, ret; - int acl_access, acl_default, acl_nfs4; int sparse_count; uint64_t sparse_total, real_size; struct pax *pax; const char *hardlink; const char *path = NULL, *linkpath = NULL; const char *uname = NULL, *gname = NULL; const void *mac_metadata; size_t mac_metadata_size; struct archive_string_conv *sconv; size_t hardlink_length, path_length, linkpath_length; size_t uname_length, gname_length; char paxbuff[512]; char ustarbuff[512]; char ustar_entry_name[256]; char pax_entry_name[256]; char gnu_sparse_name[256]; struct archive_string entry_name; ret = ARCHIVE_OK; need_extension = 0; pax = (struct pax *)a->format_data; /* Sanity check. */ if (archive_entry_pathname(entry_original) == NULL) { archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, "Can't record entry in tar file without pathname"); return (ARCHIVE_FAILED); } /* * Choose a header encoding. */ if (pax->opt_binary) sconv = NULL;/* Binary mode. */ else { /* Header encoding is UTF-8. */ if (pax->sconv_utf8 == NULL) { /* Initialize the string conversion object * we will need */ pax->sconv_utf8 = archive_string_conversion_to_charset( &(a->archive), "UTF-8", 1); if (pax->sconv_utf8 == NULL) /* Couldn't allocate memory */ return (ARCHIVE_FAILED); } sconv = pax->sconv_utf8; } r = get_entry_hardlink(a, entry_original, &hardlink, &hardlink_length, sconv); if (r == ARCHIVE_FATAL) return (r); else if (r != ARCHIVE_OK) { r = get_entry_hardlink(a, entry_original, &hardlink, &hardlink_length, NULL); if (r == ARCHIVE_FATAL) return (r); archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Can't translate linkname '%s' to %s", hardlink, archive_string_conversion_charset_name(sconv)); ret = ARCHIVE_WARN; sconv = NULL;/* The header charset switches to binary mode. */ } /* Make sure this is a type of entry that we can handle here */ if (hardlink == NULL) { switch (archive_entry_filetype(entry_original)) { case AE_IFBLK: case AE_IFCHR: case AE_IFIFO: case AE_IFLNK: case AE_IFREG: break; case AE_IFDIR: { /* * Ensure a trailing '/'. Modify the original * entry so the client sees the change. */ #if defined(_WIN32) && !defined(__CYGWIN__) const wchar_t *wp; wp = archive_entry_pathname_w(entry_original); if (wp != NULL && wp[wcslen(wp) -1] != L'/') { struct archive_wstring ws; archive_string_init(&ws); path_length = wcslen(wp); if (archive_wstring_ensure(&ws, path_length + 2) == NULL) { archive_set_error(&a->archive, ENOMEM, "Can't allocate pax data"); archive_wstring_free(&ws); return(ARCHIVE_FATAL); } /* Should we keep '\' ? */ if (wp[path_length -1] == L'\\') path_length--; archive_wstrncpy(&ws, wp, path_length); archive_wstrappend_wchar(&ws, L'/'); archive_entry_copy_pathname_w( entry_original, ws.s); archive_wstring_free(&ws); p = NULL; } else #endif p = archive_entry_pathname(entry_original); /* * On Windows, this is a backup operation just in * case getting WCS failed. On POSIX, this is a * normal operation.
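 * Either way the effect is the same: a directory entry named
 * "dir" is stored as "dir/" so that readers recognize it as a
 * directory ("dir" here is an editor's placeholder example).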
*/ if (p != NULL && p[strlen(p) - 1] != '/') { struct archive_string as; archive_string_init(&as); path_length = strlen(p); if (archive_string_ensure(&as, path_length + 2) == NULL) { archive_set_error(&a->archive, ENOMEM, "Can't allocate pax data"); archive_string_free(&as); return(ARCHIVE_FATAL); } #if defined(_WIN32) && !defined(__CYGWIN__) /* NOTE: This might break the pathname * if the current code page is CP932 and * the pathname includes a character '\' * as a part of its multibyte pathname. */ if (p[strlen(p) -1] == '\\') path_length--; else #endif archive_strncpy(&as, p, path_length); archive_strappend_char(&as, '/'); archive_entry_copy_pathname( entry_original, as.s); archive_string_free(&as); } break; } case AE_IFSOCK: archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "tar format cannot archive socket"); return (ARCHIVE_FAILED); default: archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "tar format cannot archive this (type=0%lo)", (unsigned long) archive_entry_filetype(entry_original)); return (ARCHIVE_FAILED); } } /* * If Mac OS metadata blob is here, recurse to write that * as a separate entry. This is really a pretty poor design: * In particular, it doubles the overhead for long filenames. * TODO: Help Apple folks design something better and figure * out how to transition from this legacy format. * * Note that this code is present on every platform; clients * on non-Mac are unlikely to ever provide this data, but * applications that copy entries from one archive to another * should not lose data just because the local filesystem * can't store it. */ mac_metadata = archive_entry_mac_metadata(entry_original, &mac_metadata_size); if (mac_metadata != NULL) { const char *oname; char *name, *bname; size_t name_length; struct archive_entry *extra = archive_entry_new2(&a->archive); oname = archive_entry_pathname(entry_original); name_length = strlen(oname); name = malloc(name_length + 3); if (name == NULL || extra == NULL) { /* XXX error message */ archive_entry_free(extra); free(name); return (ARCHIVE_FAILED); } strcpy(name, oname); /* Find last '/'; strip trailing '/' characters */ bname = strrchr(name, '/'); while (bname != NULL && bname[1] == '\0') { *bname = '\0'; bname = strrchr(name, '/'); } if (bname == NULL) { memmove(name + 2, name, name_length + 1); memmove(name, "._", 2); } else { bname += 1; memmove(bname + 2, bname, strlen(bname) + 1); memmove(bname, "._", 2); } archive_entry_copy_pathname(extra, name); free(name); archive_entry_set_size(extra, mac_metadata_size); archive_entry_set_filetype(extra, AE_IFREG); archive_entry_set_perm(extra, archive_entry_perm(entry_original)); archive_entry_set_mtime(extra, archive_entry_mtime(entry_original), archive_entry_mtime_nsec(entry_original)); archive_entry_set_gid(extra, archive_entry_gid(entry_original)); archive_entry_set_gname(extra, archive_entry_gname(entry_original)); archive_entry_set_uid(extra, archive_entry_uid(entry_original)); archive_entry_set_uname(extra, archive_entry_uname(entry_original)); /* Recurse to write the special copyfile entry. */ r = archive_write_pax_header(a, extra); archive_entry_free(extra); if (r < ARCHIVE_WARN) return (r); if (r < ret) ret = r; r = (int)archive_write_pax_data(a, mac_metadata, mac_metadata_size); if (r < ARCHIVE_WARN) return (r); if (r < ret) ret = r; r = archive_write_pax_finish_entry(a); if (r < ARCHIVE_WARN) return (r); if (r < ret) ret = r; } /* Copy entry so we can modify it as needed. 
*/ #if defined(_WIN32) && !defined(__CYGWIN__) /* Make sure the path separators in pathname, hardlink and symlink * are all slash '/', not the Windows path separator '\'. */ entry_main = __la_win_entry_in_posix_pathseparator(entry_original); if (entry_main == entry_original) entry_main = archive_entry_clone(entry_original); #else entry_main = archive_entry_clone(entry_original); #endif if (entry_main == NULL) { archive_set_error(&a->archive, ENOMEM, "Can't allocate pax data"); return(ARCHIVE_FATAL); } archive_string_empty(&(pax->pax_header)); /* Blank our work area. */ archive_string_empty(&(pax->sparse_map)); sparse_total = 0; sparse_list_clear(pax); if (hardlink == NULL && archive_entry_filetype(entry_main) == AE_IFREG) sparse_count = archive_entry_sparse_reset(entry_main); else sparse_count = 0; if (sparse_count) { int64_t offset, length, last_offset = 0; /* Get the last sparse block entry. */ while (archive_entry_sparse_next( entry_main, &offset, &length) == ARCHIVE_OK) last_offset = offset + length; /* If the last sparse block does not reach the end of the file, * we have to add an empty sparse block as the last entry so * that the map covers the full file size. */ if (last_offset < archive_entry_size(entry_main)) archive_entry_sparse_add_entry(entry_main, archive_entry_size(entry_main), 0); sparse_count = archive_entry_sparse_reset(entry_main); } /* * First, check the name fields and see if any of them * require binary coding. If any of them does, then all of * them do. */ r = get_entry_pathname(a, entry_main, &path, &path_length, sconv); if (r == ARCHIVE_FATAL) return (r); else if (r != ARCHIVE_OK) { r = get_entry_pathname(a, entry_main, &path, &path_length, NULL); if (r == ARCHIVE_FATAL) return (r); archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Can't translate pathname '%s' to %s", path, archive_string_conversion_charset_name(sconv)); ret = ARCHIVE_WARN; sconv = NULL;/* The header charset switches to binary mode. */ } r = get_entry_uname(a, entry_main, &uname, &uname_length, sconv); if (r == ARCHIVE_FATAL) return (r); else if (r != ARCHIVE_OK) { r = get_entry_uname(a, entry_main, &uname, &uname_length, NULL); if (r == ARCHIVE_FATAL) return (r); archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Can't translate uname '%s' to %s", uname, archive_string_conversion_charset_name(sconv)); ret = ARCHIVE_WARN; sconv = NULL;/* The header charset switches to binary mode. */ } r = get_entry_gname(a, entry_main, &gname, &gname_length, sconv); if (r == ARCHIVE_FATAL) return (r); else if (r != ARCHIVE_OK) { r = get_entry_gname(a, entry_main, &gname, &gname_length, NULL); if (r == ARCHIVE_FATAL) return (r); archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Can't translate gname '%s' to %s", gname, archive_string_conversion_charset_name(sconv)); ret = ARCHIVE_WARN; sconv = NULL;/* The header charset switches to binary mode. */ } linkpath = hardlink; linkpath_length = hardlink_length; if (linkpath == NULL) { r = get_entry_symlink(a, entry_main, &linkpath, &linkpath_length, sconv); if (r == ARCHIVE_FATAL) return (r); else if (r != ARCHIVE_OK) { r = get_entry_symlink(a, entry_main, &linkpath, &linkpath_length, NULL); if (r == ARCHIVE_FATAL) return (r); archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Can't translate linkname '%s' to %s", linkpath, archive_string_conversion_charset_name(sconv)); ret = ARCHIVE_WARN; sconv = NULL; } } /* If any string conversions failed, get all attributes * in binary-mode.
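 * (A "hdrcharset=BINARY" attribute is recorded just below so that
 * readers know the names were stored as raw bytes.)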
*/ if (sconv == NULL && !pax->opt_binary) { if (hardlink != NULL) { r = get_entry_hardlink(a, entry_main, &hardlink, &hardlink_length, NULL); if (r == ARCHIVE_FATAL) return (r); linkpath = hardlink; linkpath_length = hardlink_length; } r = get_entry_pathname(a, entry_main, &path, &path_length, NULL); if (r == ARCHIVE_FATAL) return (r); r = get_entry_uname(a, entry_main, &uname, &uname_length, NULL); if (r == ARCHIVE_FATAL) return (r); r = get_entry_gname(a, entry_main, &gname, &gname_length, NULL); if (r == ARCHIVE_FATAL) return (r); } /* Store the header encoding first, to be nice to readers. */ if (sconv == NULL) add_pax_attr(&(pax->pax_header), "hdrcharset", "BINARY"); /* * If name is too long, or has non-ASCII characters, add * 'path' to pax extended attrs. (Note that an unconvertible * name must have non-ASCII characters.) */ if (has_non_ASCII(path)) { /* We have non-ASCII characters. */ add_pax_attr(&(pax->pax_header), "path", path); archive_entry_set_pathname(entry_main, build_ustar_entry_name(ustar_entry_name, path, path_length, NULL)); need_extension = 1; } else { /* We have an all-ASCII path; we'd like to just store * it in the ustar header if it will fit. Yes, this * duplicates some of the logic in * archive_write_set_format_ustar.c */ if (path_length <= 100) { /* Fits in the old 100-char tar name field. */ } else { /* Find largest suffix that will fit. */ /* Note: strlen() > 100, so strlen() - 100 - 1 >= 0 */ suffix = strchr(path + path_length - 100 - 1, '/'); /* Don't attempt an empty prefix. */ if (suffix == path) suffix = strchr(suffix + 1, '/'); /* We can put it in the ustar header if it's * all ASCII and it's either <= 100 characters * or can be split at a '/' into a prefix <= * 155 chars and a suffix <= 100 chars. (Note * the strchr() above will return NULL exactly * when the path can't be split.) */ if (suffix == NULL /* Suffix > 100 chars. */ || suffix[1] == '\0' /* empty suffix */ || suffix - path > 155) /* Prefix > 155 chars */ { add_pax_attr(&(pax->pax_header), "path", path); archive_entry_set_pathname(entry_main, build_ustar_entry_name(ustar_entry_name, path, path_length, NULL)); need_extension = 1; } } } if (linkpath != NULL) { /* If link name is too long or has non-ASCII characters, add * 'linkpath' to pax extended attrs. */ if (linkpath_length > 100 || has_non_ASCII(linkpath)) { add_pax_attr(&(pax->pax_header), "linkpath", linkpath); if (linkpath_length > 100) { if (hardlink != NULL) archive_entry_set_hardlink(entry_main, "././@LongHardLink"); else archive_entry_set_symlink(entry_main, "././@LongSymLink"); } need_extension = 1; } } /* Save a pathname since it will be renamed if `entry_main` has * sparse blocks. */ archive_string_init(&entry_name); archive_strcpy(&entry_name, archive_entry_pathname(entry_main)); /* If file size is too large, add 'size' to pax extended attrs. */ if (archive_entry_size(entry_main) >= (((int64_t)1) << 33)) { add_pax_attr_int(&(pax->pax_header), "size", archive_entry_size(entry_main)); need_extension = 1; } /* If numeric GID is too large, add 'gid' to pax extended attrs. */ if ((unsigned int)archive_entry_gid(entry_main) >= (1 << 18)) { add_pax_attr_int(&(pax->pax_header), "gid", archive_entry_gid(entry_main)); need_extension = 1; } /* If group name is too large or has non-ASCII characters, add * 'gname' to pax extended attrs. */ if (gname != NULL) { if (gname_length > 31 || has_non_ASCII(gname)) { add_pax_attr(&(pax->pax_header), "gname", gname); need_extension = 1; } } /* If numeric UID is too large, add 'uid' to pax extended attrs. 
*/ if ((unsigned int)archive_entry_uid(entry_main) >= (1 << 18)) { add_pax_attr_int(&(pax->pax_header), "uid", archive_entry_uid(entry_main)); need_extension = 1; } /* Add 'uname' to pax extended attrs if necessary. */ if (uname != NULL) { if (uname_length > 31 || has_non_ASCII(uname)) { add_pax_attr(&(pax->pax_header), "uname", uname); need_extension = 1; } } /* * POSIX/SUSv3 doesn't provide a standard key for large device * numbers. I use the same keys here that Joerg Schilling * used for 'star.' (Which, somewhat confusingly, are called * "devXXX" even though they code "rdev" values.) No doubt, * other implementations use other keys. Note that there's no * reason we can't write the same information into a number of * different keys. * * Of course, this is only needed for block or char device entries. */ if (archive_entry_filetype(entry_main) == AE_IFBLK || archive_entry_filetype(entry_main) == AE_IFCHR) { /* * If rdevmajor is too large, add 'SCHILY.devmajor' to * extended attributes. */ int rdevmajor, rdevminor; rdevmajor = archive_entry_rdevmajor(entry_main); rdevminor = archive_entry_rdevminor(entry_main); if (rdevmajor >= (1 << 18)) { add_pax_attr_int(&(pax->pax_header), "SCHILY.devmajor", rdevmajor); /* * Non-strict formatting below means we don't * have to truncate here. Not truncating improves * the chance that some more modern tar archivers * (such as GNU tar 1.13) can restore the full * value even if they don't understand the pax * extended attributes. See my rant below about * file size fields for additional details. */ /* archive_entry_set_rdevmajor(entry_main, rdevmajor & ((1 << 18) - 1)); */ need_extension = 1; } /* * If devminor is too large, add 'SCHILY.devminor' to * extended attributes. */ if (rdevminor >= (1 << 18)) { add_pax_attr_int(&(pax->pax_header), "SCHILY.devminor", rdevminor); /* Truncation is not necessary here, either. */ /* archive_entry_set_rdevminor(entry_main, rdevminor & ((1 << 18) - 1)); */ need_extension = 1; } } /* * Technically, the mtime field in the ustar header can * support 33 bits, but many platforms use signed 32-bit time * values. The cutoff of 0x7fffffff here is a compromise. * Yes, this check is duplicated just below; this helps to * avoid writing an mtime attribute just to handle a * high-resolution timestamp in "restricted pax" mode. */ if (!need_extension && ((archive_entry_mtime(entry_main) < 0) || (archive_entry_mtime(entry_main) >= 0x7fffffff))) need_extension = 1; /* I use a star-compatible file flag attribute. */ p = archive_entry_fflags_text(entry_main); if (!need_extension && p != NULL && *p != '\0') need_extension = 1; + /* If there are non-trivial ACL entries, we need an extension. */ + if (!need_extension && archive_entry_acl_count(entry_original, + ARCHIVE_ENTRY_ACL_TYPE_ACCESS) > 0) + need_extension = 1; + + /* If there are non-trivial ACL entries, we need an extension. 
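 * (This second check covers the default ACL; the access ACL was
 * checked just above.)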
*/ + if (!need_extension && archive_entry_acl_count(entry_original, + ARCHIVE_ENTRY_ACL_TYPE_DEFAULT) > 0) + need_extension = 1; + /* If there are extended attributes, we need an extension */ if (!need_extension && archive_entry_xattr_count(entry_original) > 0) need_extension = 1; /* If there is sparse info, we need an extension */ if (!need_extension && sparse_count > 0) need_extension = 1; - acl_access = archive_entry_acl_count(entry_original, - ARCHIVE_ENTRY_ACL_TYPE_ACCESS); - acl_default = archive_entry_acl_count(entry_original, - ARCHIVE_ENTRY_ACL_TYPE_DEFAULT); - acl_nfs4 = archive_entry_acl_count(entry_original, - ARCHIVE_ENTRY_ACL_TYPE_NFS4); - - /* If there are any ACL entries, we need an extension */ - if (!need_extension && (acl_access + acl_default + acl_nfs4) > 0) - need_extension = 1; - /* * Libarchive used to include these in extended headers for * restricted pax format, but that confused people who * expected ustar-like time semantics. So now we only include * them in full pax format. */ if (a->archive.archive_format != ARCHIVE_FORMAT_TAR_PAX_RESTRICTED) { if (archive_entry_ctime(entry_main) != 0 || archive_entry_ctime_nsec(entry_main) != 0) add_pax_attr_time(&(pax->pax_header), "ctime", archive_entry_ctime(entry_main), archive_entry_ctime_nsec(entry_main)); if (archive_entry_atime(entry_main) != 0 || archive_entry_atime_nsec(entry_main) != 0) add_pax_attr_time(&(pax->pax_header), "atime", archive_entry_atime(entry_main), archive_entry_atime_nsec(entry_main)); /* Store birth/creationtime only if it's earlier than mtime */ if (archive_entry_birthtime_is_set(entry_main) && archive_entry_birthtime(entry_main) < archive_entry_mtime(entry_main)) add_pax_attr_time(&(pax->pax_header), "LIBARCHIVE.creationtime", archive_entry_birthtime(entry_main), archive_entry_birthtime_nsec(entry_main)); } /* * The following items are handled differently in "pax * restricted" format. In particular, in "pax restricted" * format they won't be added unless need_extension is * already set (we're already generating an extended header, so * may as well include these). */ if (a->archive.archive_format != ARCHIVE_FORMAT_TAR_PAX_RESTRICTED || need_extension) { if (archive_entry_mtime(entry_main) < 0 || archive_entry_mtime(entry_main) >= 0x7fffffff || archive_entry_mtime_nsec(entry_main) != 0) add_pax_attr_time(&(pax->pax_header), "mtime", archive_entry_mtime(entry_main), archive_entry_mtime_nsec(entry_main)); /* I use a star-compatible file flag attribute. */ p = archive_entry_fflags_text(entry_main); if (p != NULL && *p != '\0') add_pax_attr(&(pax->pax_header), "SCHILY.fflags", p); /* I use star-compatible ACL attributes.
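 * For example (an editor's illustration), a simple POSIX.1e access
 * ACL might be stored as the attribute
 * SCHILY.acl.access = "user::rwx,group::r-x,other::r-x".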
*/ - if (acl_access > 0) { - ret = add_pax_acl(a, entry_original, pax, - ARCHIVE_ENTRY_ACL_TYPE_ACCESS | - ARCHIVE_ENTRY_ACL_STYLE_EXTRA_ID); - if (ret == ARCHIVE_FATAL) + r = archive_entry_acl_text_l(entry_original, + ARCHIVE_ENTRY_ACL_TYPE_ACCESS | + ARCHIVE_ENTRY_ACL_STYLE_EXTRA_ID, + &p, NULL, pax->sconv_utf8); + if (r != 0) { + if (errno == ENOMEM) { + archive_set_error(&a->archive, ENOMEM, + "Can't allocate memory for " + "ACL.access"); return (ARCHIVE_FATAL); + } + archive_set_error(&a->archive, + ARCHIVE_ERRNO_FILE_FORMAT, + "Can't translate ACL.access to UTF-8"); + ret = ARCHIVE_WARN; + } else if (p != NULL && *p != '\0') { + add_pax_attr(&(pax->pax_header), + "SCHILY.acl.access", p); } - if (acl_default > 0) { - ret = add_pax_acl(a, entry_original, pax, - ARCHIVE_ENTRY_ACL_TYPE_DEFAULT | - ARCHIVE_ENTRY_ACL_STYLE_EXTRA_ID); - if (ret == ARCHIVE_FATAL) + r = archive_entry_acl_text_l(entry_original, + ARCHIVE_ENTRY_ACL_TYPE_DEFAULT | + ARCHIVE_ENTRY_ACL_STYLE_EXTRA_ID, + &p, NULL, pax->sconv_utf8); + if (r != 0) { + if (errno == ENOMEM) { + archive_set_error(&a->archive, ENOMEM, + "Can't allocate memory for " + "ACL.default"); return (ARCHIVE_FATAL); - } - if (acl_nfs4 > 0) { - ret = add_pax_acl(a, entry_original, pax, - ARCHIVE_ENTRY_ACL_TYPE_NFS4 | - ARCHIVE_ENTRY_ACL_STYLE_EXTRA_ID); - if (ret == ARCHIVE_FATAL) - return (ARCHIVE_FATAL); + } + archive_set_error(&a->archive, + ARCHIVE_ERRNO_FILE_FORMAT, + "Can't translate ACL.default to UTF-8"); + ret = ARCHIVE_WARN; + } else if (p != NULL && *p != '\0') { + add_pax_attr(&(pax->pax_header), + "SCHILY.acl.default", p); } /* We use GNU-tar-compatible sparse attributes. */ if (sparse_count > 0) { int64_t soffset, slength; add_pax_attr_int(&(pax->pax_header), "GNU.sparse.major", 1); add_pax_attr_int(&(pax->pax_header), "GNU.sparse.minor", 0); add_pax_attr(&(pax->pax_header), "GNU.sparse.name", entry_name.s); add_pax_attr_int(&(pax->pax_header), "GNU.sparse.realsize", archive_entry_size(entry_main)); /* Rename the file name which will be used for * ustar header to a special name, which GNU * PAX Format 1.0 requires */ archive_entry_set_pathname(entry_main, build_gnu_sparse_name(gnu_sparse_name, entry_name.s)); /* * - Make a sparse map, which will precede a file data. * - Get the total size of available data of sparse. */ archive_string_sprintf(&(pax->sparse_map), "%d\n", sparse_count); while (archive_entry_sparse_next(entry_main, &soffset, &slength) == ARCHIVE_OK) { archive_string_sprintf(&(pax->sparse_map), "%jd\n%jd\n", (intmax_t)soffset, (intmax_t)slength); sparse_total += slength; if (sparse_list_add(pax, soffset, slength) != ARCHIVE_OK) { archive_set_error(&a->archive, ENOMEM, "Can't allocate memory"); archive_entry_free(entry_main); archive_string_free(&entry_name); return (ARCHIVE_FATAL); } } } /* Store extended attributes */ if (archive_write_pax_header_xattrs(a, pax, entry_original) == ARCHIVE_FATAL) { archive_entry_free(entry_main); archive_string_free(&entry_name); return (ARCHIVE_FATAL); } } /* Only regular files have data. */ if (archive_entry_filetype(entry_main) != AE_IFREG) archive_entry_set_size(entry_main, 0); /* * Pax-restricted does not store data for hardlinks, in order * to improve compatibility with ustar. */ if (a->archive.archive_format != ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE && hardlink != NULL) archive_entry_set_size(entry_main, 0); /* * XXX Full pax interchange format does permit a hardlink * entry to have data associated with it. 
I'm not supporting * that here because the client expects me to tell them whether * or not this format expects data for hardlinks. If I * don't check here, then every pax archive will end up with * duplicated data for hardlinks. Someday, there may be * need to select this behavior, in which case the following * will need to be revisited. XXX */ if (hardlink != NULL) archive_entry_set_size(entry_main, 0); /* Save a real file size. */ real_size = archive_entry_size(entry_main); /* * Overwrite a file size by the total size of sparse blocks and * the size of sparse map info. That file size is the length of * the data, which we will exactly store into an archive file. */ if (archive_strlen(&(pax->sparse_map))) { size_t mapsize = archive_strlen(&(pax->sparse_map)); pax->sparse_map_padding = 0x1ff & (-(ssize_t)mapsize); archive_entry_set_size(entry_main, mapsize + pax->sparse_map_padding + sparse_total); } /* Format 'ustar' header for main entry. * * The trouble with file size: If the reader can't understand * the file size, they may not be able to locate the next * entry and the rest of the archive is toast. Pax-compliant * readers are supposed to ignore the file size in the main * header, so the question becomes how to maximize portability * for readers that don't support pax attribute extensions. * For maximum compatibility, I permit numeric extensions in * the main header so that the file size stored will always be * correct, even if it's in a format that only some * implementations understand. The technique used here is: * * a) If possible, follow the standard exactly. This handles * files up to 8 gigabytes minus 1. * * b) If that fails, try octal but omit the field terminator. * That handles files up to 64 gigabytes minus 1. * * c) Otherwise, use base-256 extensions. That handles files * up to 2^63 in this implementation, with the potential to * go up to 2^94. That should hold us for a while. ;-) * * The non-strict formatter uses similar logic for other * numeric fields, though they're less critical. */ if (__archive_write_format_header_ustar(a, ustarbuff, entry_main, -1, 0, NULL) == ARCHIVE_FATAL) return (ARCHIVE_FATAL); /* If we built any extended attributes, write that entry first. */ if (archive_strlen(&(pax->pax_header)) > 0) { struct archive_entry *pax_attr_entry; time_t s; int64_t uid, gid; int mode; pax_attr_entry = archive_entry_new2(&a->archive); p = entry_name.s; archive_entry_set_pathname(pax_attr_entry, build_pax_attribute_name(pax_entry_name, p)); archive_entry_set_size(pax_attr_entry, archive_strlen(&(pax->pax_header))); /* Copy uid/gid (but clip to ustar limits). */ uid = archive_entry_uid(entry_main); if (uid >= 1 << 18) uid = (1 << 18) - 1; archive_entry_set_uid(pax_attr_entry, uid); gid = archive_entry_gid(entry_main); if (gid >= 1 << 18) gid = (1 << 18) - 1; archive_entry_set_gid(pax_attr_entry, gid); /* Copy mode over (but not setuid/setgid bits) */ mode = archive_entry_mode(entry_main); #ifdef S_ISUID mode &= ~S_ISUID; #endif #ifdef S_ISGID mode &= ~S_ISGID; #endif #ifdef S_ISVTX mode &= ~S_ISVTX; #endif archive_entry_set_mode(pax_attr_entry, mode); /* Copy uname/gname. */ archive_entry_set_uname(pax_attr_entry, archive_entry_uname(entry_main)); archive_entry_set_gname(pax_attr_entry, archive_entry_gname(entry_main)); /* Copy mtime, but clip to ustar limits. */ s = archive_entry_mtime(entry_main); if (s < 0) { s = 0; } if (s >= 0x7fffffff) { s = 0x7fffffff; } archive_entry_set_mtime(pax_attr_entry, s, 0); /* Standard ustar doesn't support atime. 
*/ archive_entry_set_atime(pax_attr_entry, 0, 0); /* Standard ustar doesn't support ctime. */ archive_entry_set_ctime(pax_attr_entry, 0, 0); r = __archive_write_format_header_ustar(a, paxbuff, pax_attr_entry, 'x', 1, NULL); archive_entry_free(pax_attr_entry); /* Note that the 'x' header shouldn't ever fail to format */ if (r < ARCHIVE_WARN) { archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, "archive_write_pax_header: " "'x' header failed?! This can't happen.\n"); return (ARCHIVE_FATAL); } else if (r < ret) ret = r; r = __archive_write_output(a, paxbuff, 512); if (r != ARCHIVE_OK) { sparse_list_clear(pax); pax->entry_bytes_remaining = 0; pax->entry_padding = 0; return (ARCHIVE_FATAL); } pax->entry_bytes_remaining = archive_strlen(&(pax->pax_header)); pax->entry_padding = 0x1ff & (-(int64_t)pax->entry_bytes_remaining); r = __archive_write_output(a, pax->pax_header.s, archive_strlen(&(pax->pax_header))); if (r != ARCHIVE_OK) { /* If a write fails, we're pretty much toast. */ return (ARCHIVE_FATAL); } /* Pad out the end of the entry. */ r = __archive_write_nulls(a, (size_t)pax->entry_padding); if (r != ARCHIVE_OK) { /* If a write fails, we're pretty much toast. */ return (ARCHIVE_FATAL); } pax->entry_bytes_remaining = pax->entry_padding = 0; } /* Write the header for main entry. */ r = __archive_write_output(a, ustarbuff, 512); if (r != ARCHIVE_OK) return (r); /* * Inform the client of the on-disk size we're using, so * they can avoid unnecessarily writing a body for something * that we're just going to ignore. */ archive_entry_set_size(entry_original, real_size); if (pax->sparse_list == NULL && real_size > 0) { /* This is not a sparse file but we handle its data as * a sparse block. */ sparse_list_add(pax, 0, real_size); sparse_total = real_size; } pax->entry_padding = 0x1ff & (-(int64_t)sparse_total); archive_entry_free(entry_main); archive_string_free(&entry_name); return (ret); } /* * We need a valid name for the regular 'ustar' entry. This routine * tries to hack something more-or-less reasonable. * * The approach here tries to preserve leading dir names. We do so by * working with four sections: * 1) "prefix" directory names, * 2) "suffix" directory names, * 3) inserted dir name (optional), * 4) filename. * * These sections must satisfy the following requirements: * * Parts 1 & 2 together form an initial portion of the dir name. * * Part 3 is specified by the caller. (It should not contain a leading * or trailing '/'.) * * Part 4 forms an initial portion of the base filename. * * The filename must be <= 99 chars to fit the ustar 'name' field. * * Parts 2, 3, 4 together must be <= 99 chars to fit the ustar 'name' fld. * * Part 1 must be <= 155 chars to fit the ustar 'prefix' field. * * If the original name ends in a '/', the new name must also end in a '/' * * Trailing '/.' sequences may be stripped. * * Note: Recall that the ustar format does not store the '/' separating * parts 1 & 2, but does store the '/' separating parts 2 & 3. */ static char * build_ustar_entry_name(char *dest, const char *src, size_t src_length, const char *insert) { const char *prefix, *prefix_end; const char *suffix, *suffix_end; const char *filename, *filename_end; char *p; int need_slash = 0; /* Was there a trailing slash? */ size_t suffix_length = 99; size_t insert_length; /* Length of additional dir element to be added. */ if (insert == NULL) insert_length = 0; else /* +2 here allows for '/' before and after the insert. */ insert_length = strlen(insert) + 2; /* Step 0: Quick bailout in a common case. 
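 * (A name that already fits the 100-char ustar name field, with
 * nothing to insert, can simply be copied through unchanged.)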
*/ if (src_length < 100 && insert == NULL) { strncpy(dest, src, src_length); dest[src_length] = '\0'; return (dest); } /* Step 1: Locate filename and enforce the length restriction. */ filename_end = src + src_length; /* Remove trailing '/' chars and '/.' pairs. */ for (;;) { if (filename_end > src && filename_end[-1] == '/') { filename_end --; need_slash = 1; /* Remember to restore trailing '/'. */ continue; } if (filename_end > src + 1 && filename_end[-1] == '.' && filename_end[-2] == '/') { filename_end -= 2; need_slash = 1; /* "foo/." will become "foo/" */ continue; } break; } if (need_slash) suffix_length--; /* Find start of filename. */ filename = filename_end - 1; while ((filename > src) && (*filename != '/')) filename --; if ((*filename == '/') && (filename < filename_end - 1)) filename ++; /* Adjust filename_end so that filename + insert fits in 99 chars. */ suffix_length -= insert_length; if (filename_end > filename + suffix_length) filename_end = filename + suffix_length; /* Calculate max size for "suffix" section (#3 above). */ suffix_length -= filename_end - filename; /* Step 2: Locate the "prefix" section of the dirname, including * trailing '/'. */ prefix = src; prefix_end = prefix + 155; if (prefix_end > filename) prefix_end = filename; while (prefix_end > prefix && *prefix_end != '/') prefix_end--; if ((prefix_end < filename) && (*prefix_end == '/')) prefix_end++; /* Step 3: Locate the "suffix" section of the dirname, * including trailing '/'. */ suffix = prefix_end; suffix_end = suffix + suffix_length; /* Enforce limit. */ if (suffix_end > filename) suffix_end = filename; if (suffix_end < suffix) suffix_end = suffix; while (suffix_end > suffix && *suffix_end != '/') suffix_end--; if ((suffix_end < filename) && (*suffix_end == '/')) suffix_end++; /* Step 4: Build the new name. */ /* The OpenBSD strlcpy function is safer, but less portable. */ /* Rather than maintain two versions, just use the strncpy version. */ p = dest; if (prefix_end > prefix) { strncpy(p, prefix, prefix_end - prefix); p += prefix_end - prefix; } if (suffix_end > suffix) { strncpy(p, suffix, suffix_end - suffix); p += suffix_end - suffix; } if (insert != NULL) { /* Note: assume insert does not have leading or trailing '/' */ strcpy(p, insert); p += strlen(insert); *p++ = '/'; } strncpy(p, filename, filename_end - filename); p += filename_end - filename; if (need_slash) *p++ = '/'; *p = '\0'; return (dest); } /* * The ustar header for the pax extended attributes must have a * reasonable name: SUSv3 requires 'dirname'/PaxHeader.'pid'/'filename' * where 'pid' is the PID of the archiving process. Unfortunately, * that makes testing a pain since the output varies for each run, * so I'm sticking with the simpler 'dirname'/PaxHeader/'filename' * for now. (Someday, I'll make this settable. Then I can use the * SUS recommendation as default and test harnesses can override it * to get predictable results.) * * Joerg Schilling has argued that this is unnecessary because, in * practice, if the pax extended attributes get extracted as regular * files, no one is going to bother reading those attributes to * manually restore them. Based on this, 'star' uses * /tmp/PaxHeader/'basename' as the ustar header name. This is a * tempting argument, in part because it's simpler than the SUSv3 * recommendation, but I'm not entirely convinced. I'm also * uncomfortable with the fact that "/tmp" is a Unix-ism. * * The following routine leverages build_ustar_entry_name() above and * so is simpler than you might think. 
It just needs to provide the * additional path element and handle a few pathological cases. */ static char * build_pax_attribute_name(char *dest, const char *src) { char buff[64]; const char *p; /* Handle the null filename case. */ if (src == NULL || *src == '\0') { strcpy(dest, "PaxHeader/blank"); return (dest); } /* Prune final '/' and other unwanted final elements. */ p = src + strlen(src); for (;;) { /* Ends in "/", remove the '/' */ if (p > src && p[-1] == '/') { --p; continue; } /* Ends in "/.", remove the '.' */ if (p > src + 1 && p[-1] == '.' && p[-2] == '/') { --p; continue; } break; } /* Pathological case: After above, there was nothing left. * This includes "/." "/./." "/.//./." etc. */ if (p == src) { strcpy(dest, "/PaxHeader/rootdir"); return (dest); } /* Convert unadorned "." into a suitable filename. */ if (*src == '.' && p == src + 1) { strcpy(dest, "PaxHeader/currentdir"); return (dest); } /* * TODO: Push this string into the 'pax' structure to avoid * recomputing it every time. That will also open the door * to having clients override it. */ #if HAVE_GETPID && 0 /* Disable this for now; see above comment. */ sprintf(buff, "PaxHeader.%d", getpid()); #else /* If the platform can't fetch the pid, don't include it. */ strcpy(buff, "PaxHeader"); #endif /* General case: build a ustar-compatible name adding * "/PaxHeader/". */ build_ustar_entry_name(dest, src, p - src, buff); return (dest); } /* * GNU PAX Format 1.0 requires a special name, whose pattern is: * <dirname>/GNUSparseFile.<pid>/<filename> * * This function is used only for sparse files, which are * regular files. */ static char * build_gnu_sparse_name(char *dest, const char *src) { char buff[64]; const char *p; /* Handle the null filename case. */ if (src == NULL || *src == '\0') { strcpy(dest, "GNUSparseFile/blank"); return (dest); } /* Prune final '/' and other unwanted final elements. */ p = src + strlen(src); for (;;) { /* Ends in "/", remove the '/' */ if (p > src && p[-1] == '/') { --p; continue; } /* Ends in "/.", remove the '.' */ if (p > src + 1 && p[-1] == '.' && p[-2] == '/') { --p; continue; } break; } #if HAVE_GETPID && 0 /* Disable this as pax attribute name. */ sprintf(buff, "GNUSparseFile.%d", getpid()); #else /* If the platform can't fetch the pid, don't include it. */ strcpy(buff, "GNUSparseFile"); #endif /* General case: build a ustar-compatible name adding * "/GNUSparseFile/".
*/ build_ustar_entry_name(dest, src, p - src, buff); return (dest); } /* Write two null blocks for the end of archive */ static int archive_write_pax_close(struct archive_write *a) { return (__archive_write_nulls(a, 512 * 2)); } static int archive_write_pax_free(struct archive_write *a) { struct pax *pax; pax = (struct pax *)a->format_data; if (pax == NULL) return (ARCHIVE_OK); archive_string_free(&pax->pax_header); archive_string_free(&pax->sparse_map); archive_string_free(&pax->l_url_encoded_name); sparse_list_clear(pax); free(pax); a->format_data = NULL; return (ARCHIVE_OK); } static int archive_write_pax_finish_entry(struct archive_write *a) { struct pax *pax; uint64_t remaining; int ret; pax = (struct pax *)a->format_data; remaining = pax->entry_bytes_remaining; if (remaining == 0) { while (pax->sparse_list) { struct sparse_block *sb; if (!pax->sparse_list->is_hole) remaining += pax->sparse_list->remaining; sb = pax->sparse_list->next; free(pax->sparse_list); pax->sparse_list = sb; } } ret = __archive_write_nulls(a, (size_t)(remaining + pax->entry_padding)); pax->entry_bytes_remaining = pax->entry_padding = 0; return (ret); } static ssize_t archive_write_pax_data(struct archive_write *a, const void *buff, size_t s) { struct pax *pax; size_t ws; size_t total; int ret; pax = (struct pax *)a->format_data; /* * According to GNU PAX format 1.0, write a sparse map * before the body. */ if (archive_strlen(&(pax->sparse_map))) { ret = __archive_write_output(a, pax->sparse_map.s, archive_strlen(&(pax->sparse_map))); if (ret != ARCHIVE_OK) return (ret); ret = __archive_write_nulls(a, pax->sparse_map_padding); if (ret != ARCHIVE_OK) return (ret); archive_string_empty(&(pax->sparse_map)); } total = 0; while (total < s) { const unsigned char *p; while (pax->sparse_list != NULL && pax->sparse_list->remaining == 0) { struct sparse_block *sb = pax->sparse_list->next; free(pax->sparse_list); pax->sparse_list = sb; } if (pax->sparse_list == NULL) return (total); p = ((const unsigned char *)buff) + total; ws = s - total; if (ws > pax->sparse_list->remaining) ws = (size_t)pax->sparse_list->remaining; if (pax->sparse_list->is_hole) { /* Current block is a hole, so we do not write * the body. */ pax->sparse_list->remaining -= ws; total += ws; continue; } ret = __archive_write_output(a, p, ws); pax->sparse_list->remaining -= ws; total += ws; if (ret != ARCHIVE_OK) return (ret); } return (total); } static int has_non_ASCII(const char *_p) { const unsigned char *p = (const unsigned char *)_p; if (p == NULL) return (1); while (*p != '\0' && *p < 128) p++; return (*p != '\0'); } /* * Used by extended attribute support; encodes the name * so that there will be no '=' characters in the result. */ static char * url_encode(const char *in) { const char *s; char *d; int out_len = 0; char *out; for (s = in; *s != '\0'; s++) { if (*s < 33 || *s > 126 || *s == '%' || *s == '=') out_len += 3; else out_len++; } out = (char *)malloc(out_len + 1); if (out == NULL) return (NULL); for (s = in, d = out; *s != '\0'; s++) { /* encode any non-printable ASCII character or '%' or '=' */ if (*s < 33 || *s > 126 || *s == '%' || *s == '=') { /* URL encoding is '%' followed by two hex digits */ *d++ = '%'; *d++ = "0123456789ABCDEF"[0x0f & (*s >> 4)]; *d++ = "0123456789ABCDEF"[0x0f & *s]; } else { *d++ = *s; } } *d = '\0'; return (out); } /* * Encode a sequence of bytes into a C string using base-64 encoding. * * Returns a null-terminated C string allocated with malloc(); caller * is responsible for freeing the result.
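 *
 * A minimal usage sketch (an editor's illustration; "value" and
 * "size" stand for an extended-attribute value as seen in
 * archive_write_pax_header_xattrs() above, and "NAME" is a
 * placeholder for the url-encoded attribute name):
 *
 *	char *b64 = base64_encode((const char *)value, size);
 *	if (b64 != NULL) {
 *		add_pax_attr(&(pax->pax_header),
 *		    "LIBARCHIVE.xattr.NAME", b64);
 *		free(b64);
 *	}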
*/ static char * base64_encode(const char *s, size_t len) { static const char digits[64] = { 'A','B','C','D','E','F','G','H','I','J','K','L','M','N','O', 'P','Q','R','S','T','U','V','W','X','Y','Z','a','b','c','d', 'e','f','g','h','i','j','k','l','m','n','o','p','q','r','s', 't','u','v','w','x','y','z','0','1','2','3','4','5','6','7', '8','9','+','/' }; int v; char *d, *out; /* 3 bytes becomes 4 chars, but round up and allow for trailing NUL */ out = (char *)malloc((len * 4 + 2) / 3 + 1); if (out == NULL) return (NULL); d = out; /* Convert each group of 3 bytes into 4 characters. */ while (len >= 3) { v = (((int)s[0] << 16) & 0xff0000) | (((int)s[1] << 8) & 0xff00) | (((int)s[2]) & 0x00ff); s += 3; len -= 3; *d++ = digits[(v >> 18) & 0x3f]; *d++ = digits[(v >> 12) & 0x3f]; *d++ = digits[(v >> 6) & 0x3f]; *d++ = digits[(v) & 0x3f]; } /* Handle final group of 1 byte (2 chars) or 2 bytes (3 chars). */ switch (len) { case 0: break; case 1: v = (((int)s[0] << 16) & 0xff0000); *d++ = digits[(v >> 18) & 0x3f]; *d++ = digits[(v >> 12) & 0x3f]; break; case 2: v = (((int)s[0] << 16) & 0xff0000) | (((int)s[1] << 8) & 0xff00); *d++ = digits[(v >> 18) & 0x3f]; *d++ = digits[(v >> 12) & 0x3f]; *d++ = digits[(v >> 6) & 0x3f]; break; } /* Add trailing NUL character so output is a valid C string. */ *d = '\0'; return (out); } static void sparse_list_clear(struct pax *pax) { while (pax->sparse_list != NULL) { struct sparse_block *sb = pax->sparse_list; pax->sparse_list = sb->next; free(sb); } pax->sparse_tail = NULL; } static int _sparse_list_add_block(struct pax *pax, int64_t offset, int64_t length, int is_hole) { struct sparse_block *sb; sb = (struct sparse_block *)malloc(sizeof(*sb)); if (sb == NULL) return (ARCHIVE_FATAL); sb->next = NULL; sb->is_hole = is_hole; sb->offset = offset; sb->remaining = length; if (pax->sparse_list == NULL || pax->sparse_tail == NULL) pax->sparse_list = pax->sparse_tail = sb; else { pax->sparse_tail->next = sb; pax->sparse_tail = sb; } return (ARCHIVE_OK); } static int sparse_list_add(struct pax *pax, int64_t offset, int64_t length) { int64_t last_offset; int r; if (pax->sparse_tail == NULL) last_offset = 0; else { last_offset = pax->sparse_tail->offset + pax->sparse_tail->remaining; } if (last_offset < offset) { /* Add a hole block. */ r = _sparse_list_add_block(pax, last_offset, offset - last_offset, 1); if (r != ARCHIVE_OK) return (r); } /* Add data block. 
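 * For example (an editor's illustration): with an empty list,
 * sparse_list_add(pax, 4096, 512) first appends a hole block
 * covering bytes [0, 4096) and then a 512-byte data block at
 * offset 4096.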
*/ return (_sparse_list_add_block(pax, offset, length, 0)); } Index: projects/clang390-import/contrib/libarchive/libarchive =================================================================== --- projects/clang390-import/contrib/libarchive/libarchive (revision 305016) +++ projects/clang390-import/contrib/libarchive/libarchive (revision 305017) Property changes on: projects/clang390-import/contrib/libarchive/libarchive ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head/contrib/libarchive/libarchive:r304885-305016 Index: projects/clang390-import/contrib/libarchive =================================================================== --- projects/clang390-import/contrib/libarchive (revision 305016) +++ projects/clang390-import/contrib/libarchive (revision 305017) Property changes on: projects/clang390-import/contrib/libarchive ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head/contrib/libarchive:r304885-305016 Index: projects/clang390-import/contrib/netbsd-tests/usr.bin/dirname/t_dirname.sh =================================================================== --- projects/clang390-import/contrib/netbsd-tests/usr.bin/dirname/t_dirname.sh (revision 305016) +++ projects/clang390-import/contrib/netbsd-tests/usr.bin/dirname/t_dirname.sh (revision 305017) @@ -1,49 +1,52 @@ # $NetBSD: t_dirname.sh,v 1.1 2012/03/17 16:33:13 jruoho Exp $ # # Copyright (c) 2008 The NetBSD Foundation, Inc. # All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # 1. Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # 2. Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # # THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS # ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED # TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS # BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. 
# atf_test_case basic basic_head() { atf_set "descr" "Checks basic functionality" } basic_body() { + # Begin FreeBSD + atf_tc_expect_fail "dirname //usr//bin doesn't return //usr like it used to; bug # 212193" + # End FreeBSD atf_check -o inline:"/\n" dirname / atf_check -o inline:"/\n" dirname // atf_check -o inline:"/usr\n" dirname /usr/bin/ atf_check -o inline:"//usr\n" dirname //usr//bin// atf_check -o inline:".\n" dirname usr atf_check -o inline:".\n" dirname "" atf_check -o inline:"/\n" dirname /usr atf_check -o inline:"/usr\n" dirname /usr/bin atf_check -o inline:"usr\n" dirname usr/bin } atf_init_test_cases() { atf_add_test_case basic } Index: projects/clang390-import/lib/libc/aarch64/sys/Makefile.inc =================================================================== --- projects/clang390-import/lib/libc/aarch64/sys/Makefile.inc (revision 305016) +++ projects/clang390-import/lib/libc/aarch64/sys/Makefile.inc (revision 305017) @@ -1,24 +1,23 @@ # $FreeBSD$ MIASM:= ${MIASM:Nfreebsd[467]_*} SRCS+= __vdso_gettc.c -#MDASM= ptrace.S MDASM= cerror.S \ shmat.S \ sigreturn.S \ syscall.S \ vfork.S # Don't generate default code for these syscalls: NOASM= break.o \ exit.o \ getlogin.o \ sbrk.o \ sstk.o \ vfork.o \ yield.o PSEUDO= _exit.o \ _getlogin.o Index: projects/clang390-import/lib/libc/amd64/sys/ptrace.S =================================================================== --- projects/clang390-import/lib/libc/amd64/sys/ptrace.S (revision 305016) +++ projects/clang390-import/lib/libc/amd64/sys/ptrace.S (nonexistent) @@ -1,67 +0,0 @@ -/*- - * Copyright (c) 1990 The Regents of the University of California. - * All rights reserved. - * - * This code is derived from software contributed to Berkeley by - * William Jolitz. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- */ - -#if defined(SYSLIBC_SCCS) && !defined(lint) - .asciz "@(#)ptrace.s 5.1 (Berkeley) 4/23/90" -#endif /* SYSLIBC_SCCS and not lint */ -#include -__FBSDID("$FreeBSD$"); - -#include "SYS.h" - - .globl CNAME(__error) - .type CNAME(__error),@function - -ENTRY(ptrace) - pushq %rdi /* align stack */ - pushq %rdi - pushq %rsi - pushq %rdx - pushq %rcx -#ifdef PIC - callq PIC_PLT(CNAME(__error)) -#else - callq CNAME(__error) -#endif - popq %rcx - popq %rdx - popq %rsi - popq %rdi - popq %rdi - movl $0,(%rax) - mov $SYS_ptrace,%eax - KERNCALL - jb HIDENAME(cerror) - ret -END(ptrace) - - .section .note.GNU-stack,"",%progbits Property changes on: projects/clang390-import/lib/libc/amd64/sys/ptrace.S ___________________________________________________________________ Deleted: svn:keywords ## -1 +0,0 ## -FreeBSD=%H \ No newline at end of property Index: projects/clang390-import/lib/libc/amd64/sys/Makefile.inc =================================================================== --- projects/clang390-import/lib/libc/amd64/sys/Makefile.inc (revision 305016) +++ projects/clang390-import/lib/libc/amd64/sys/Makefile.inc (revision 305017) @@ -1,13 +1,13 @@ # from: Makefile.inc,v 1.1 1993/09/03 19:04:23 jtc Exp # $FreeBSD$ SRCS+= amd64_get_fsbase.c amd64_get_gsbase.c amd64_set_fsbase.c \ amd64_set_gsbase.c -MDASM= vfork.S brk.S cerror.S exect.S getcontext.S ptrace.S \ +MDASM= vfork.S brk.S cerror.S exect.S getcontext.S \ sbrk.S setlogin.S sigreturn.S # Don't generate default code for these syscalls: NOASM= break.o exit.o getlogin.o sstk.o vfork.o yield.o PSEUDO= _getlogin.o _exit.o Index: projects/clang390-import/lib/libc/arm/sys/ptrace.S =================================================================== --- projects/clang390-import/lib/libc/arm/sys/ptrace.S (revision 305016) +++ projects/clang390-import/lib/libc/arm/sys/ptrace.S (nonexistent) @@ -1,51 +0,0 @@ -/* $NetBSD: ptrace.S,v 1.7 2003/08/07 16:42:04 agc Exp $ */ - -/*- - * Copyright (c) 1990 The Regents of the University of California. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- * - * from: @(#)ptrace.s 5.1 (Berkeley) 4/23/90 - */ - -#include -__FBSDID("$FreeBSD$"); -#include "SYS.h" - -ENTRY(ptrace) - stmfd sp!, {r0-r3, lr} - sub sp, sp, #4 /* align stack */ - bl PIC_SYM(_C_LABEL(__error), PLT) - add sp, sp, #4 /* unalign stack */ - mov r1, #0x00000000 - str r1, [r0] - ldmfd sp!, {r0-r3, lr} - SYSTRAP(ptrace) - bcs PIC_SYM(CERROR, PLT) - RET -END(ptrace) - - .section .note.GNU-stack,"",%progbits Property changes on: projects/clang390-import/lib/libc/arm/sys/ptrace.S ___________________________________________________________________ Deleted: svn:keywords ## -1 +0,0 ## -FreeBSD=%H \ No newline at end of property Index: projects/clang390-import/lib/libc/arm/sys/Makefile.inc =================================================================== --- projects/clang390-import/lib/libc/arm/sys/Makefile.inc (revision 305016) +++ projects/clang390-import/lib/libc/arm/sys/Makefile.inc (revision 305017) @@ -1,10 +1,10 @@ # $FreeBSD$ SRCS+= __vdso_gettc.c -MDASM= Ovfork.S brk.S cerror.S ptrace.S sbrk.S shmat.S sigreturn.S syscall.S +MDASM= Ovfork.S brk.S cerror.S sbrk.S shmat.S sigreturn.S syscall.S # Don't generate default code for these syscalls: NOASM= break.o exit.o getlogin.o sstk.o vfork.o yield.o PSEUDO= _exit.o _getlogin.o Index: projects/clang390-import/lib/libc/i386/sys/ptrace.S =================================================================== --- projects/clang390-import/lib/libc/i386/sys/ptrace.S (revision 305016) +++ projects/clang390-import/lib/libc/i386/sys/ptrace.S (nonexistent) @@ -1,59 +0,0 @@ -/*- - * Copyright (c) 1990 The Regents of the University of California. - * All rights reserved. - * - * This code is derived from software contributed to Berkeley by - * William Jolitz. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- */ - -#if defined(SYSLIBC_SCCS) && !defined(lint) - .asciz "@(#)ptrace.s 5.1 (Berkeley) 4/23/90" -#endif /* SYSLIBC_SCCS and not lint */ -#include -__FBSDID("$FreeBSD$"); - -#include "SYS.h" - - .globl CNAME(__error) - .type CNAME(__error),@function - -ENTRY(ptrace) -#ifdef PIC - PIC_PROLOGUE - call PIC_PLT(CNAME(__error)) - PIC_EPILOGUE -#else - call CNAME(__error) -#endif - movl $0,(%eax) - mov $SYS_ptrace,%eax - KERNCALL - jb HIDENAME(cerror) - ret -END(ptrace) - - .section .note.GNU-stack,"",%progbits Property changes on: projects/clang390-import/lib/libc/i386/sys/ptrace.S ___________________________________________________________________ Deleted: svn:keywords ## -1 +0,0 ## -FreeBSD=%H \ No newline at end of property Index: projects/clang390-import/lib/libc/i386/sys/Makefile.inc =================================================================== --- projects/clang390-import/lib/libc/i386/sys/Makefile.inc (revision 305016) +++ projects/clang390-import/lib/libc/i386/sys/Makefile.inc (revision 305017) @@ -1,23 +1,23 @@ # from: Makefile.inc,v 1.1 1993/09/03 19:04:23 jtc Exp # $FreeBSD$ .if !defined(COMPAT_32BIT) SRCS+= i386_clr_watch.c i386_set_watch.c i386_vm86.c .endif SRCS+= i386_get_fsbase.c i386_get_gsbase.c i386_get_ioperm.c i386_get_ldt.c \ i386_set_fsbase.c i386_set_gsbase.c i386_set_ioperm.c i386_set_ldt.c -MDASM= Ovfork.S brk.S cerror.S exect.S getcontext.S ptrace.S \ +MDASM= Ovfork.S brk.S cerror.S exect.S getcontext.S \ sbrk.S setlogin.S sigreturn.S syscall.S # Don't generate default code for these syscalls: NOASM= break.o exit.o getlogin.o sstk.o vfork.o yield.o PSEUDO= _getlogin.o _exit.o MAN+= i386_get_ioperm.2 i386_get_ldt.2 i386_vm86.2 MAN+= i386_set_watch.3 MLINKS+=i386_get_ioperm.2 i386_set_ioperm.2 MLINKS+=i386_get_ldt.2 i386_set_ldt.2 MLINKS+=i386_set_watch.3 i386_clr_watch.3 Index: projects/clang390-import/lib/libc/include/libc_private.h =================================================================== --- projects/clang390-import/lib/libc/include/libc_private.h (revision 305016) +++ projects/clang390-import/lib/libc/include/libc_private.h (revision 305017) @@ -1,401 +1,402 @@ /* * Copyright (c) 1998 John Birrell . * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the author nor the names of any co-contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY JOHN BIRRELL AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ * * Private definitions for libc, libc_r and libpthread. * */ #ifndef _LIBC_PRIVATE_H_ #define _LIBC_PRIVATE_H_ #include #include /* * This global flag is non-zero when a process has created one * or more threads. It is used to avoid calling locking functions * when they are not required. */ extern int __isthreaded; /* * Elf_Auxinfo *__elf_aux_vector, the pointer to the ELF aux vector * provided by kernel. Either set for us by rtld, or found at runtime * on stack for static binaries. * * Type is void to avoid polluting whole libc with ELF types. */ extern void *__elf_aux_vector; /* * libc should use libc_dlopen internally, which respects a global * flag where loading of new shared objects can be restricted. */ void *libc_dlopen(const char *, int); /* * For dynamic linker. */ void _rtld_error(const char *fmt, ...); /* * File lock contention is difficult to diagnose without knowing * where locks were set. Allow a debug library to be built which * records the source file and line number of each lock call. */ #ifdef _FLOCK_DEBUG #define _FLOCKFILE(x) _flockfile_debug(x, __FILE__, __LINE__) #else #define _FLOCKFILE(x) _flockfile(x) #endif /* * Macros for locking and unlocking FILEs. These test if the * process is threaded to avoid locking when not required. */ #define FLOCKFILE(fp) if (__isthreaded) _FLOCKFILE(fp) #define FUNLOCKFILE(fp) if (__isthreaded) _funlockfile(fp) struct _spinlock; extern struct _spinlock __stdio_thread_lock __hidden; #define STDIO_THREAD_LOCK() \ do { \ if (__isthreaded) \ _SPINLOCK(&__stdio_thread_lock); \ } while (0) #define STDIO_THREAD_UNLOCK() \ do { \ if (__isthreaded) \ _SPINUNLOCK(&__stdio_thread_lock); \ } while (0) void __libc_spinlock_stub(struct _spinlock *); void __libc_spinunlock_stub(struct _spinlock *); /* * Indexes into the pthread jump table. * * Warning! If you change this type, you must also change the threads * libraries that reference it (libc_r, libpthread). 
*/ typedef enum { PJT_ATFORK, PJT_ATTR_DESTROY, PJT_ATTR_GETDETACHSTATE, PJT_ATTR_GETGUARDSIZE, PJT_ATTR_GETINHERITSCHED, PJT_ATTR_GETSCHEDPARAM, PJT_ATTR_GETSCHEDPOLICY, PJT_ATTR_GETSCOPE, PJT_ATTR_GETSTACKADDR, PJT_ATTR_GETSTACKSIZE, PJT_ATTR_INIT, PJT_ATTR_SETDETACHSTATE, PJT_ATTR_SETGUARDSIZE, PJT_ATTR_SETINHERITSCHED, PJT_ATTR_SETSCHEDPARAM, PJT_ATTR_SETSCHEDPOLICY, PJT_ATTR_SETSCOPE, PJT_ATTR_SETSTACKADDR, PJT_ATTR_SETSTACKSIZE, PJT_CANCEL, PJT_CLEANUP_POP, PJT_CLEANUP_PUSH, PJT_COND_BROADCAST, PJT_COND_DESTROY, PJT_COND_INIT, PJT_COND_SIGNAL, PJT_COND_TIMEDWAIT, PJT_COND_WAIT, PJT_DETACH, PJT_EQUAL, PJT_EXIT, PJT_GETSPECIFIC, PJT_JOIN, PJT_KEY_CREATE, PJT_KEY_DELETE, PJT_KILL, PJT_MAIN_NP, PJT_MUTEXATTR_DESTROY, PJT_MUTEXATTR_INIT, PJT_MUTEXATTR_SETTYPE, PJT_MUTEX_DESTROY, PJT_MUTEX_INIT, PJT_MUTEX_LOCK, PJT_MUTEX_TRYLOCK, PJT_MUTEX_UNLOCK, PJT_ONCE, PJT_RWLOCK_DESTROY, PJT_RWLOCK_INIT, PJT_RWLOCK_RDLOCK, PJT_RWLOCK_TRYRDLOCK, PJT_RWLOCK_TRYWRLOCK, PJT_RWLOCK_UNLOCK, PJT_RWLOCK_WRLOCK, PJT_SELF, PJT_SETCANCELSTATE, PJT_SETCANCELTYPE, PJT_SETSPECIFIC, PJT_SIGMASK, PJT_TESTCANCEL, PJT_CLEANUP_POP_IMP, PJT_CLEANUP_PUSH_IMP, PJT_CANCEL_ENTER, PJT_CANCEL_LEAVE, PJT_MUTEX_CONSISTENT, PJT_MUTEXATTR_GETROBUST, PJT_MUTEXATTR_SETROBUST, PJT_MAX } pjt_index_t; typedef int (*pthread_func_t)(void); typedef pthread_func_t pthread_func_entry_t[2]; extern pthread_func_entry_t __thr_jtable[]; void __set_error_selector(int *(*arg)(void)); int _pthread_mutex_init_calloc_cb_stub(pthread_mutex_t *mutex, void *(calloc_cb)(__size_t, __size_t)); typedef int (*interpos_func_t)(void); interpos_func_t *__libc_interposing_slot(int interposno); extern interpos_func_t __libc_interposing[] __hidden; enum { INTERPOS_accept, INTERPOS_accept4, INTERPOS_aio_suspend, INTERPOS_close, INTERPOS_connect, INTERPOS_fcntl, INTERPOS_fsync, INTERPOS_fork, INTERPOS_msync, INTERPOS_nanosleep, INTERPOS_openat, INTERPOS_poll, INTERPOS_pselect, INTERPOS_recvfrom, INTERPOS_recvmsg, INTERPOS_select, INTERPOS_sendmsg, INTERPOS_sendto, INTERPOS_setcontext, INTERPOS_sigaction, INTERPOS_sigprocmask, INTERPOS_sigsuspend, INTERPOS_sigwait, INTERPOS_sigtimedwait, INTERPOS_sigwaitinfo, INTERPOS_swapcontext, INTERPOS_system, INTERPOS_tcdrain, INTERPOS_read, INTERPOS_readv, INTERPOS_wait4, INTERPOS_write, INTERPOS_writev, INTERPOS__pthread_mutex_init_calloc_cb, INTERPOS_spinlock, INTERPOS_spinunlock, INTERPOS_kevent, INTERPOS_wait6, INTERPOS_ppoll, INTERPOS_map_stacks_exec, INTERPOS_fdatasync, INTERPOS_MAX }; /* * yplib internal interfaces */ #ifdef YP int _yp_check(char **); #endif /* * Initialise TLS for static programs */ void _init_tls(void); /* * Provides pthread_once()-like functionality for both single-threaded * and multi-threaded applications. */ int _once(pthread_once_t *, void (*)(void)); /* * Set the TLS thread pointer */ void _set_tp(void *tp); /* * This is a pointer in the C run-time startup code. It is used * by getprogname() and setprogname(). */ extern const char *__progname; /* * This function is used by the threading libraries to notify malloc that a * thread is exiting. */ void _malloc_thread_cleanup(void); /* * This function is used by the threading libraries to notify libc that a * thread is exiting, so its thread-local dtors should be called. */ void __cxa_thread_call_dtors(void); /* * These functions are used by the threading libraries in order to protect * malloc across fork(). 
*/ void _malloc_prefork(void); void _malloc_postfork(void); void _malloc_first_thread(void); /* * Function to clean up streams, called from abort() and exit(). */ extern void (*__cleanup)(void) __hidden; /* * Get kern.osreldate to detect ABI revisions. Explicitly * ignores value of $OSVERSION and caches result. */ int __getosreldate(void); #include #include struct aiocb; struct fd_set; struct iovec; struct kevent; struct msghdr; struct pollfd; struct rusage; struct sigaction; struct sockaddr; struct timespec; struct timeval; struct timezone; struct __siginfo; struct __ucontext; struct __wrusage; enum idtype; int __sys_aio_suspend(const struct aiocb * const[], int, const struct timespec *); int __sys_accept(int, struct sockaddr *, __socklen_t *); int __sys_accept4(int, struct sockaddr *, __socklen_t *, int); int __sys_clock_gettime(__clockid_t, struct timespec *ts); int __sys_close(int); int __sys_connect(int, const struct sockaddr *, __socklen_t); int __sys_fcntl(int, int, ...); int __sys_fdatasync(int); int __sys_fsync(int); __pid_t __sys_fork(void); int __sys_ftruncate(int, __off_t); int __sys_gettimeofday(struct timeval *, struct timezone *); int __sys_kevent(int, const struct kevent *, int, struct kevent *, int, const struct timespec *); __off_t __sys_lseek(int, __off_t, int); void *__sys_mmap(void *, __size_t, int, int, int, __off_t); int __sys_msync(void *, __size_t, int); int __sys_nanosleep(const struct timespec *, struct timespec *); int __sys_open(const char *, int, ...); int __sys_openat(int, const char *, int, ...); int __sys_pselect(int, struct fd_set *, struct fd_set *, struct fd_set *, const struct timespec *, const __sigset_t *); +int __sys_ptrace(int, __pid_t, char *, int); int __sys_poll(struct pollfd *, unsigned, int); int __sys_ppoll(struct pollfd *, unsigned, const struct timespec *, const __sigset_t *); __ssize_t __sys_pread(int, void *, __size_t, __off_t); __ssize_t __sys_pwrite(int, const void *, __size_t, __off_t); __ssize_t __sys_read(int, void *, __size_t); __ssize_t __sys_readv(int, const struct iovec *, int); __ssize_t __sys_recv(int, void *, __size_t, int); __ssize_t __sys_recvfrom(int, void *, __size_t, int, struct sockaddr *, __socklen_t *); __ssize_t __sys_recvmsg(int, struct msghdr *, int); int __sys_select(int, struct fd_set *, struct fd_set *, struct fd_set *, struct timeval *); __ssize_t __sys_sendmsg(int, const struct msghdr *, int); __ssize_t __sys_sendto(int, const void *, __size_t, int, const struct sockaddr *, __socklen_t); int __sys_setcontext(const struct __ucontext *); int __sys_sigaction(int, const struct sigaction *, struct sigaction *); int __sys_sigprocmask(int, const __sigset_t *, __sigset_t *); int __sys_sigsuspend(const __sigset_t *); int __sys_sigtimedwait(const __sigset_t *, struct __siginfo *, const struct timespec *); int __sys_sigwait(const __sigset_t *, int *); int __sys_sigwaitinfo(const __sigset_t *, struct __siginfo *); int __sys_swapcontext(struct __ucontext *, const struct __ucontext *); int __sys_thr_kill(long, int); int __sys_thr_self(long *); int __sys_truncate(const char *, __off_t); __pid_t __sys_wait4(__pid_t, int *, int, struct rusage *); __pid_t __sys_wait6(enum idtype, __id_t, int *, int, struct __wrusage *, struct __siginfo *); __ssize_t __sys_write(int, const void *, __size_t); __ssize_t __sys_writev(int, const struct iovec *, int); int __libc_sigaction(int, const struct sigaction *, struct sigaction *) __hidden; int __libc_sigprocmask(int, const __sigset_t *, __sigset_t *) __hidden; int __libc_sigsuspend(const 
__sigset_t *) __hidden; int __libc_sigwait(const __sigset_t * __restrict, int * restrict sig); int __libc_system(const char *); int __libc_tcdrain(int); int __fcntl_compat(int fd, int cmd, ...); int __sys_futimens(int fd, const struct timespec *times) __hidden; int __sys_utimensat(int fd, const char *path, const struct timespec *times, int flag) __hidden; /* execve() with PATH processing to implement posix_spawnp() */ int _execvpe(const char *, char * const *, char * const *); int _elf_aux_info(int aux, void *buf, int buflen); struct dl_phdr_info; int __elf_phdr_match_addr(struct dl_phdr_info *, void *); void __init_elf_aux_vector(void); void __libc_map_stacks_exec(void); void _pthread_cancel_enter(int); void _pthread_cancel_leave(int); #endif /* _LIBC_PRIVATE_H_ */ Index: projects/clang390-import/lib/libc/mips/sys/ptrace.S =================================================================== --- projects/clang390-import/lib/libc/mips/sys/ptrace.S (revision 305016) +++ projects/clang390-import/lib/libc/mips/sys/ptrace.S (nonexistent) @@ -1,71 +0,0 @@ -/* $NetBSD: ptrace.S,v 1.9 2003/08/07 16:42:17 agc Exp $ */ - -/*- - * Copyright (c) 1991, 1993 - * The Regents of the University of California. All rights reserved. - * - * This code is derived from software contributed to Berkeley by - * Ralph Campbell. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include -__FBSDID("$FreeBSD$"); -#include "SYS.h" - -#if defined(LIBC_SCCS) && !defined(lint) - ASMSTR("from: @(#)ptrace.s 8.1 (Berkeley) 6/4/93") - ASMSTR("$NetBSD: ptrace.S,v 1.9 2003/08/07 16:42:17 agc Exp $") -#endif /* LIBC_SCCS and not lint */ - -NESTED_NOPROFILE(ptrace, CALLFRAME_SIZ, ra) - .mask 0x80000000, (CALLFRAME_RA - CALLFRAME_SIZ) - SETUP_GP - PTR_SUBU sp, sp, CALLFRAME_SIZ - SETUP_GP64(CALLFRAME_GP, ptrace) - SAVE_GP(CALLFRAME_GP) - - PTR_S ra, CALLFRAME_RA(sp) - - PTR_LA t9, _C_LABEL(__error) # locate address of errno - jalr t9 - - PTR_L ra, CALLFRAME_RA(sp) - INT_S zero, 0(v0) # update errno value - - li v0, SYS_ptrace - syscall - - # Load __cerror's address using our gp, then restore it. 
- PTR_LA t9, __cerror - RESTORE_GP64 - PTR_ADDU sp, sp, CALLFRAME_SIZ - - bne a3, zero, 1f - - j ra -1: j t9 -END(ptrace) Property changes on: projects/clang390-import/lib/libc/mips/sys/ptrace.S ___________________________________________________________________ Deleted: svn:keywords ## -1 +0,0 ## -FreeBSD=%H \ No newline at end of property Index: projects/clang390-import/lib/libc/mips/sys/Makefile.inc =================================================================== --- projects/clang390-import/lib/libc/mips/sys/Makefile.inc (revision 305016) +++ projects/clang390-import/lib/libc/mips/sys/Makefile.inc (revision 305017) @@ -1,11 +1,11 @@ # $FreeBSD$ SRCS+= trivial-vdso_tc.c MDASM= Ovfork.S brk.S cerror.S exect.S \ - ptrace.S sbrk.S syscall.S + sbrk.S syscall.S # Don't generate default code for these syscalls: NOASM= break.o exit.o getlogin.o sstk.o vfork.o yield.o PSEUDO= _exit.o _getlogin.o Index: projects/clang390-import/lib/libc/powerpc/sys/ptrace.S =================================================================== --- projects/clang390-import/lib/libc/powerpc/sys/ptrace.S (revision 305016) +++ projects/clang390-import/lib/libc/powerpc/sys/ptrace.S (nonexistent) @@ -1,61 +0,0 @@ -/*- - * Copyright (c) 2002 Peter Grehan. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- */ -/* $NetBSD: ptrace.S,v 1.3 2000/02/23 20:16:57 kleink Exp $ */ - -#include -__FBSDID("$FreeBSD$"); - -#include "SYS.h" - -ENTRY(ptrace) - mflr %r0 - stwu %r1,-32(%r1) - stw %r0,36(%r1) - stw %r3,8(%r1) - stw %r4,12(%r1) - stw %r5,16(%r1) - stw %r6,20(%r1) - - bl PIC_PLT(CNAME(__error)) - li %r7,0 - stw %r7,0(%r3) - - lwz %r3,8(%r1) - lwz %r4,12(%r1) - lwz %r5,16(%r1) - lwz %r0,36(%r1) - lwz %r6,20(%r1) - mtlr %r0 - la %r1,32(%r1) - li %r0,SYS_ptrace - sc - bso 1f - blr -1: - b PIC_PLT(HIDENAME(cerror)) -END(ptrace) - - .section .note.GNU-stack,"",%progbits Property changes on: projects/clang390-import/lib/libc/powerpc/sys/ptrace.S ___________________________________________________________________ Deleted: svn:keywords ## -1 +0,0 ## -FreeBSD=%H \ No newline at end of property Index: projects/clang390-import/lib/libc/powerpc/sys/Makefile.inc =================================================================== --- projects/clang390-import/lib/libc/powerpc/sys/Makefile.inc (revision 305016) +++ projects/clang390-import/lib/libc/powerpc/sys/Makefile.inc (revision 305017) @@ -1,8 +1,8 @@ # $FreeBSD$ -MDASM+= brk.S cerror.S exect.S ptrace.S sbrk.S setlogin.S +MDASM+= brk.S cerror.S exect.S sbrk.S setlogin.S # Don't generate default code for these syscalls: NOASM= break.o exit.o getlogin.o sstk.o yield.o PSEUDO= _getlogin.o _exit.o Index: projects/clang390-import/lib/libc/powerpc64/sys/ptrace.S =================================================================== --- projects/clang390-import/lib/libc/powerpc64/sys/ptrace.S (revision 305016) +++ projects/clang390-import/lib/libc/powerpc64/sys/ptrace.S (nonexistent) @@ -1,68 +0,0 @@ -/*- - * Copyright (c) 2002 Peter Grehan. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- */ -/* $NetBSD: ptrace.S,v 1.3 2000/02/23 20:16:57 kleink Exp $ */ - -#include -__FBSDID("$FreeBSD$"); - -#include "SYS.h" - -ENTRY(ptrace) - mflr %r0 - std %r0,16(%r1) - stdu %r1,-80(%r1) - stw %r3,48(%r1) - stw %r4,52(%r1) - std %r5,56(%r1) - stw %r6,64(%r1) - - bl CNAME(__error) - nop - li %r7,0 - stw %r7,0(%r3) - - lwz %r3,48(%r1) - lwz %r4,52(%r1) - ld %r5,56(%r1) - lwz %r6,64(%r1) - ld %r1,0(%r1) - ld %r0,16(%r1) - mtlr %r0 - li %r0,SYS_ptrace - sc - bso 1f - blr -1: - stdu %r1,-48(%r1) /* lr already saved */ - bl HIDENAME(cerror) - nop - ld %r1,0(%r1) - ld %r0,16(%r1) - mtlr %r0 - blr -END(ptrace) - - .section .note.GNU-stack,"",%progbits Property changes on: projects/clang390-import/lib/libc/powerpc64/sys/ptrace.S ___________________________________________________________________ Deleted: svn:keywords ## -1 +0,0 ## -FreeBSD=%H \ No newline at end of property Index: projects/clang390-import/lib/libc/powerpc64/sys/Makefile.inc =================================================================== --- projects/clang390-import/lib/libc/powerpc64/sys/Makefile.inc (revision 305016) +++ projects/clang390-import/lib/libc/powerpc64/sys/Makefile.inc (revision 305017) @@ -1,8 +1,8 @@ # $FreeBSD$ -MDASM+= brk.S cerror.S exect.S ptrace.S sbrk.S setlogin.S +MDASM+= brk.S cerror.S exect.S sbrk.S setlogin.S # Don't generate default code for these syscalls: NOASM= break.o exit.o getlogin.o sstk.o yield.o PSEUDO= _getlogin.o _exit.o Index: projects/clang390-import/lib/libc/sparc64/sys/ptrace.S =================================================================== --- projects/clang390-import/lib/libc/sparc64/sys/ptrace.S (revision 305016) +++ projects/clang390-import/lib/libc/sparc64/sys/ptrace.S (nonexistent) @@ -1,57 +0,0 @@ -/* - * Copyright (c) 1992, 1993 - * The Regents of the University of California. All rights reserved. - * - * This software was developed by the Computer Systems Engineering group - * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and - * contributed to Berkeley. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- * - * from: Header: ptrace.s,v 1.2 91/12/20 01:59:00 leres Exp - * from: NetBSD: ptrace.S,v 1.4 2000/07/24 00:11:10 mycroft Exp - */ - -#if defined(SYSLIBC_SCCS) && !defined(lint) - .asciz "@(#)ptrace.s 8.1 (Berkeley) 6/4/93" -#if 0 - RCSID("$NetBSD: ptrace.S,v 1.4 2000/07/24 00:11:10 mycroft Exp $") -#endif -#endif /* SYSLIBC_SCCS and not lint */ -#include -__FBSDID("$FreeBSD$"); - -#include "SYS.h" - -_SYSENTRY(ptrace) - save %sp, -CCFSZ, %sp - call CNAME(__error) - nop - stw %g0, [%o0] - restore - _SYSCALL(ptrace) - retl - nop -_SYSEND(ptrace) Property changes on: projects/clang390-import/lib/libc/sparc64/sys/ptrace.S ___________________________________________________________________ Deleted: svn:keywords ## -1 +0,0 ## -FreeBSD=%H \ No newline at end of property Index: projects/clang390-import/lib/libc/sparc64/sys/Makefile.inc =================================================================== --- projects/clang390-import/lib/libc/sparc64/sys/Makefile.inc (revision 305016) +++ projects/clang390-import/lib/libc/sparc64/sys/Makefile.inc (revision 305017) @@ -1,20 +1,20 @@ # $FreeBSD$ SRCS+= __sparc_sigtramp_setup.c \ __sparc_utrap.c \ __sparc_utrap_align.c \ __sparc_utrap_emul.c \ __sparc_utrap_fp_disabled.S \ __sparc_utrap_gen.S \ __sparc_utrap_install.c \ __sparc_utrap_setup.c \ sigcode.S CFLAGS+= -I${LIBC_SRCTOP}/sparc64/fpu -MDASM+= brk.S cerror.S exect.S ptrace.S sbrk.S setlogin.S sigaction1.S +MDASM+= brk.S cerror.S exect.S sbrk.S setlogin.S sigaction1.S # Don't generate default code for these syscalls: NOASM= break.o exit.o getlogin.o sstk.o yield.o PSEUDO= _getlogin.o _exit.o Index: projects/clang390-import/lib/libc/sys/Makefile.inc =================================================================== --- projects/clang390-import/lib/libc/sys/Makefile.inc (revision 305016) +++ projects/clang390-import/lib/libc/sys/Makefile.inc (revision 305017) @@ -1,467 +1,468 @@ # @(#)Makefile.inc 8.3 (Berkeley) 10/24/94 # $FreeBSD$ # sys sources .PATH: ${LIBC_SRCTOP}/${LIBC_ARCH}/sys ${LIBC_SRCTOP}/sys # Include the generated makefile containing the *complete* list # of syscall names in MIASM. .include "${LIBC_SRCTOP}/../../sys/sys/syscall.mk" # Include machine dependent definitions. # # MDASM names override the default syscall names in MIASM. # NOASM will prevent the default syscall code from being generated. 
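# PSEUDO names generate only the __sys_name() and _name() entry points,
# leaving the public name() symbol to be supplied elsewhere; for the
# INTERPOSED list below, it comes from a C wrapper such as the new ptrace.c.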
# .sinclude "${LIBC_SRCTOP}/${LIBC_ARCH}/sys/Makefile.inc" SRCS+= clock_gettime.c gettimeofday.c __vdso_gettimeofday.c NOASM+= clock_gettime.o gettimeofday.o PSEUDO+= _clock_gettime.o _gettimeofday.o # Sources common to both syscall interfaces: SRCS+= \ __error.c \ interposing_table.c SRCS+= futimens.c utimensat.c NOASM+= futimens.o utimensat.o PSEUDO+= _futimens.o _utimensat.o SRCS+= pipe.c INTERPOSED = \ accept \ accept4 \ aio_suspend \ close \ connect \ fcntl \ fdatasync \ fsync \ fork \ kevent \ msync \ nanosleep \ open \ openat \ poll \ ppoll \ pselect \ + ptrace \ read \ readv \ recvfrom \ recvmsg \ select \ sendmsg \ sendto \ setcontext \ sigprocmask \ sigsuspend \ sigtimedwait \ sigwait \ sigwaitinfo \ swapcontext \ wait4 \ wait6 \ write \ writev .if ${MACHINE_CPUARCH} == "sparc64" SRCS+= sigaction.c NOASM+= sigaction.o .else INTERPOSED+= sigaction .endif SRCS+= ${INTERPOSED:S/$/.c/} NOASM+= ${INTERPOSED:S/$/.o/} PSEUDO+= ${INTERPOSED:C/^.*$/_&.o/} # Add machine dependent asm sources: SRCS+=${MDASM} # Look though the complete list of syscalls (MIASM) for names that are # not defined with machine dependent implementations (MDASM) and are # not declared for no generation of default code (NOASM). Add each # syscall that satisfies these conditions to the ASM list. .for _asm in ${MIASM} .if (${MDASM:R:M${_asm:R}} == "") .if (${NOASM:R:M${_asm:R}} == "") ASM+=$(_asm) .endif .endif .endfor SASM= ${ASM:S/.o/.S/} SPSEUDO= ${PSEUDO:S/.o/.S/} SRCS+= ${SASM} ${SPSEUDO} SYM_MAPS+= ${LIBC_SRCTOP}/sys/Symbol.map # Generated files CLEANFILES+= ${SASM} ${SPSEUDO} .if ${MACHINE_CPUARCH} == "amd64" || ${MACHINE_CPUARCH} == "i386" || \ ${MACHINE_CPUARCH} == "powerpc" || ${MACHINE_ARCH:Marmv6*} NOTE_GNU_STACK='\t.section .note.GNU-stack,"",%%progbits\n' .else NOTE_GNU_STACK='' .endif ${SASM}: printf '#include "compat.h"\n' > ${.TARGET} printf '#include "SYS.h"\nRSYSCALL(${.PREFIX})\n' >> ${.TARGET} printf ${NOTE_GNU_STACK} >>${.TARGET} ${SPSEUDO}: printf '#include "compat.h"\n' > ${.TARGET} printf '#include "SYS.h"\nPSEUDO(${.PREFIX:S/_//})\n' \ >> ${.TARGET} printf ${NOTE_GNU_STACK} >>${.TARGET} MAN+= abort2.2 \ accept.2 \ access.2 \ acct.2 \ adjtime.2 \ aio_cancel.2 \ aio_error.2 \ aio_fsync.2 \ aio_mlock.2 \ aio_read.2 \ aio_return.2 \ aio_suspend.2 \ aio_waitcomplete.2 \ aio_write.2 \ bind.2 \ bindat.2 \ brk.2 \ cap_enter.2 \ cap_fcntls_limit.2 \ cap_ioctls_limit.2 \ cap_rights_limit.2 \ chdir.2 \ chflags.2 \ chmod.2 \ chown.2 \ chroot.2 \ clock_gettime.2 \ close.2 \ closefrom.2 \ connect.2 \ connectat.2 \ cpuset.2 \ cpuset_getaffinity.2 \ dup.2 \ execve.2 \ _exit.2 \ extattr_get_file.2 \ fcntl.2 \ ffclock.2 \ fhopen.2 \ flock.2 \ fork.2 \ fsync.2 \ getdirentries.2 \ getdtablesize.2 \ getfh.2 \ getfsstat.2 \ getgid.2 \ getgroups.2 \ getitimer.2 \ getlogin.2 \ getloginclass.2 \ getpeername.2 \ getpgrp.2 \ getpid.2 \ getpriority.2 \ getrlimit.2 \ getrusage.2 \ getsid.2 \ getsockname.2 \ getsockopt.2 \ gettimeofday.2 \ getuid.2 \ intro.2 \ ioctl.2 \ issetugid.2 \ jail.2 \ kenv.2 \ kill.2 \ kldfind.2 \ kldfirstmod.2 \ kldload.2 \ kldnext.2 \ kldstat.2 \ kldsym.2 \ kldunload.2 \ kqueue.2 \ ktrace.2 \ link.2 \ lio_listio.2 \ listen.2 \ lseek.2 \ madvise.2 \ mincore.2 \ minherit.2 \ mkdir.2 \ mkfifo.2 \ mknod.2 \ mlock.2 \ mlockall.2 \ mmap.2 \ modfind.2 \ modnext.2 \ modstat.2 \ mount.2 \ mprotect.2 \ mq_close.2 \ mq_getattr.2 \ mq_notify.2 \ mq_open.2 \ mq_receive.2 \ mq_send.2 \ mq_setattr.2 \ msgctl.2 \ msgget.2 \ msgrcv.2 \ msgsnd.2 \ msync.2 \ munmap.2 \ nanosleep.2 \ nfssvc.2 \ ntp_adjtime.2 \ 
numa_getaffinity.2 \ open.2 \ pathconf.2 \ pdfork.2 \ pipe.2 \ poll.2 \ posix_fadvise.2 \ posix_fallocate.2 \ posix_openpt.2 \ procctl.2 \ profil.2 \ pselect.2 \ ptrace.2 \ quotactl.2 \ read.2 \ readlink.2 \ reboot.2 \ recv.2 \ rename.2 \ revoke.2 \ rfork.2 \ rmdir.2 \ rtprio.2 .if !defined(NO_P1003_1B) MAN+= sched_get_priority_max.2 \ sched_setparam.2 \ sched_setscheduler.2 \ sched_yield.2 .endif MAN+= sctp_generic_recvmsg.2 \ sctp_generic_sendmsg.2 \ sctp_peeloff.2 \ select.2 \ semctl.2 \ semget.2 \ semop.2 \ send.2 \ setfib.2 \ sendfile.2 \ setgroups.2 \ setpgid.2 \ setregid.2 \ setresuid.2 \ setreuid.2 \ setsid.2 \ setuid.2 \ shmat.2 \ shmctl.2 \ shmget.2 \ shm_open.2 \ shutdown.2 \ sigaction.2 \ sigaltstack.2 \ sigpending.2 \ sigprocmask.2 \ sigqueue.2 \ sigreturn.2 \ sigstack.2 \ sigsuspend.2 \ sigwait.2 \ sigwaitinfo.2 \ socket.2 \ socketpair.2 \ stat.2 \ statfs.2 \ swapon.2 \ symlink.2 \ sync.2 \ sysarch.2 \ syscall.2 \ thr_exit.2 \ thr_kill.2 \ thr_new.2 \ thr_self.2 \ thr_set_name.2 \ timer_create.2 \ timer_delete.2 \ timer_settime.2 \ truncate.2 \ umask.2 \ undelete.2 \ unlink.2 \ utimensat.2 \ utimes.2 \ utrace.2 \ uuidgen.2 \ vfork.2 \ wait.2 \ write.2 \ _umtx_op.2 MLINKS+=accept.2 accept4.2 MLINKS+=access.2 eaccess.2 \ access.2 faccessat.2 MLINKS+=brk.2 sbrk.2 MLINKS+=cap_enter.2 cap_getmode.2 MLINKS+=cap_fcntls_limit.2 cap_fcntls_get.2 MLINKS+=cap_ioctls_limit.2 cap_ioctls_get.2 MLINKS+=cap_rights_limit.2 cap_rights_get.2 MLINKS+=chdir.2 fchdir.2 MLINKS+=chflags.2 chflagsat.2 \ chflags.2 fchflags.2 \ chflags.2 lchflags.2 MLINKS+=chmod.2 fchmod.2 \ chmod.2 fchmodat.2 \ chmod.2 lchmod.2 MLINKS+=chown.2 fchown.2 \ chown.2 fchownat.2 \ chown.2 lchown.2 MLINKS+=clock_gettime.2 clock_getres.2 \ clock_gettime.2 clock_settime.2 MLINKS+=cpuset.2 cpuset_getid.2 \ cpuset.2 cpuset_setid.2 MLINKS+=cpuset_getaffinity.2 cpuset_setaffinity.2 MLINKS+=dup.2 dup2.2 MLINKS+=execve.2 fexecve.2 MLINKS+=extattr_get_file.2 extattr.2 \ extattr_get_file.2 extattr_delete_fd.2 \ extattr_get_file.2 extattr_delete_file.2 \ extattr_get_file.2 extattr_delete_link.2 \ extattr_get_file.2 extattr_get_fd.2 \ extattr_get_file.2 extattr_get_link.2 \ extattr_get_file.2 extattr_list_fd.2 \ extattr_get_file.2 extattr_list_file.2 \ extattr_get_file.2 extattr_list_link.2 \ extattr_get_file.2 extattr_set_fd.2 \ extattr_get_file.2 extattr_set_file.2 \ extattr_get_file.2 extattr_set_link.2 MLINKS+=ffclock.2 ffclock_getcounter.2 \ ffclock.2 ffclock_getestimate.2 \ ffclock.2 ffclock_setestimate.2 MLINKS+=fhopen.2 fhstat.2 fhopen.2 fhstatfs.2 MLINKS+=fsync.2 fdatasync.2 MLINKS+=getdirentries.2 getdents.2 MLINKS+=getfh.2 lgetfh.2 MLINKS+=getgid.2 getegid.2 MLINKS+=getitimer.2 setitimer.2 MLINKS+=getlogin.2 getlogin_r.3 MLINKS+=getlogin.2 setlogin.2 MLINKS+=getloginclass.2 setloginclass.2 MLINKS+=getpgrp.2 getpgid.2 MLINKS+=getpid.2 getppid.2 MLINKS+=getpriority.2 setpriority.2 MLINKS+=getrlimit.2 setrlimit.2 MLINKS+=getsockopt.2 setsockopt.2 MLINKS+=gettimeofday.2 settimeofday.2 MLINKS+=getuid.2 geteuid.2 MLINKS+=intro.2 errno.2 MLINKS+=jail.2 jail_attach.2 \ jail.2 jail_get.2 \ jail.2 jail_remove.2 \ jail.2 jail_set.2 MLINKS+=kldunload.2 kldunloadf.2 MLINKS+=kqueue.2 kevent.2 \ kqueue.2 EV_SET.3 MLINKS+=link.2 linkat.2 MLINKS+=madvise.2 posix_madvise.2 MLINKS+=mkdir.2 mkdirat.2 MLINKS+=mkfifo.2 mkfifoat.2 MLINKS+=mknod.2 mknodat.2 MLINKS+=mlock.2 munlock.2 MLINKS+=mlockall.2 munlockall.2 MLINKS+=modnext.2 modfnext.2 MLINKS+=mount.2 nmount.2 \ mount.2 unmount.2 MLINKS+=mq_receive.2 mq_timedreceive.2 MLINKS+=mq_send.2 
mq_timedsend.2 MLINKS+=ntp_adjtime.2 ntp_gettime.2 MLINKS+=numa_getaffinity.2 numa_setaffinity.2 MLINKS+=open.2 openat.2 MLINKS+=pathconf.2 fpathconf.2 MLINKS+=pathconf.2 lpathconf.2 MLINKS+=pdfork.2 pdgetpid.2\ pdfork.2 pdkill.2 \ pdfork.2 pdwait4.2 MLINKS+=pipe.2 pipe2.2 MLINKS+=poll.2 ppoll.2 MLINKS+=read.2 pread.2 \ read.2 preadv.2 \ read.2 readv.2 MLINKS+=readlink.2 readlinkat.2 MLINKS+=recv.2 recvfrom.2 \ recv.2 recvmsg.2 MLINKS+=rename.2 renameat.2 MLINKS+=rtprio.2 rtprio_thread.2 .if !defined(NO_P1003_1B) MLINKS+=sched_get_priority_max.2 sched_get_priority_min.2 \ sched_get_priority_max.2 sched_rr_get_interval.2 MLINKS+=sched_setparam.2 sched_getparam.2 MLINKS+=sched_setscheduler.2 sched_getscheduler.2 .endif MLINKS+=select.2 FD_CLR.3 \ select.2 FD_ISSET.3 \ select.2 FD_SET.3 \ select.2 FD_ZERO.3 MLINKS+=send.2 sendmsg.2 \ send.2 sendto.2 MLINKS+=setpgid.2 setpgrp.2 MLINKS+=setresuid.2 getresgid.2 \ setresuid.2 getresuid.2 \ setresuid.2 setresgid.2 MLINKS+=setuid.2 setegid.2 \ setuid.2 seteuid.2 \ setuid.2 setgid.2 MLINKS+=shmat.2 shmdt.2 MLINKS+=shm_open.2 shm_unlink.2 MLINKS+=sigwaitinfo.2 sigtimedwait.2 MLINKS+=stat.2 fstat.2 \ stat.2 fstatat.2 \ stat.2 lstat.2 MLINKS+=statfs.2 fstatfs.2 MLINKS+=swapon.2 swapoff.2 MLINKS+=symlink.2 symlinkat.2 MLINKS+=syscall.2 __syscall.2 MLINKS+=timer_settime.2 timer_getoverrun.2 \ timer_settime.2 timer_gettime.2 MLINKS+=thr_kill.2 thr_kill2.2 MLINKS+=truncate.2 ftruncate.2 MLINKS+=unlink.2 unlinkat.2 MLINKS+=utimensat.2 futimens.2 MLINKS+=utimes.2 futimes.2 \ utimes.2 futimesat.2 \ utimes.2 lutimes.2 MLINKS+=wait.2 wait3.2 \ wait.2 wait4.2 \ wait.2 waitpid.2 \ wait.2 waitid.2 \ wait.2 wait6.2 MLINKS+=write.2 pwrite.2 \ write.2 pwritev.2 \ write.2 writev.2 Index: projects/clang390-import/lib/libc/sys/ptrace.c =================================================================== --- projects/clang390-import/lib/libc/sys/ptrace.c (nonexistent) +++ projects/clang390-import/lib/libc/sys/ptrace.c (revision 305017) @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2016 The FreeBSD Foundation. + * All rights reserved. + * + * Portions of this software were developed by Konstantin Belousov + * under sponsorship from the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice(s), this list of conditions and the following disclaimer as + * the first lines of this file unmodified other than the possible + * addition of one or more copyright notices. + * 2. Redistributions in binary form must reproduce the above copyright + * notice(s), this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include "libc_private.h" + +__weak_reference(_ptrace, ptrace); + +int +_ptrace(int request, pid_t pid, caddr_t addr, int data) +{ + + errno = 0; + return (__sys_ptrace(request, pid, addr, data)); +} Property changes on: projects/clang390-import/lib/libc/sys/ptrace.c ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:keywords ## -0,0 +1 ## +FreeBSD=%H \ No newline at end of property Added: svn:mime-type ## -0,0 +1 ## +text/plain \ No newline at end of property Index: projects/clang390-import/release/doc/en_US.ISO8859-1/relnotes/article.xml =================================================================== --- projects/clang390-import/release/doc/en_US.ISO8859-1/relnotes/article.xml (revision 305016) +++ projects/clang390-import/release/doc/en_US.ISO8859-1/relnotes/article.xml (revision 305017) @@ -1,1931 +1,402 @@ %release; %sponsor; %vendor; ]>
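The new lib/libc/sys/ptrace.c above zeroes errno because requests such as
PT_READ_I and PT_READ_D return the fetched word in-band, where -1 is a
legitimate value; the only reliable failure test is errno. A minimal sketch
of a caller relying on that contract (read_word() is a hypothetical helper,
not part of this change):

#include <sys/types.h>
#include <sys/ptrace.h>
#include <errno.h>

int
read_word(pid_t pid, caddr_t addr, int *out)
{
	int w;

	/* libc's ptrace() wrapper clears errno before the syscall. */
	w = ptrace(PT_READ_D, pid, addr, 0);
	if (w == -1 && errno != 0)
		return (-1);	/* real failure; errno was set by the kernel */
	*out = w;
	return (0);
}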
&os; &release.current; Release Notes The &os; Project $FreeBSD$ - 2015 2016 The &os; Documentation Project &tm-attrib.freebsd; &tm-attrib.ibm; &tm-attrib.ieee; &tm-attrib.intel; &tm-attrib.sparc; &tm-attrib.general; The release notes for &os; &release.current; contain a summary of the changes made to the &os; base system on the &release.branch; development line. This document lists applicable security advisories that were issued since the last release, as well as significant changes to the &os; kernel and userland. Some brief remarks on upgrading are also presented. Introduction This document contains the release notes for &os; &release.current;. It describes recently added, changed, or deleted features of &os;. It also provides some notes on upgrading from previous versions of &os;. The &release.type; distribution to which these release notes apply represents the latest point along the &release.branch; development branch since &release.branch; was created. Information regarding pre-built, binary &release.type; distributions along this branch can be found at &release.url;. The &release.type; distribution to which these release notes apply represents a point along the &release.branch; development branch between &release.prev; and the future &release.next;. Information regarding pre-built, binary &release.type; distributions along this branch can be found at &release.url;. This distribution of &os; &release.current; is a &release.type; distribution. It can be found at &release.url; or any of its mirrors. More information on obtaining this (or other) &release.type; distributions of &os; can be found in the Obtaining &os; appendix to the &os; Handbook. All users are encouraged to consult the release errata before installing &os;. The errata document is updated with late-breaking information discovered late in the release cycle or after the release. Typically, it contains information on known bugs, security advisories, and corrections to documentation. An up-to-date copy of the errata for &os; &release.current; can be found on the &os; Web site. This document describes the most user-visible new or changed features in &os; since &release.prev;. In general, changes described here are unique to the &release.branch; branch unless specifically marked as &merged; features. Typical release note items document recent security advisories issued after &release.prev;, new drivers or hardware support, new commands or options, major bug fixes, or contributed software upgrades. They may also list changes to major ports/packages or release engineering practices. Clearly the release notes cannot list every single change made to &os; between releases; this document focuses primarily on security advisories, user-visible changes, and major architectural improvements. Upgrading from Previous Releases of &os; Binary upgrades between RELEASE versions (and snapshots of the various security branches) are supported using the &man.freebsd-update.8; utility. The binary upgrade procedure will update unmodified userland utilities, as well as unmodified GENERIC kernels distributed as a part of an official &os; release. The &man.freebsd-update.8; utility requires that the host being upgraded have Internet connectivity. Source-based upgrades (those based on recompiling the &os; base system from source code) from previous versions are supported, according to the instructions in /usr/src/UPDATING. Upgrading &os; should only be attempted after backing up all data and configuration files. 
Security and Errata This section lists the various Security Advisories and Errata Notices since &release.prev;. Security Advisories &security; Errata Notices &errata; Userland This section covers changes and additions to userland applications, contributed software, and system utilities. Userland Configuration Changes - The default &man.newsyslog.conf.5; now - includes files in the - /etc/newsyslog.conf.d/ and - /usr/local/etc/newsyslog.conf.d/ - directories by default for &man.newsyslog.8;. - - The &man.mailwrapper.8; utility has been - updated to use &man.mailer.conf.5; from the - LOCALBASE environment variable, which - defaults to /usr/local - if unset. - - The MK_ARM_EABI - &man.src.conf.5; option has been removed. - - The ntp suite - has been updated to version 4.2.8p8. +   Userland Application Changes - When unable to load a kernel module with - &man.kldload.8;, a message informing to view output of - &man.dmesg.8; is now printed, opposed to the previous output - Exec format error.. - - Allow &man.pciconf.8; to identify PCI - devices that are attached to a driver to be identified by - their device name instead of just the selector. Additionally, - an optional device argument to the -l flag - to restrict the output to only listing details about a single - device. - - A new flag, onifconsole - has been added to /etc/ttys. This allows - the system to provide a login prompt via serial console if the - device is an active kernel console, otherwise it is equivalent - to off. - - Support for displaying VPD for PCI - devices via &man.pciconf.8; has been added. - - &man.ping.8; protects against malicious - network packets using the Capsicum framework to drop - privileges. - - The &man.ps.1; utility has been - updated to include the -J flag, used to - filter output by matching &man.jail.8; IDs and names. - Additionally, argument 0 can be used to - -J to only list processes running on the - host system. - - The &man.top.1; utility has been updated - to filter by &man.jail.8; ID or name, in followup to the - &man.ps.1; change in r265229. - - The &man.pmcstat.8; utility has been - updated to include a new flag, -l, which - ends event collection after the specified number of - seconds. - - The &man.ps.1; utility has been updated - to include a new keyword, tracer, which - displays the PID of the tracing - process. - - Support for adding empty partitions has - been added to the &man.mkimg.1; utility. - - The &man.primes.6; utility has been - updated to correctly enumerate prime numbers between - 4295098369 and - 3825123056546413050, which prior to this - change, it would be possible for returned values to be - incorrectly identified as prime numbers. - - The &man.mkimg.1; utility has been - updated to include three options used to print information - about &man.mkimg.1; itself: - - - - - - - - Option - Output - - - - - - --version - The current version of the &man.mkimg.1; - utility - - - - --formats - The disk image file formats supported by - &man.mkimg.1; - - - - --schemes - The partition schemes supported by - &man.mkimg.1; - - - - - - Userland &man.ctf.5; support in - &man.dtrace.1; has been added. With this change, - &man.dtrace.1; is able to resolve type info for function and - USDT probe arguments, and function return - values. - - The &man.elfdump.1; utility has been - updated to support capability mode provided by - &man.capsicum.4;. - - The - &man.fstyp.8; utility has been added, which is used to - determine the filesystem on a specified device. 
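The &man.ping.8; and &man.elfdump.1; entries above both refer to the
&man.capsicum.4; capability-mode pattern: acquire every needed resource
first, then drop ambient authority. A minimal sketch of that pattern
(illustration only; the file name is arbitrary):

#include <sys/capsicum.h>
#include <err.h>
#include <stdio.h>

int
main(void)
{
	FILE *fp;

	/* Open everything the program will need before sandboxing. */
	if ((fp = fopen("/etc/motd", "r")) == NULL)
		err(1, "fopen");
	if (cap_enter() == -1)
		err(1, "cap_enter");
	/*
	 * Capability mode is now in effect: a fresh fopen() would fail
	 * with ECAPMODE, but the existing descriptor keeps working.
	 */
	fclose(fp);
	return (0);
}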
- - The libedit library - has been updated to support UTF-8, which - additionally provides Unicode support to &man.sh.1;. - - The - &man.mkimg.1; utility has been updated to support the - MBR EFI partition - type. - - The &man.ptrace.2; system - call has been updated to include support for Altivec registers on - &os;/&arch.powerpc;. - - A new device control utility, - &man.devctl.8;, has been added, which allows making - administrative changes to individual devices, such as - attaching and detaching drivers, and enabling and disabling - devices. The &man.devctl.8; utility uses the new - &man.devctl.3; library. - - The &man.netstat.1; utility has been - updated to link against the &man.libxo.3; shared - library. - - A new flag, -c, has - been added to the &man.mkimg.1; utility, which allows - specifying the capacity of the target disk image. - - The - &man.uefisign.8; utility has been added. - - The &man.freebsd-update.8; utility has - been updated to prevent fetching updated binary patches when - a previous upgrade has not been fully completed. - - A regression in the &man.libarchive.3; - library that would prevent a directory from being included in - the archive when --one-file-system is used - has been fixed. - - The - &man.ar.1; utility has been updated to set - ARCHIVE_EXTRACT_SECURE_SYMLINKS and - ARCHIVE_EXTRACT_SECURE_NODOTDOT to disallow - directory traversal when extracting an archive, similar to - &man.tar.1;. - - A race condition in &man.wc.1; that - would cause final results to be sent to &man.stderr.4; when - receiving the SIGINFO signal has been - fixed. - - The &man.chflags.1;, &man.chgrp.1;, - &man.chmod.1;, and &man.chown.8; utilities now affect symbolic - links when the -R flag is specified, as - documented in &man.symlink.7;. - - The &man.date.1; utility has been - updated to print the modification time of the file passed as - an argument to the -r flag, improving - compatibility with the GNU &man.date.1; - utility behavior. - - The &man.pw.8; utility has been updated - with a new flag, -R, that sets the root - directory within which the utility will operate. - - The &man.lockstat.1; utility has been - updated with several improvements: - - - - Spin locks are now reported as the amount of time - spinning, instead of loop iterations. - - - - Reader locks are now recognized as adaptive locks that can - spin on &os;. - - - - Lock acquisition events for successful reader try-lock - events are now reported. - - - - Spin and block events are now reported before lock - acquisition events. - - - - The &man.fstyp.8; utility has been - updated to be able to detect &man.zfs.8; and &man.geli.8; - filesystems. - - The &man.mkimg.1; utility has been - updated to include support for NTFS - filesystems in both MBR and - GPT partitioning schemes. - - The &man.quota.1; utility has been - updated to include support for IPv6. - - The &man.jexec.8; utility has been - updated to include a new flag, -l, which - ensures a clean environment in the target jail when used. - Additionally, &man.jexec.8; will run a shell within the target - jail when no command is specified. - - The &man.w.1; utility has been updated - to display the full IPv6 remote address of the host from which - a user is connected. - - The &man.jail.8; framework has been - updated to allow mounting &man.linprocfs.5; and - &man.linsysfs.5; within a jail. - - The &man.patch.1; utility has been - updated to include a new option to the -V - flag, none, which disables backup file - creation when applying a patch.
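The new &man.jexec.8; -l behavior noted above can be exercised as follows; the jail name is hypothetical:
# Run a command with a clean environment inside the jail:
jexec -l www /usr/bin/env
# With no command specified, a shell is started within the jail:
jexec -l www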
- - The - &man.ar.1; utility now enables deterministic mode - (-D) by default. This behavior can be - disabled by specifying the -U flag. - - The &man.xargs.1; utility has been - updated to allow specifying 0 as an - argument to the -P (parallel mode) flag, - which allows creating as many concurrent processes as - possible. - - The &man.patch.1; utility has been - updated to remove the automatic checkout feature. - - A - new utility, &man.sesutil.8;, has been added, which is used - to manage &man.ses.4; devices. - - The &man.pciconf.8; utility has been - updated to use the PCI ID database from the misc/pciids package, if present, - falling back to the PCI ID database in the &os; base - system. - - The &man.ifconfig.8; utility has been - updated to always exit with an error code if an important - &man.ioctl.2; fails. +   Contributed Software - &man.byacc.1; has been updated to - version 20140101. - - OpenSSH has - been updated to 7.2p2. - - mdocml has - been updated to version 1.12.3. - - The binutils - suite of utilities has been updated to include upstream - patches that add new relocations for &arch.powerpc; - support. - - The - ELF Tool Chain has been updated to - upstream revision r3136. - - The texinfo - utility and info pages were removed from - the base system. The print/texinfo port should be - installed on systems where info pages are - needed. - - The ELF - object manipulation tools - addr2line, - elfcopy (strip), - nm, - readelf, - size, and - strings were switched to the - versions from the ELF Tool Chain project. - - The libedit library - has been updated to include UTF-8 support, - adding UTF-8 support to the &man.sh.1; - shell. - - The &man.xz.1; utility has been updated - to support multi-threaded compression. - - The - elftoolchain utilities have been - updated to version 3179. - - The &man.xz.1; utility has been updated - to version 5.2.2. - - The &man.nvi.1; utility has been updated - to version 2.1.3. - - The &man.wpa.supplicant.8; and - &man.hostapd.8; utilities have been updated to version - 2.5. - - The - &man.resolvconf.8; utility has been updated to version - 3.7.3. - - less has - been updated to version v481. - - bmake has - been updated to version 20150606. - - sendmail has - been updated to 8.15.2. Starting with &os; 11.0 and - sendmail 8.15, sendmail uses uncompressed IPv6 addresses by - default, i.e., they will not contain ::. For - example, instead of ::1, it will be - 0:0:0:0:0:0:0:1. This permits a zero subnet to - have a more specific match, such as different map entries for - IPv6:0:0 versus IPv6:0. This change requires that - configuration data (including maps, files, classes, custom - rulesets, etc.) use the same format, so make certain such - configuration data is upgraded accordingly. As a very simple check, - search for patterns like 'IPv6:[0-9a-fA-F:]*::' and 'IPv6::'. - To return to the old behavior, set the m4 option - confUSE_COMPRESSED_IPV6_ADDRESSES or the cf - option UseCompressedIPv6Addresses. - - The &man.tcpdump.1; utility has been - updated to version 4.7.4. - - OpenSSL has - been updated to version 1.0.2h. - - The - &man.ssh.1; utility has been updated to re-implement hostname - canonicalization before locating the host in - known_hosts. - - The &man.libarchive.3; library has been - updated to properly skip a sparse file entry in a &man.tar.1; - file, which would previously produce errors. - - The apr - library used by &man.svnlite.1; has been updated to version - 1.5.2.
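The sendmail configuration check suggested above can be scripted directly from the patterns given in the text; the paths are illustrative:
grep -E 'IPv6:[0-9a-fA-F:]*::' /etc/mail/*.cf /etc/mail/*.mc
grep 'IPv6::' /etc/mail/*.cf /etc/mail/*.mc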
- - The serf - library used by &man.svnlite.1; has been updated to version - 1.3.8. - - The &man.svnlite.1; utility has been - updated to version 1.8.14. - - The sqlite3 - library used by &man.svnlite.1; and &man.kerberos.8; has been - updated to version 3.12.1. - - Timezone data files have been updated to - version 2015f. - - The &man.acpi.4; subsystem has been - updated to version 20150818. - - The &man.unbound.8; utility has been - updated to version 1.5.4. - - &man.jemalloc.3; has been updated to - version 4.0.2. - - The &man.file.1; utility has been - updated to version 5.28. - - The &man.nc.1; utility has been updated - to the OpenBSD 5.8 version. - - Clang has - been updated to version 3.8.0. - - LLVM has - been updated to version 3.8.0. - - LLDB has - been updated to version 3.8.0. - - libc++ has - been updated to version 3.8.0. - - The - compiler_rt library has been - updated to version 3.8.0. - - ACPICA has been - updated to version 20160527. - - OpenBSM has been - updated to version 1.2 alpha 4. - - The NetBSD - Project's &man.libblacklist.3; library and applications - have been ported and integrated into the system. Packet - filtering support for the &man.pf.4; packet filtering system - has been implemented. The blacklist - system provides the blacklistd - daemon, the helper script - blacklistd-helper to make changes - to the running packet filter system, and the - blacklistctl control program. - A selection of system daemons, including - fingerd, - ftpd, - rlogind, and - rshd, have been modified to support - sending notifications to the blacklistd - daemon. - - Support for - the &man.ipfw.4; packet filter has been added to the - blacklistd-helper script. - - Support for - the &man.ipfilter.4; packet filter has been added to the - blacklistd-helper script. +   Installation and Configuration Tools - The &man.bsdinstall.8; partition editor - and &man.sade.8; utility have been updated to include native - ZFS support. - - - &man.bsdinstall.8;/zfsboot GPT+BIOS+GELI installs now make use - of GELIBOOT, which allows ZFS Boot Environments to be used with - GELI encrypted ZFS pools. - - A module to configure wifi devices has been added - to &man.bsdinstall.8;. - - The &os; installation utility, - &man.bsdinstall.8;, has been updated to set the - canmount &man.zfs.8; property to - off for the /var dataset, preventing the - contents of directories within /var from conflicting when - using multiple boot environments, such as those provided by - sysutils/beadm. - - The &man.bsdconfig.8; utility has been - updated to skip the initial &man.tzsetup.8; - UTC versus wall-clock time prompt when run - in a virtual machine, determined when the - kern.vm_guest &man.sysctl.8; is set to - 1. - - The &man.bsdinstall.8; utility has been - updated to use the new &man.dpv.3; library to display progress - when extracting the &os; distributions. - - Support for aligning partitions on 1MB boundaries - has been added to - &man.bsdinstall.8;. - - Support for detecting and implementing - a workaround for various laptops and motherboards that do not - boot properly from GPT-partitioned disks - has been added to &man.bsdinstall.8;. Additionally, the - active flag will be set on the partition - when needed. - - Support for selecting the partitioning - scheme when installing on the UFS - filesystem has been added to &man.bsdinstall.8;.
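The installer behavior for the /var dataset described above can be verified on an installed system; a sketch assuming the conventional zroot pool name:
# Expect "off" if the installer set the property as described:
zfs get -H -o value canmount zroot/var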
+   <filename class="directory">/etc/rc.d</filename> Scripts - The &man.rc.8; subsystem has been - updated to allow configuring services in ${LOCALBASE}/etc/rc.conf.d/. - If LOCALBASE is unset, it defaults to - /usr/local. - - A new &man.rc.8; script, - growfs, has been added, which will resize - the root filesystem on boot if /firstboot - exists. - - The mrouted - &man.rc.8; script has been removed from the base system. An - equivalent script is available from the net/mrouted port. - - A new &man.rc.8; script, - iovctl, has been added, which allows - automatically starting the &man.iovctl.8; utility at - boot. - - The &man.service.8; utility has been - updated to honor entries within /etc/rc.conf.d/. - +   <filename class="directory">/etc/periodic</filename> Scripts - The daily &man.periodic.8; script - 110.clean-tmps has been updated to avoid - crossing filesystem mount boundaries when cleaning files in - /tmp. - - A new - &man.periodic.8; script, - 510.status-world-kernel, has been added, - which evaluates the running userland and kernel versions from - the &man.uname.1; -U and - -K arguments, and prints an error if the - system userland and kernel are not in sync. +   Runtime Libraries and API - The Blowfish &man.crypt.3; default - format has been changed to - $2b$. - - The &man.readline.3; library is now - statically linked in software within the base system, and the - shared library is no longer installed, allowing the Ports - Collection to use a modern version of the library. - - The &man.strptime.3; function has been - updated to add support for the POSIX-2001 - features %U and - %W. - - The &man.dl.iterate.phdr.3; function has been - changed to always return the path name of the - ELF object in the - dlpi_name structure member. - - The &man.libxo.3; library has been - imported to the base system. - - A - userland library for Chelsio Terminator 5 based iWARP cards - has been added, allowing userland RDMA - applications to work over compatible - NICs. - - The &man.gpio.3; library has been added, - providing a wrapper around the &man.gpio.4; kernel - interface. - - The - &man.procctl.2; system call has been updated to include - a facility for non-&man.init.8; processes to be declared as - the reaper of child processes and their descendants. - - The futimens() and - utimensat() system calls have been - added. See &man.utimensat.2; for more information. - - The &man.elf.3; compile-time dependency - has been removed from drti.o, which - allows adding DTrace probes to - userland applications and libraries without also linking - against &man.elf.3;. - - The &man.setmode.3; function has been - updated to consistently set errno on - failure. - - The &man.qsort.3; functions have been - updated to be able to handle 32-bit aligned data on 64-bit - platforms, also providing a significant improvement in 32-bit - workloads. - - Several standard include headers have - been updated to make use of gcc - attributes, such as __result_use_check(), - __alloc_size(), and - __nonnull(). - - Support for file verification in - MAC has been added. - - The - libgomp library is now only built when - building GCC from the base system. An - up-to-date version is available in the Ports Collection as - devel/libiomp5-devel. - - The stdlib.h and - malloc.h headers have been updated to - make use of the gcc - alloc_align() attribute. - - The Blowfish &man.crypt.3; library - has been updated to support $2y$ hashes.
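The check performed by the new 510.status-world-kernel &man.periodic.8; script can be approximated by hand with the &man.uname.1; arguments it uses; a minimal sketch:
[ "$(uname -U)" = "$(uname -K)" ] || echo "userland and kernel are out of sync"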
- - The &man.execl.3; and &man.execlp.3; - library functions have been updated to use the - __sentinel gcc - attribute. +   ABI Compatibility - The &linux; compatibility version has - been updated to 2.6.18. The - compat.linux.osrelease &man.sysctl.8; is - evaluated when building the emulators/linux-c6 and related - ports. - - The stack protector has been upgraded to - the "strong" level, elevating the protection against buffer - overflows. While this significantly improves the security of - the system, extensive testing was done to ensure there are no - measurable side effects in performance or - functionality. +   Kernel This section covers changes to kernel configurations, system tuning, and system control parameters that are not otherwise categorized. Kernel Bug Fixes - A kernel bug that inhibited proper - functionality of the dev.cpu.0.freq - &man.sysctl.8; on &intel; processors with Turbo - Boost ™ enabled has been fixed. - - Support for - &man.dtrace.1; stack tracing has been fixed for - &os;/&arch.powerpc;, using the trapexit() - and asttrapexit() functions instead of - checking within addressed kernel space. - - A kernel panic triggered when destroying - a &man.vnet.9; &man.jail.8; configured with &man.gif.4; has - been fixed. - - A kernel panic triggered when destroying - a &man.vnet.9; &man.jail.8; configured with &man.gre.4; has - been fixed. - - A bug in &man.ipfw.4; that could - potentially lead to a kernel panic when using &man.dummynet.4; - at layer 2 has been fixed. - - The - kernel RPC has been updated to include - several enhancements: - - - - The 45 MiB limit on requests queued for - &man.nfsd.8; threads has been removed. - - - - Unnecessary throttling is now avoided by not deferring - accounting for completed requests. - - - - An integer overflow and signedness bugs have been fixed. - - - - Support for - &man.dtrace.1; has been added for the - Book-E ™ platform. - - The &man.kqueue.2; system call has been - updated to handle write events to files larger than 2 - gigabytes. +   Kernel Configuration - The IMAGACT_BINMISC - kernel configuration option has been enabled by default, - which enables application execution through emulators, such - as QEMU. - - The VT kernel - configuration file has been removed, and the &man.vt.4; - driver is included in the GENERIC kernel. - To enable &man.vt.4;, enter set kern.vty=vt - at the &man.loader.8; prompt during boot, or add - kern.vty=vt to &man.loader.conf.5; and - reboot the system. - - The &man.config.8; utility has been - updated to allow using a non-standard src/ tree, specified as an - argument to the -s flag. - - The - &os;/&arch.powerpc64; kernel now builds as - a position-independent executable, allowing the kernel to be - loaded into and run from any physical or virtual - address. - - - This change requires an update to &man.loader.8;. - The userland and kernel must be updated before rebooting the - system. - - - A new module for creating - rpi.dtb has been added for the Raspberry - Pi. - - The - rpi.dtb module is now installed to - /boot/dtb/ by - default for the Raspberry Pi system. - - Kernel support for the Vector-Scalar eXtension - (VSX) found on POWER7 and POWER8 hardware - has been added. - - The &man.pmap.9; implementation for 64-bit - &powerpc; processors has been overhauled to improve - concurrency. - - A new module for creating - the dtb module for AM335x systems has - been added.
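Per the &man.vt.4; item above, the console driver can be chosen without rebuilding the kernel; for example, in &man.loader.conf.5;:
kern.vty=vt
or once, at the &man.loader.8; prompt:
set kern.vty=vt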
- - The - PAE_TABLES kernel configuration option has - been added for &os;/&arch.i386;, which instructs &man.pmap.9; - to use PAE format for page tables while - maintaining a 32-bit physical address size elsewhere in the - kernel. The use of this option can enhance application-level - security by enabling the creation of no-execute - mappings on modern &arch.i386; processors. Unlike the - PAE option, PAE_TABLES - preserves kernel binary interface (KBI) - compatibility with non-PAE kernels, - allowing non-PAE kernel modules and drivers - to work with a PAE_TABLES-enabled kernel. - Additionally, system limits are tuned for 4GB maximum - RAM, avoiding kernel virtual address space - (KVA) exhaustion. - - The SIFTR kernel - configuration option has been added, allowing building &man.siftr.4; - statically into the kernel. - - The &arch.arm; boot loader, - ubldr, is now relocatable. In addition, - ubldr.bin, a stripped binary with an entry point of - 0, is now created at build - time, providing the ability to specify the - load address by running go - ${loadaddr} in - u-boot. - - The &man.nvd.4; and &man.nvme.4; drivers are - now included in the GENERIC kernel - configuration by default. - - A new kernel configuration option, - EM_MULTIQUEUE, has been added, which enables - multi-queue support in the &man.em.4; driver. - - - Multi-queue support in the &man.em.4; driver is not - officially supported by &intel;. - - - The GENERIC kernel - configuration has been updated to include the - IPSEC option by default. - - Initial NUMA - affinity and policy configuration has been added. See - &man.numactl.1; and &man.numa.getaffinity.2; for usage - details. - - The &man.pms.4; driver has been added - to the GENERIC kernel configuration for - supported architectures. - - The - CUBIEBOARD2 kernel configuration has been - renamed to A20. - - Kernel - debugging symbols are now installed to /usr/lib/debug/boot/kernel/. - To retain the previous behavior, add - KERN_DEBUGDIR="" to - &man.src.conf.5;. - - - NEW_PCIB is enabled by default. - +   System Tuning and Controls - The - &man.hwpmc.4; default and maximum callchain depths have been - increased. The default has been increased from 16 to 32, and - the maximum increased from 32 to 128. - - The kern.osrelease - and kern.osreldate variables are now configurable - &man.jail.8; parameters. - - The &man.devfs.5; device filesystem has - been changed to update timestamps for read/write operations - using seconds precision. A new &man.sysctl.8;, - vfs.devfs.dotimes, has been added, which, - when set to a non-zero value, enables default precision - timestamps for these operations. - - A new - &man.sysctl.8;, kern.racct.enable, has been - added, which, when set to a non-zero value, allows using - &man.rctl.8; with the GENERIC kernel. - A new kernel configuration option, - RACCT_DISABLED, has also been added. - - The - GENERIC kernel configuration now includes - RACCT and RCTL by - default. - - - To enable RACCT and - RCTL on a system using the - GENERIC kernel configuration, add - kern.racct.enable=1 to - &man.loader.conf.5;, and reboot the system. - - - A new &man.sysctl.8;, - net.inet.tcp.hostcache.purgenow, has - been added, which when set to 1 during - runtime will flush all - net.inet.tcp.hostcache entries. - - A new &man.sysctl.8;, - hw.model, has been added, which displays - CPU model information.
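The tunables introduced above can be exercised as follows; the names and values are taken directly from the text:
# loader.conf(5): allow rctl(8) with the GENERIC kernel
kern.racct.enable=1
# At runtime, flush all hostcache entries:
sysctl net.inet.tcp.hostcache.purgenow=1
# Display CPU model information:
sysctl hw.model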
- - The &man.uart.4; driver has been - updated to allow tuning pulses per second captured in the - CTS line during runtime, whereas previously only the DCD line - could be used without rebuilding the kernel. +   Devices and Drivers This section covers changes and additions to devices and device drivers since &release.prev;. Device Drivers - Support for GPS ports has been added to - &man.uhso.4;. - - The &man.full.4; device has been added, - and the lindev(4) device has been removed. - Prior to this change, lindev(4) provided - only the /dev/full character device, - returning ENOSPC on write attempts. As - this device is not specific to &linux;, a native &os; version - has been added. - - Hardware context support has been - added to the drm/i915 driver, adding - support for Mesa 9.2 and - later. - - The &man.vt.4; driver has been updated, - replacing the bitmapped kern.vt.spclkeys - &man.sysctl.8; with individual - kern.vt.kbd_* variants. - - The &man.hpet.4; driver has been updated - to create a - /dev/hpetN - device, providing access to HPET from - userspace. - - The drm code has - been updated to match &linux; version 3.8.13. - - The &man.psm.4; driver has been updated - to include improved support for newer Synaptics ® - touchpads and the ClickPad ® mouse on newer - Lenovo ™ laptops. - - Support for the Freescale - PCI Root Complex device has been - added. - - The &man.cyapa.4; driver has been added, - supporting the Cypress APA I2C trackpad. - - The &man.isl.4; driver has been added, - supporting the Intersil I2C ISL29018 digital ambient light - sensor. +   Storage Drivers - The &man.mpr.4; - device has been added, providing support for LSI Fusion-MPT - 3 12Gb SCSI/SATA controllers. - - The &man.mrsas.4; driver has been added, - providing support for LSI MegaRAID SAS controllers. The - &man.mfi.4; driver will attach to the controller by default. - To enable &man.mrsas.4;, add - hw.mfi.mrsas_enable=1 to - /boot/loader.conf, which turns off - &man.mfi.4; device probing. - - - At this time, the &man.mfiutil.8; utility and the &os; - version of MegaCLI and - StorCli do not work with - &man.mrsas.4;. - - - The - &man.ctl.4; subsystem has been updated, increasing the ports - limit from 128 to 256, - and the LUN limit from 256 - to 1024. - - The asr(4) driver has - been removed, and is no longer supported. - - The &man.hptnr.4; driver has been - updated to version 1.1.1. - - The &man.pms.4; driver has been added, - providing support for the PMC Sierra line of - SAS/SATA host bus - adapters. - - The &man.ioat.4; driver has been added, - providing support for the PSE (Platform - Storage Extension). - - The - CTL High Availability implementation has - been rewritten. - - The &man.ctl.4; driver has been updated - to support CD-ROM and removable devices. - - The &man.isp.4; driver has - been updated and improved, adding support for 16Gbps FC cards, - improving target mode support, and completing Multi-ID (NPIV) - functionality. +   Network Drivers - Support for Broadcom chipsets BCM57764, - BCM57767, BCM57782, BCM57786, and BCM57787 has been added to - &man.bge.4;. - - Support for the &intel; Centrino™ - Wireless-N 135 chipset has been added. - - Firmware for &intel; Centrino™ - Wireless-N 105 devices has been added to the base - system. - - The deprecated nve(4) driver has been - removed. Users of NVIDIA nForce MCP network adapters are - advised to use the &man.nfe.4; driver instead, which has been - the default driver for this hardware since - &os; 7.0.
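As described in the &man.mrsas.4; item above, preferring &man.mrsas.4; over &man.mfi.4; is a single &man.loader.conf.5; line:
hw.mfi.mrsas_enable=1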
- - The if_nf10bmac(4) - device has been added, providing support for the NetFPGA-10G - Embedded CPU Ethernet Core. - - - The if_nf10bmac(4) driver operates on - the FPGA, and is not suited for the PCI host - interface. - - - The &man.ath.hal.4; driver has been - updated to support the Atheros AR1111 chipset. - - Support for the &intel; Centrino™ - Wireless-N 105 chipset has been added. - - Support for the &man.cxgbe.4; Terminator - 5 (T5) 10G/40G cards has been added to &man.netmap.4;. - - The &man.alc.4; driver has been updated - to support AR816x and AR817x ethernet controllers. - - The &man.pf.4; packet filter default - hash has been changed from Jenkins to - Murmur3, providing a 3-percent performance - increase in packets per second. - - The &man.vxlan.4; driver has been added, - which creates a virtual Layer 2 (Ethernet) network overlaid on - a Layer 3 (IP/UDP) network. The &man.vxlan.4; driver is - analogous to &man.vlan.4;, but is designed to be better suited - for large, multiple-tenant datacenter environments. - - The - &man.gre.4; driver has been significantly overhauled, and has - been split into two separate modules, &man.gre.4; and - &man.me.4;. - - The &man.ral.4; driver has been updated - to support the RT5390 and RT5392 chipsets. - - The &man.sfxge.4; driver has been - updated to support Solarflare Flareon Ultra 7000-series - chipsets. - - The &man.em.4; driver has been updated - with improved transmission queue hang detection. - - The &man.cdce.4; driver has been updated - to include support for the RTL8153 chipset. - - The &man.iwm.4; driver has been imported - from OpenBSD, providing support for &intel; 3160/7260/7265 - wireless chipsets. - - The &man.em.4; driver has been updated - to allow disabling CRC stripping. - - The &man.pf.4; implementation has been - updated to remove support for the scrub fragment - crop|drop-ovl filtering rule. Systems with this - rule in &man.pf.conf.5; will implicitly be converted to the - scrub fragment reassemble filtering rule, with - no manual intervention necessary. - - The &man.lagg.4; driver has been updated - to remove support for the fec - protocol. - - netmap - support from the ncxgbe/ncxl interfaces has been merged into the - vcxgbe/vcxl interfaces for the &man.cxgbe.4; driver. - +   Hardware Support This section covers general hardware support for physical machines, hypervisors, and virtualization environments, as well as hardware changes and updates that do not otherwise fit in other sections of this document. Hardware Support - The &man.asmc.4; driver has been - updated to support the &apple; MacMini 3,1. - - Support for &os;/ia64 has been dropped - as of &os; 11. - - An issue that could cause a system to - hang when entering ACPI - S3 state (suspend to - RAM) has been corrected in the &man.acpi.4; - and &man.pci.4; drivers. - - The power management unit - subsystem has been updated to support power button events on - certain &arch.powerpc; hardware, such as the aluminum - PowerBook ®. - - The &man.hwpmc.4; - driver has been updated to correct performance counter - sampling on G4 (MPC74xxx) and G5 class processors. - - The - OpenCrypto framework has been - updated to include AES-ICM and - AES-GCM modes, both of which have also been - added to the &man.aesni.4; driver. - - The &man.hwpmc.4; - driver has been updated to support the Freescale e500 - core. - - The &man.ig4.4; driver has been added, - providing support for the fourth generation &intel; - I2C SMBus.
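For the &man.pf.4; scrub change described above, the surviving rule form in &man.pf.conf.5; looks like the following (the direction and scope shown are illustrative):
scrub in all fragment reassemble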
- - The &man.uart.4; driver has been updated to support - AMT devices on newer systems. - - Initial SMP support has been - added to the &os;/&arch.arm64; port. +   Virtualization Support - Support for the Virtual Interrupt - Delivery feature of &intel; VT-x is enabled if - supported by the CPU. This feature can be disabled by running - sysctl hw.vmm.vmx.use_apic_vid=0. - Additionally, to persist this setting across reboots, add - hw.vmm.vmx.use_apic_vid=0 to - /etc/sysctl.conf. - - Support for Posted Interrupt - Processing is enabled if supported by the CPU. This - feature can be disabled by running sysctl - hw.vmm.vmx.use_apic_pir=0. Additionally, to - persist this setting across reboots, add - hw.vmm.vmx.use_apic_pir=0 to - /etc/sysctl.conf. - - Unmapped I/O support has been added to - &man.virtio_blk.4;. - - Unmapped I/O support has been added to - &man.virtio_scsi.4;. - - The &man.virtio_random.4; driver has - been added to harvest entropy from the host system. - - &os;/&arch.i386; guests can be run under - bhyve. - - Support for running a &os;/&arch.amd64; - Xen guest instance as - a PVH guest has been added. - PVH mode, short for Para-Virtualized - Hardware, uses para-virtualized drivers for boot and - I/O, and uses hardware virtualization extensions for all other - tasks, without the need for emulation. - - The &man.bhyve.8; hypervisor has been - updated to support &amd; processors with - SVM and AMD-V hardware - extensions. - - The &man.virtio.console.4; driver has - been added, which provides an interface to VirtIO console - devices through a &man.tty.4; device. - - The &man.bhyve.8; hypervisor has been - updated to support DSM TRIM commands for - virtual AHCI disks. - - Native graphics support has been added to - the &man.bhyve.8; hypervisor. - - Support for the - QEMU virt system - has been added. - - The - Hyper-V™ drivers have been updated with several - enhancements: - - - - The &man.hv.vmbus.4; driver now has multi-channel - support. - - - - The &man.hv.storvsc.4; driver now has scatter/gather - support, in addition to performance improvements. - - - - The &man.hv.kvp.4; driver has received several bug - fixes. - - - - Support for &man.xen.4; para-virtualized - domU kernels has been removed. - - The - &man.hv.netvsc.4; driver has been updated to support checksum - offloading and TSO. - - The &man.xen.4; driver has been updated - to include support for blkif indirect - segment I/O. +   ARM Support - The &man.nand.4; device is enabled for - ARM devices by default. - - Support for the Exynos 5420 - Octa system has been added. - - The SMP - option has been enabled for all Exynos 5 systems supported by - &os;. - - Support for the Toradex - Apalis i.MX6 development board has been added. - - An issue that could cause - instability when detecting SD cards on the - Raspberry Pi SOC has been fixed. - - The bcm2835_cpufreq - driver has been added, which supports CPU - frequency and voltage control on the Raspberry Pi - SOC. - - Support to turn off the - BeagleBone Black system with the &man.shutdown.8; - -p flag or by invoking &man.poweroff.8; has - been added. - - Audio transmission drivers - have been added for the Digital Audio Multiplexer - (AUDMUX), Smart Direct Memory Access - Controller (SDMA), and Synchronous Serial - Interface (SSI). - - Initial - support for the ARM AArch64 architecture has been - added. - - Kernel support for Thumb-2 - userland has been added. - - Support for the hardware power button - on the BeagleBone Black system has been added.
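The two bhyve-related features above can be disabled persistently exactly as the text describes, via /etc/sysctl.conf:
hw.vmm.vmx.use_apic_vid=0
hw.vmm.vmx.use_apic_pir=0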
- - Initial - ACPI support has been added for - &os;/&arch.arm64;. - - Support for 1-Wire devices has been - added, providing support for 1-Wire hardware through - &man.gpio.4;. See &man.ow.4;, &man.owc.4;, and - &man.ow.temp.4; for more information. - - Support for the HiSilicon HI6220 SoC has been - added. - - The second CPU core on the - Allwinner A20 SoC has been enabled. - - Support for the Allwinner H3 SoC - has been added. - - Support for the X-Powers AXP813 and - AXP818 power management integrated circuits has been added. - - Support for GPIO, sensors, and - interrupts on the AXP209 power management integrated circuit has been - added. - +   Storage This section covers changes and additions to file systems and other storage subsystems, both local and networked. General Storage - The - &man.ctl.4; LUN mapping has been rewritten, - replacing iSCSI-specific mapping mechanisms - with a new mechanism that works for any port. - - The - &man.ctld.8; utility has been updated to allow controlling - non-iSCSI &man.ctl.4; ports. - - The - &man.autofs.5; subsystem has been updated to include a new - &man.auto.master.5; map, -media, which - allows automatically mounting removable media, such as - CD drives or USB flash - drives. - - The - &man.autofs.5; subsystem has been updated to include a new - &man.auto.master.5; map, -noauto, which - handles &man.fstab.5; entries set to - noauto. - - The GELI class has - been updated to support the BIO_DELETE - &man.g.bio.9; bio_cmd field, providing - TRIM/UNMAP support on - GELI-backed SSD storage - providers. - - - Leading spaces are now stripped off SCSI disk serial - numbers when populating the CAM serial number. This affects the output of - &man.diskinfo.8; and the names of /dev/diskid/DISK-* - device nodes, among other things. - - - Support for managing Shingled Magnetic Recording (SMR) drives - has been added. - +   Networked Storage - The new - filesystem automount facility, &man.autofs.5;, has been added. - The new &man.autofs.5; facility is similar to that found in - other &unix;-like operating systems, such as OS X™ - and Solaris™. The &man.autofs.5; facility uses - a &sun;-compatible &man.auto.master.5; configuration file, and - is administered with the &man.automount.8; userland utility, - and the &man.automountd.8; and &man.autounmountd.8; - daemons. - - Support - for the timeo, actimeo, - noac, and proto options - has been added to &man.mount.nfs.8;. +   ZFS - The arc_meta_limit - statistics are now visible through the - kstat &man.sysctl.8;. As a result of this - change, the vfs.zfs.arc_meta_used - &man.sysctl.8; has been removed, and replaced with the - kstat.zfs.misc.arcstats.arc_meta_used - &man.sysctl.8;. - - The &man.zfs.8; l2arc - code has been updated to take ashift into - account when gathering buffers to be written to the - l2arc device. - - The zfsd daemon has been added, - which manages hot spares and replacements in drive slots that publish - physical paths. +   &man.geom.4; - Support for the - disklabel64 partitioning scheme has been - added to &man.gpart.8;. - - Support for the - apple-boot, apple-hfs, - and apple-ufs MBR - partitioning schemes has been added to &man.gpart.8;. - - The &man.gpart.8; utility has been - updated to include a new attribute for GPT - partitions, lenovofix, which, when set, - works around BIOS compatibility - issues reported on several Lenovo ™ laptops. +   Boot Loader Changes This section covers the boot loader, boot menu, and other boot-related changes.
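The lenovofix attribute mentioned above is applied with &man.gpart.8;; a sketch, with the disk name hypothetical:
gpart set -a lenovofix ada0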
Boot Loader Changes - The - memory test run at boot time on &os;/&arch.amd64; platforms - has been disabled by default. - - A new &man.ttys.5; class, - 3wire, has been added. This is similar to - the existing terminal classes, but does not have a defined - baud rate. - - The &man.vt.4; driver has been made the - default system console driver. The &man.syscons.4; driver is - still available, and can be enabled by adding - kern.vty=sc in &man.loader.conf.5;. - Alternatively, &man.syscons.4; can be enabled at boot time by - entering set kern.vty=sc at the - &man.loader.8; prompt. - - Support for bzipfs - has been added to the EFI loader. - - The boot loader has been updated to - support entering the GELI passphrase before - loading the kernel. To enable this behavior, add - geom_eli_passphrase_prompt="YES" to - &man.loader.conf.5;. - - The &man.ttys.5; file for &os;/&arch.arm; has been - updated to enable ttyu1, - ttyu2, and ttyu3 by - default, if the callin port is an active console port. +   Boot Menu Changes   Networking This section describes changes that affect networking in &os;. Network Protocols - Support for the IPX network transport - protocol has been removed; IPX will not be supported in - &os; 11 and later releases. - - Support for PLPMTUD - blackhole detection (RFC 4821) has been - added to the &man.tcp.4; stack, disabled by default. New - control tunables have been added: - - - - - - - - Tunable - Description - - - - - - net.inet.tcp.pmtud_blackhole_detection - Enables or disables PLPMTUD - blackhole detection - - - - net.inet.tcp.pmtud_blackhole_mss - MSS to try for IPv4 - - - - net.inet.tcp.v6pmtud_blackhole_mss - MSS to try for IPv6 - - - - - - New monitoring &man.sysctl.8;s have been added: - - - - - - - - Tunable - Description - - - - - - net.inet.tcp.pmtud_blackhole_activated - Number of times the code was activated to attempt - downshifting the MSS - - - - net.inet.tcp.pmtud_blackhole_min_activated - Number of times the blackhole - MSS was used in an attempt to - downshift - - - - net.inet.tcp.pmtud_blackhole_failed - Number of times that the blackhole failed to - connect after downshifting the - MSS - - - - - - Support for IP - identification for atomic datagrams (RFC - 6864) has been added. This feature can be toggled - with the net.inet.ip.rfc6864 - &man.sysctl.8;, which is enabled by default. - - The IPSEC implementation has been - updated to include support for AES modes on - both software-only and hardware-backed (&man.aesni.4;) - systems. - - The - network stack has been updated to fix handling of - IPv6 On-Link redirects. - - The net.inet.tcp.ecn.enable sysctl mib has been - changed from a binary off/on control to a three-way setting. - - - - - - - - Value - Description - - - - - - 0 - Totally disable ECN. - - - - 1 - Enable ECN if incoming connections request it. Outgoing - connections will request ECN. - - - - 2 - Enable ECN if incoming connections request it. Outgoing - connections will not request ECN. - - - - - - - Dummynet AQM, an independent implementation of - CoDel and FQ-CoDel for ipfw/dummynet, has been imported to the base - system. - +   Ports Collection and Package Infrastructure This section covers changes to the &os; Ports Collection, package infrastructure, and package maintenance and installation tools. Infrastructure Changes   Packaging Changes   Documentation This section covers changes to the &os; Documentation Project sources and toolchain.
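The TCP tunables listed above are runtime &man.sysctl.8;s; for example, to enable blackhole detection and select the middle ECN setting (values as defined in the tables above):
sysctl net.inet.tcp.pmtud_blackhole_detection=1
sysctl net.inet.tcp.ecn.enable=1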
Documentation Source Changes   Documentation Toolchain Changes   Release Engineering and Integration This section covers changes that are specific to the &os; Release Engineering processes. Integration Changes - The - Release Engineering build tools have been updated to include - support for producing virtual machine disk images for various - cloud hosting providers. - - The Release Engineering build tools have - been updated to use multi-threaded &man.xz.1;. By default, - the number of &man.xz.1; threads is set to the number of cores - available. - - The - Release Engineering build tools have been updated to include - support for building &os;/&arch.arm64; virtual machine and - memory stick installation images. - - The - Release Engineering build tools have been updated to support - building &os;/&arch.arm; images without external utilities for - supported boards where a corresponding - u-boot port exists in the Ports - Collection. - - The - &os;/&arch.i386; memory stick installation images are now - created using the &man.mkimg.1; utility, matching the way - the &os;/&arch.amd64; images are created. +  
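The multi-threaded &man.xz.1; behavior noted above can also be requested explicitly; -T 0 selects one thread per core (the file name is hypothetical):
xz -T 0 image.raw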
Index: projects/clang390-import/sbin/hastd/lzf.h =================================================================== --- projects/clang390-import/sbin/hastd/lzf.h (revision 305016) +++ projects/clang390-import/sbin/hastd/lzf.h (revision 305017) @@ -1,211 +1,215 @@ /* * Copyright (c) 2000-2008 Marc Alexander Lehmann * * Redistribution and use in source and binary forms, with or without modifica- * tion, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MER- * CHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPE- * CIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTH- * ERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED * OF THE POSSIBILITY OF SUCH DAMAGE. * * Alternatively, the contents of this file may be used under the terms of * the GNU General Public License ("GPL") version 2 or any later version, * in which case the provisions of the GPL are applicable instead of * the above. If you wish to allow the use of your version of this file * only under the terms of the GPL and not to allow others to use your * version of this file under the BSD license, indicate your decision * by deleting the provisions above and replace them with the notice * and other provisions required by the GPL. If you do not delete the * provisions above, a recipient may use your version of this file under * either the BSD or the GPL. */ #ifndef LZF_H #define LZF_H /*********************************************************************** ** ** lzf -- an extremely fast/free compression/decompression-method ** http://liblzf.plan9.de/ ** ** This algorithm is believed to be patent-free. ** ***********************************************************************/ #define LZF_VERSION 0x0105 /* 1.5, API version */ /* * Compress in_len bytes stored at the memory block starting at * in_data and write the result to out_data, up to a maximum length * of out_len bytes. * * If the output buffer is not large enough or any error occurs return 0, * otherwise return the number of bytes used, which might be considerably * more than in_len (but less than 104% of the original size), so it * makes sense to always use out_len == in_len - 1, to ensure _some_ * compression, and store the data uncompressed otherwise (with a flag, of * course). * * lzf_compress might use different algorithms on different systems and * even different runs, thus might result in different compressed strings * depending on the phase of the moon or similar factors. However, all * these strings are architecture-independent and will result in the * original data when decompressed using lzf_decompress. * * The buffers must not be overlapping.
* * If the option LZF_STATE_ARG is enabled, an extra argument must be * supplied which is not reflected in this header file. Refer to lzfP.h * and lzf_c.c. * */ unsigned int lzf_compress (const void *const in_data, unsigned int in_len, void *out_data, unsigned int out_len); /* * Decompress data compressed with some version of the lzf_compress * function and stored at location in_data and length in_len. The result * will be stored at out_data up to a maximum of out_len characters. * * If the output buffer is not large enough to hold the decompressed * data, a 0 is returned and errno is set to E2BIG. Otherwise the number * of decompressed bytes (i.e. the original length of the data) is * returned. * * If an error in the compressed data is detected, a zero is returned and * errno is set to EINVAL. * * This function is very fast, about as fast as a copying loop. */ unsigned int lzf_decompress (const void *const in_data, unsigned int in_len, void *out_data, unsigned int out_len); /* * Size of hashtable is (1 << HLOG) * sizeof (char *) * decompression is independent of the hash table size * the difference between 15 and 14 is very small * for small blocks (and 14 is usually a bit faster). * For a low-memory/faster configuration, use HLOG == 13; * For best compression, use 15 or 16 (or more, up to 23). */ #ifndef HLOG # define HLOG 16 #endif /* * Sacrifice very little compression quality in favour of compression speed. * This gives almost the same compression as the default code, and is * (very roughly) 15% faster. This is the preferred mode of operation. */ #ifndef VERY_FAST # define VERY_FAST 1 #endif /* * Sacrifice some more compression quality in favour of compression speed. * (roughly 1-2% worse compression for large blocks and * 9-10% for small, redundant, blocks and >>20% better speed in both cases) * In short: when in need of speed, enable this for binary data, * possibly disable this for text data. */ #ifndef ULTRA_FAST # define ULTRA_FAST 0 #endif /* * Unconditionally aligning does not cost very much, so do it if unsure */ #ifndef STRICT_ALIGN -# define STRICT_ALIGN !(defined(__i386) || defined (__amd64)) +# if !(defined(__i386) || defined (__amd64)) +# define STRICT_ALIGN 1 +# else +# define STRICT_ALIGN 0 +# endif #endif /* * You may choose to pre-set the hash table (might be faster on some * modern cpus and large (>>64k) blocks, and also makes compression * deterministic/repeatable when the configuration otherwise is the same). */ #ifndef INIT_HTAB # define INIT_HTAB 1 #endif /* * Avoid assigning values to the errno variable? For some embedding purposes * (the Linux kernel, for example), this is necessary. NOTE: this breaks * the documentation in lzf.h. */ #ifndef AVOID_ERRNO # define AVOID_ERRNO 0 #endif /* * Whether to pass the LZF_STATE variable as argument, or allocate it * on the stack. For small-stack environments, define this to 1. * NOTE: this breaks the prototype in lzf.h. */ #ifndef LZF_STATE_ARG # define LZF_STATE_ARG 0 #endif /* * Whether to add extra checks for input validity in lzf_decompress * and return EINVAL if the input stream has been corrupted. This * only shields against overflowing the input buffer and will not * detect most corrupted streams. * This check is not normally noticeable on modern hardware * (<1% slowdown), but might slow down older cpus considerably.
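The tuning macros above are all guarded by #ifndef, so a build can override them on the compiler command line; a sketch assuming the lzf_c.c translation unit referenced in the comments:
# Low-memory/faster configuration, per the HLOG comment above:
cc -O2 -c -DHLOG=13 lzf_c.c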
*/ #ifndef CHECK_INPUT # define CHECK_INPUT 1 #endif /*****************************************************************************/ /* nothing should be changed below */ typedef unsigned char u8; typedef const u8 *LZF_STATE[1 << (HLOG)]; #if !STRICT_ALIGN /* for unaligned accesses we need a 16 bit datatype. */ # include # if USHRT_MAX == 65535 typedef unsigned short u16; # elif UINT_MAX == 65535 typedef unsigned int u16; # else # undef STRICT_ALIGN # define STRICT_ALIGN 1 # endif #endif #if ULTRA_FAST # if defined(VERY_FAST) # undef VERY_FAST # endif #endif #if INIT_HTAB # ifdef __cplusplus # include # else # include # endif #endif #endif Index: projects/clang390-import/share/mk/bsd.dep.mk =================================================================== --- projects/clang390-import/share/mk/bsd.dep.mk (revision 305016) +++ projects/clang390-import/share/mk/bsd.dep.mk (revision 305017) @@ -1,312 +1,314 @@ # $FreeBSD$ # # The include file handles Makefile dependencies. # # # +++ variables +++ # # CLEANDEPENDDIRS Additional directories to remove for the cleandepend # target. # # CLEANDEPENDFILES Additional files to remove for the cleandepend target. # # CTAGS A tags file generation program [gtags] # # CTAGSFLAGS Options for ctags(1) [not set] # # DEPENDFILE dependencies file [.depend] # # GTAGSFLAGS Options for gtags(1) [-o] # # HTAGSFLAGS Options for htags(1) [not set] # # SRCS List of source files (c, c++, assembler) # # DPSRCS List of source files which are needed for generating # dependencies, ${SRCS} are always part of it. # # +++ targets +++ # # cleandepend: # remove ${CLEANDEPENDFILES}; remove ${CLEANDEPENDDIRS} and all # contents. # # depend: # Make the dependencies for the source files, and store # them in the file ${DEPENDFILE}. # # tags: # In "ctags" mode, create a tags file for the source files. # In "gtags" mode, create a (GLOBAL) gtags file for the # source files. If HTML is defined, htags(1) is also run # after gtags(1). .if !target(____) .error bsd.dep.mk cannot be included directly. .endif CTAGS?= gtags CTAGSFLAGS?= GTAGSFLAGS?= -o HTAGSFLAGS?= .if ${MK_DIRDEPS_BUILD} == "no" .MAKE.DEPENDFILE= ${DEPENDFILE} .endif CLEANDEPENDFILES+= ${DEPENDFILE} ${DEPENDFILE}.* .if ${MK_META_MODE} == "yes" CLEANDEPENDFILES+= *.meta .endif # Keep `tags' here, before SRCS are mangled below for `depend'. .if !target(tags) && defined(SRCS) && !defined(NO_TAGS) tags: ${SRCS} .if ${CTAGS:T} == "gtags" @cd ${.CURDIR} && ${CTAGS} ${GTAGSFLAGS} ${.OBJDIR} .if defined(HTML) @cd ${.CURDIR} && htags ${HTAGSFLAGS} -d ${.OBJDIR} ${.OBJDIR} .endif .else @${CTAGS} ${CTAGSFLAGS} -f /dev/stdout \ ${.ALLSRC:N*.h} | sed "s;${.CURDIR}/;;" > ${.TARGET} .endif .endif .if !empty(.MAKE.MODE:Mmeta) && empty(.MAKE.MODE:Mnofilemon) _meta_filemon= 1 .endif # Skip reading .depend when not needed to speed up tree-walks and simple # lookups. For install, only do this if no other targets are specified. # Also skip generating or including .depend.* files if in meta+filemon mode # since it will track dependencies itself. OBJS_DEPEND_GUESS is still used. 
.if !empty(.MAKEFLAGS:M-V${_V_READ_DEPEND}) || make(obj) || make(clean*) || \ ${.TARGETS:M*install*} == ${.TARGETS} || \ make(analyze) || defined(_meta_filemon) _SKIP_READ_DEPEND= 1 .if ${MK_DIRDEPS_BUILD} == "no" .MAKE.DEPENDFILE= /dev/null .endif .endif .if defined(SRCS) CLEANFILES?= .for _S in ${SRCS:N*.[dhly]} OBJS_DEPEND_GUESS.${_S:R}.o+= ${_S} .endfor # Lexical analyzers .for _LSRC in ${SRCS:M*.l:N*/*} .for _LC in ${_LSRC:R}.c ${_LC}: ${_LSRC} ${LEX} ${LFLAGS} -o${.TARGET} ${.ALLSRC} OBJS_DEPEND_GUESS.${_LC:R}.o+= ${_LC} SRCS:= ${SRCS:S/${_LSRC}/${_LC}/} CLEANFILES+= ${_LC} .endfor .endfor # Yacc grammars .for _YSRC in ${SRCS:M*.y:N*/*} .for _YC in ${_YSRC:R}.c SRCS:= ${SRCS:S/${_YSRC}/${_YC}/} CLEANFILES+= ${_YC} .if !empty(YFLAGS:M-d) && !empty(SRCS:My.tab.h) .ORDER: ${_YC} y.tab.h y.tab.h: .NOMETA ${_YC} y.tab.h: ${_YSRC} ${YACC} ${YFLAGS} ${.ALLSRC} cp y.tab.c ${_YC} CLEANFILES+= y.tab.c y.tab.h .elif !empty(YFLAGS:M-d) .for _YH in ${_YC:R}.h .ORDER: ${_YC} ${_YH} ${_YH}: .NOMETA ${_YC} ${_YH}: ${_YSRC} ${YACC} ${YFLAGS} -o ${_YC} ${.ALLSRC} SRCS+= ${_YH} CLEANFILES+= ${_YH} .endfor .else ${_YC}: ${_YSRC} ${YACC} ${YFLAGS} -o ${_YC} ${.ALLSRC} .endif OBJS_DEPEND_GUESS.${_YC:R}.o+= ${_YC} .endfor .endfor # DTrace probe definitions .if ${SRCS:M*.d} CFLAGS+= -I${.OBJDIR} .endif .for _DSRC in ${SRCS:M*.d:N*/*} .for _D in ${_DSRC:R} SRCS+= ${_D}.h ${_D}.h: ${_DSRC} ${DTRACE} ${DTRACEFLAGS} -h -s ${.ALLSRC} SRCS:= ${SRCS:S/^${_DSRC}$//} OBJS+= ${_D}.o CLEANFILES+= ${_D}.h ${_D}.o ${_D}.o: ${_DSRC} ${OBJS:S/^${_D}.o$//} @rm -f ${.TARGET} ${DTRACE} ${DTRACEFLAGS} -G -o ${.TARGET} -s ${.ALLSRC:N*.h} .if defined(LIB) CLEANFILES+= ${_D}.So ${_D}.po ${_D}.So: ${_DSRC} ${SOBJS:S/^${_D}.So$//} @rm -f ${.TARGET} ${DTRACE} ${DTRACEFLAGS} -G -o ${.TARGET} -s ${.ALLSRC:N*.h} ${_D}.po: ${_DSRC} ${POBJS:S/^${_D}.po$//} @rm -f ${.TARGET} ${DTRACE} ${DTRACEFLAGS} -G -o ${.TARGET} -s ${.ALLSRC:N*.h} .endif .endfor .endfor .if ${MAKE_VERSION} < 20160220 DEPEND_MP?= -MP .endif # Handle OBJS=../somefile.o hacks. Just replace '/' rather than use :T to # avoid collisions. DEPEND_FILTER= C,/,_,g DEPENDSRCS= ${SRCS:M*.[cSC]} ${SRCS:M*.cxx} ${SRCS:M*.cpp} ${SRCS:M*.cc} .if !empty(DEPENDSRCS) DEPENDOBJS+= ${DEPENDSRCS:R:S,$,.o,} .endif DEPENDFILES_OBJS= ${DEPENDOBJS:O:u:${DEPEND_FILTER}:C/^/${DEPENDFILE}./} DEPEND_CFLAGS+= -MD ${DEPEND_MP} -MF${DEPENDFILE}.${.TARGET:${DEPEND_FILTER}} DEPEND_CFLAGS+= -MT${.TARGET} .if !defined(_meta_filemon) .if defined(.PARSEDIR) # Only add in DEPEND_CFLAGS for CFLAGS on files we expect from DEPENDOBJS # as those are the only ones we will include. DEPEND_CFLAGS_CONDITION= "${DEPENDOBJS:M${.TARGET:${DEPEND_FILTER}}}" != "" CFLAGS+= ${${DEPEND_CFLAGS_CONDITION}:?${DEPEND_CFLAGS}:} .else CFLAGS+= ${DEPEND_CFLAGS} .endif .if !defined(_SKIP_READ_DEPEND) .for __depend_obj in ${DEPENDFILES_OBJS} .if ${MAKE_VERSION} < 20160220 .sinclude "${.OBJDIR}/${__depend_obj}" .else .dinclude "${.OBJDIR}/${__depend_obj}" .endif .endfor .endif # !defined(_SKIP_READ_DEPEND) .endif # !defined(_meta_filemon) .endif # defined(SRCS) .if ${MK_DIRDEPS_BUILD} == "yes" # Prevent meta.autodep.mk from tracking "local dependencies". .depend: .include # If using filemon then _EXTRADEPEND is skipped since it is not needed. .if defined(_meta_filemon) # this depend: bypasses that below # the dependency helps when bootstrapping depend: beforedepend ${DPSRCS} ${SRCS} afterdepend beforedepend: afterdepend: beforedepend .endif .endif # Guess some dependencies for when no ${DEPENDFILE}.OBJ is generated yet. 
# For meta+filemon the .meta file is checked for since it is the dependency # file used. .for __obj in ${DEPENDOBJS:O:u} .if (defined(_meta_filemon) && !exists(${.OBJDIR}/${__obj}.meta)) || \ (!defined(_meta_filemon) && !exists(${.OBJDIR}/${DEPENDFILE}.${__obj})) ${__obj}: ${OBJS_DEPEND_GUESS} ${__obj}: ${OBJS_DEPEND_GUESS.${__obj}} .elif defined(_meta_filemon) # For meta mode we still need to know which file to depend on to avoid # ambiguous suffix transformation rules from .PATH. Meta mode does not # use .depend files. We really only need source files, not headers since # they are typically in SRCS/beforebuild already. For target-specific # guesses do include headers though since they may not be in SRCS. ${__obj}: ${OBJS_DEPEND_GUESS:N*.h} ${__obj}: ${OBJS_DEPEND_GUESS.${__obj}} .endif .endfor # Always run 'make depend' to generate dependencies early and to avoid the # need for manually running it. The dirdeps build should only do this in # sub-makes though since MAKELEVEL0 is for dirdeps calculations. .if ${MK_DIRDEPS_BUILD} == "no" || ${.MAKE.LEVEL} > 0 beforebuild: depend .endif .if !target(depend) .if defined(SRCS) depend: beforedepend ${DEPENDFILE} afterdepend # Tell bmake not to look for generated files via .PATH .NOPATH: ${DEPENDFILE} ${DEPENDFILES_OBJS} DPSRCS+= ${SRCS} # A .depend file will only be generated if there are commands in # beforedepend/_EXTRADEPEND/afterdepend. The _EXTRADEPEND target is # ignored if using meta+filemon since it handles all dependencies. The other # targets are kept as they may be used for generating something. The target is # kept to allow 'make depend' to generate files. ${DEPENDFILE}: ${DPSRCS} .if exists(${.OBJDIR}/${DEPENDFILE}) || \ ((commands(beforedepend) || \ (!defined(_meta_filemon) && commands(_EXTRADEPEND)) || \ commands(afterdepend)) && !empty(.MAKE.MODE:Mmeta)) rm -f ${DEPENDFILE} .endif .if !defined(_meta_filemon) && target(_EXTRADEPEND) _EXTRADEPEND: .USE ${DEPENDFILE}: _EXTRADEPEND .endif .ORDER: ${DEPENDFILE} afterdepend .else depend: beforedepend afterdepend .endif .if !target(beforedepend) beforedepend: .else .ORDER: beforedepend ${DEPENDFILE} .ORDER: beforedepend afterdepend .endif .if !target(afterdepend) afterdepend: .endif .endif .if defined(SRCS) .if ${CTAGS:T} == "gtags" CLEANDEPENDFILES+= GPATH GRTAGS GSYMS GTAGS .if defined(HTML) CLEANDEPENDDIRS+= HTML .endif .else CLEANDEPENDFILES+= tags .endif .endif .if !target(cleandepend) cleandepend: .if !empty(CLEANDEPENDFILES) rm -f ${CLEANDEPENDFILES} .endif .if !empty(CLEANDEPENDDIRS) rm -rf ${CLEANDEPENDDIRS} .endif .endif +.ORDER: cleandepend all +.ORDER: cleandepend depend .if !target(checkdpadd) && (defined(DPADD) || defined(LDADD)) _LDADD_FROM_DPADD= ${DPADD:R:T:C;^lib(.*)$;-l\1;g} # Ignore -Wl,--start-group/-Wl,--end-group as it might be required in the # LDADD list due to unresolved symbols _LDADD_CANONICALIZED= ${LDADD:N:R:T:C;^lib(.*)$;-l\1;g:N-Wl,--[es]*-group} checkdpadd: .if ${_LDADD_FROM_DPADD} != ${_LDADD_CANONICALIZED} @echo ${.CURDIR} @echo "DPADD -> ${_LDADD_FROM_DPADD}" @echo "LDADD -> ${_LDADD_CANONICALIZED}" .endif .endif Index: projects/clang390-import/share/mk/bsd.obj.mk =================================================================== --- projects/clang390-import/share/mk/bsd.obj.mk (revision 305016) +++ projects/clang390-import/share/mk/bsd.obj.mk (revision 305017) @@ -1,211 +1,212 @@ # $FreeBSD$ # # The include file handles creating the 'obj' directory # and cleaning up object files, etc.
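The .ORDER: cleandepend lines added above serialize that target against all and depend when they are requested together, which keeps combined invocations safe under parallel make; a usage sketch:
make -j8 cleandepend depend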
# # +++ variables +++ # # CLEANDIRS Additional directories to remove for the clean target. # # CLEANFILES Additional files to remove for the clean target. # # MAKEOBJDIR A pathname for the directory where the targets # are built. Note: MAKEOBJDIR is an *environment* variable # and works properly only if set as an environment variable, # not as a global or command line variable! # # E.g. use `env MAKEOBJDIR=temp-obj make' # # MAKEOBJDIRPREFIX Specifies somewhere other than /usr/obj to root the object # tree. Note: MAKEOBJDIRPREFIX is an *environment* variable # and works properly only if set as an environment variable, # not as a global or command line variable! # # E.g. use `env MAKEOBJDIRPREFIX=/somewhere/obj make' # # NO_OBJ Do not create object directories. This should not be set # if anything is built. # # +++ targets +++ # # clean: # remove ${CLEANFILES}; remove ${CLEANDIRS} and all contents. # # cleandir: # remove the build directory (and all its contents) created by obj # # obj: # create build directory. # .if !target(____) ____: .include .if ${MK_AUTO_OBJ} == "yes" # it is done by now objwarn: obj: CANONICALOBJDIR= ${.OBJDIR} .if defined(NO_OBJ) # but this makefile does not want it! .OBJDIR: ${.CURDIR} .endif .elif defined(MAKEOBJDIRPREFIX) CANONICALOBJDIR:=${MAKEOBJDIRPREFIX}${.CURDIR} .elif defined(MAKEOBJDIR) && ${MAKEOBJDIR:M/*} != "" CANONICALOBJDIR:=${MAKEOBJDIR} OBJTOP?= ${MAKEOBJDIR} .else CANONICALOBJDIR:=/usr/obj${.CURDIR} .endif OBJTOP?= ${.OBJDIR:S,${.CURDIR},,}${SRCTOP} # # Warn of unorthodox object directory. # # The following directories are tried in order for ${.OBJDIR}: # # 1. ${MAKEOBJDIRPREFIX}/`pwd` # 2. ${MAKEOBJDIR} # 3. obj.${MACHINE} # 4. obj # 5. /usr/obj/`pwd` # 6. ${.CURDIR} # # If ${.OBJDIR} is constructed using canonical cases 1 or 5, or # case 2 (using MAKEOBJDIR), don't issue a warning. Otherwise, # issue a warning differentiating between cases 6 and (3 or 4). # objwarn: .if !defined(NO_OBJ) && ${.OBJDIR} != ${CANONICALOBJDIR} && \ !(defined(MAKEOBJDIRPREFIX) && exists(${CANONICALOBJDIR}/)) && \ !(defined(MAKEOBJDIR) && exists(${MAKEOBJDIR}/)) .if ${.OBJDIR} == ${.CURDIR} @${ECHO} "Warning: Object directory not changed from original ${.CURDIR}" .elif exists(${.CURDIR}/obj.${MACHINE}/) || exists(${.CURDIR}/obj/) @${ECHO} "Warning: Using ${.OBJDIR} as object directory instead of\ canonical ${CANONICALOBJDIR}" .endif .endif beforebuild: objwarn .if !defined(NO_OBJ) .if !target(obj) obj: .PHONY @if ! test -d ${CANONICALOBJDIR}/; then \ mkdir -p ${CANONICALOBJDIR}; \ if ! test -d ${CANONICALOBJDIR}/; then \ ${ECHO} "Unable to create ${CANONICALOBJDIR}."; \ exit 1; \ fi; \ ${ECHO} "${CANONICALOBJDIR} created for ${.CURDIR}"; \ fi .for dir in ${SRCS:H:O:u} ${DPSRCS:H:O:u} @if ! test -d ${CANONICALOBJDIR}/${dir}/; then \ mkdir -p ${CANONICALOBJDIR}/${dir}; \ if ! test -d ${CANONICALOBJDIR}/${dir}/; then \ ${ECHO} "Unable to create ${CANONICALOBJDIR}/${dir}."; \ exit 1; \ fi; \ ${ECHO} "${CANONICALOBJDIR}/${dir} created for ${.CURDIR}"; \ fi .endfor .endif .if !target(objlink) objlink: @if test -d ${CANONICALOBJDIR}/; then \ rm -f ${.CURDIR}/obj; \ ln -s ${CANONICALOBJDIR} ${.CURDIR}/obj; \ else \ echo "No ${CANONICALOBJDIR} to link to - do a make obj."; \ fi .endif .endif # !defined(NO_OBJ) # # where would that obj directory be? 
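Similarly, the .ORDER: clean all addition above makes the common clean-and-rebuild invocation safe with -j; a sketch, with MAKEOBJDIRPREFIX used as documented in the file's own comments:
env MAKEOBJDIRPREFIX=/usr/obj make -j8 clean all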
# .if !target(whereobj) whereobj: @echo ${.OBJDIR} .endif .if ${CANONICALOBJDIR} != ${.CURDIR} && exists(${CANONICALOBJDIR}/) cleanobj: @-rm -rf ${CANONICALOBJDIR} .else cleanobj: clean cleandepend .endif @if [ -L ${.CURDIR}/obj ]; then rm -f ${.CURDIR}/obj; fi # Tell bmake not to look for generated files via .PATH NOPATH_FILES+= ${CLEANFILES} .if !empty(NOPATH_FILES) .NOPATH: ${NOPATH_FILES} .endif .if !target(clean) clean: .if defined(CLEANFILES) && !empty(CLEANFILES) rm -f ${CLEANFILES} .endif .if defined(CLEANDIRS) && !empty(CLEANDIRS) -rm -rf ${CLEANDIRS} .endif .endif +.ORDER: clean all cleandir: cleanobj .include .if make(destroy*) && defined(OBJROOT) # this (rm -rf objdir) is much faster and more reliable than cleaning. # just in case we are playing games with these... _OBJDIR?= ${.OBJDIR} _CURDIR?= ${.CURDIR} # destroy almost everything destroy: destroy-all destroy-all: # just remove our objdir destroy-arch: .NOMETA .if ${_OBJDIR} != ${_CURDIR} cd ${_CURDIR} && rm -rf ${_OBJDIR} .endif .if defined(HOST_OBJTOP) destroy-host: destroy.host destroy.host: .NOMETA cd ${_CURDIR} && rm -rf ${HOST_OBJTOP}/${RELDIR:N.} .endif .if make(destroy-all) && ${RELDIR} == "." destroy-all: destroy-stage .endif # remove the stage tree destroy-stage: .NOMETA .if defined(STAGE_ROOT) cd ${_CURDIR} && rm -rf ${STAGE_ROOT} .endif # allow parallel destruction _destroy_machine_list = common host ${ALL_MACHINE_LIST} .for m in ${_destroy_machine_list:O:u} destroy-all: destroy.$m .if !target(destroy.$m) destroy.$m: .NOMETA .if ${_OBJDIR} != ${_CURDIR} cd ${_CURDIR} && rm -rf ${OBJROOT}$m*/${RELDIR:N.} .endif .endif .endfor .endif .endif # !target(__<bsd.obj.mk>__) Index: projects/clang390-import/sys/amd64/amd64/pmap.c =================================================================== --- projects/clang390-import/sys/amd64/amd64/pmap.c (revision 305016) +++ projects/clang390-import/sys/amd64/amd64/pmap.c (revision 305017) @@ -1,7272 +1,7265 @@ /*- * Copyright (c) 1991 Regents of the University of California. * All rights reserved. * Copyright (c) 1994 John S. Dyson * All rights reserved. * Copyright (c) 1994 David Greenman * All rights reserved. * Copyright (c) 2003 Peter Wemm * All rights reserved. * Copyright (c) 2005-2010 Alan L. Cox * All rights reserved. * * This code is derived from software contributed to Berkeley by * the Systems Programming Group of the University of Utah Computer * Science Department and William Jolitz of UUNET Technologies Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission.
* * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)pmap.c 7.7 (Berkeley) 5/12/91 */ /*- * Copyright (c) 2003 Networks Associates Technology, Inc. * All rights reserved. * * This software was developed for the FreeBSD Project by Jake Burkholder, * Safeport Network Services, and Network Associates Laboratories, the * Security Research Division of Network Associates, Inc. under * DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA * CHATS research program. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #define AMD64_NPT_AWARE #include __FBSDID("$FreeBSD$"); /* * Manages physical address maps. * * Since the information managed by this module is * also stored by the logical address mapping module, * this module may throw away valid virtual-to-physical * mappings at almost any time. However, invalidations * of virtual-to-physical mappings must be done as * requested. * * In order to cope with hardware architectures which * make virtual-to-physical map invalidates expensive, * this module may delay invalidate or reduced protection * operations until such time as they are actually * necessary. This module is given full information as * to which processors are currently using which maps, * and to when physical maps must be made correct. 
*/ #include "opt_pmap.h" #include "opt_vm.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef SMP #include #endif static __inline boolean_t pmap_type_guest(pmap_t pmap) { return ((pmap->pm_type == PT_EPT) || (pmap->pm_type == PT_RVI)); } static __inline boolean_t pmap_emulate_ad_bits(pmap_t pmap) { return ((pmap->pm_flags & PMAP_EMULATE_AD_BITS) != 0); } static __inline pt_entry_t pmap_valid_bit(pmap_t pmap) { pt_entry_t mask; switch (pmap->pm_type) { case PT_X86: case PT_RVI: mask = X86_PG_V; break; case PT_EPT: if (pmap_emulate_ad_bits(pmap)) mask = EPT_PG_EMUL_V; else mask = EPT_PG_READ; break; default: panic("pmap_valid_bit: invalid pm_type %d", pmap->pm_type); } return (mask); } static __inline pt_entry_t pmap_rw_bit(pmap_t pmap) { pt_entry_t mask; switch (pmap->pm_type) { case PT_X86: case PT_RVI: mask = X86_PG_RW; break; case PT_EPT: if (pmap_emulate_ad_bits(pmap)) mask = EPT_PG_EMUL_RW; else mask = EPT_PG_WRITE; break; default: panic("pmap_rw_bit: invalid pm_type %d", pmap->pm_type); } return (mask); } static __inline pt_entry_t pmap_global_bit(pmap_t pmap) { pt_entry_t mask; switch (pmap->pm_type) { case PT_X86: mask = X86_PG_G; break; case PT_RVI: case PT_EPT: mask = 0; break; default: panic("pmap_global_bit: invalid pm_type %d", pmap->pm_type); } return (mask); } static __inline pt_entry_t pmap_accessed_bit(pmap_t pmap) { pt_entry_t mask; switch (pmap->pm_type) { case PT_X86: case PT_RVI: mask = X86_PG_A; break; case PT_EPT: if (pmap_emulate_ad_bits(pmap)) mask = EPT_PG_READ; else mask = EPT_PG_A; break; default: panic("pmap_accessed_bit: invalid pm_type %d", pmap->pm_type); } return (mask); } static __inline pt_entry_t pmap_modified_bit(pmap_t pmap) { pt_entry_t mask; switch (pmap->pm_type) { case PT_X86: case PT_RVI: mask = X86_PG_M; break; case PT_EPT: if (pmap_emulate_ad_bits(pmap)) mask = EPT_PG_WRITE; else mask = EPT_PG_M; break; default: panic("pmap_modified_bit: invalid pm_type %d", pmap->pm_type); } return (mask); } extern struct pcpu __pcpu[]; #if !defined(DIAGNOSTIC) #ifdef __GNUC_GNU_INLINE__ #define PMAP_INLINE __attribute__((__gnu_inline__)) inline #else #define PMAP_INLINE extern inline #endif #else #define PMAP_INLINE #endif #ifdef PV_STATS #define PV_STAT(x) do { x ; } while (0) #else #define PV_STAT(x) do { } while (0) #endif #define pa_index(pa) ((pa) >> PDRSHIFT) #define pa_to_pvh(pa) (&pv_table[pa_index(pa)]) #define NPV_LIST_LOCKS MAXCPU #define PHYS_TO_PV_LIST_LOCK(pa) \ (&pv_list_locks[pa_index(pa) % NPV_LIST_LOCKS]) #define CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, pa) do { \ struct rwlock **_lockp = (lockp); \ struct rwlock *_new_lock; \ \ _new_lock = PHYS_TO_PV_LIST_LOCK(pa); \ if (_new_lock != *_lockp) { \ if (*_lockp != NULL) \ rw_wunlock(*_lockp); \ *_lockp = _new_lock; \ rw_wlock(*_lockp); \ } \ } while (0) #define CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m) \ CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, VM_PAGE_TO_PHYS(m)) #define RELEASE_PV_LIST_LOCK(lockp) do { \ struct rwlock **_lockp = (lockp); \ \ if (*_lockp != NULL) { \ rw_wunlock(*_lockp); \ *_lockp = NULL; \ } \ } while (0) #define VM_PAGE_TO_PV_LIST_LOCK(m) \ PHYS_TO_PV_LIST_LOCK(VM_PAGE_TO_PHYS(m)) struct pmap kernel_pmap_store; vm_offset_t virtual_avail; /* VA of first avail page (after kernel bss) */ 
vm_offset_t virtual_end; /* VA of last avail page (end of kernel AS) */ int nkpt; SYSCTL_INT(_machdep, OID_AUTO, nkpt, CTLFLAG_RD, &nkpt, 0, "Number of kernel page table pages allocated on bootup"); static int ndmpdp; vm_paddr_t dmaplimit; vm_offset_t kernel_vm_end = VM_MIN_KERNEL_ADDRESS; pt_entry_t pg_nx; static SYSCTL_NODE(_vm, OID_AUTO, pmap, CTLFLAG_RD, 0, "VM/pmap parameters"); static int pat_works = 1; SYSCTL_INT(_vm_pmap, OID_AUTO, pat_works, CTLFLAG_RD, &pat_works, 1, "Is page attribute table fully functional?"); static int pg_ps_enabled = 1; SYSCTL_INT(_vm_pmap, OID_AUTO, pg_ps_enabled, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &pg_ps_enabled, 0, "Are large page mappings enabled?"); #define PAT_INDEX_SIZE 8 static int pat_index[PAT_INDEX_SIZE]; /* cache mode to PAT index conversion */ static u_int64_t KPTphys; /* phys addr of kernel level 1 */ static u_int64_t KPDphys; /* phys addr of kernel level 2 */ u_int64_t KPDPphys; /* phys addr of kernel level 3 */ u_int64_t KPML4phys; /* phys addr of kernel level 4 */ static u_int64_t DMPDphys; /* phys addr of direct mapped level 2 */ static u_int64_t DMPDPphys; /* phys addr of direct mapped level 3 */ static int ndmpdpphys; /* number of DMPDPphys pages */ /* * pmap_mapdev support pre initialization (i.e. console) */ #define PMAP_PREINIT_MAPPING_COUNT 8 static struct pmap_preinit_mapping { vm_paddr_t pa; vm_offset_t va; vm_size_t sz; int mode; } pmap_preinit_mapping[PMAP_PREINIT_MAPPING_COUNT]; static int pmap_initialized; /* * Data for the pv entry allocation mechanism. * Updates to pv_invl_gen are protected by the pv_list_locks[] * elements, but reads are not. */ static TAILQ_HEAD(pch, pv_chunk) pv_chunks = TAILQ_HEAD_INITIALIZER(pv_chunks); static struct mtx pv_chunks_mutex; static struct rwlock pv_list_locks[NPV_LIST_LOCKS]; static u_long pv_invl_gen[NPV_LIST_LOCKS]; static struct md_page *pv_table; static struct md_page pv_dummy; /* * All those kernel PT submaps that BSD is so fond of */ pt_entry_t *CMAP1 = 0; caddr_t CADDR1 = 0; static vm_offset_t qframe = 0; static struct mtx qframe_mtx; static int pmap_flags = PMAP_PDE_SUPERPAGE; /* flags for x86 pmaps */ int pmap_pcid_enabled = 1; SYSCTL_INT(_vm_pmap, OID_AUTO, pcid_enabled, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &pmap_pcid_enabled, 0, "Is TLB Context ID enabled ?"); int invpcid_works = 0; SYSCTL_INT(_vm_pmap, OID_AUTO, invpcid_works, CTLFLAG_RD, &invpcid_works, 0, "Is the invpcid instruction available ?"); static int pmap_pcid_save_cnt_proc(SYSCTL_HANDLER_ARGS) { int i; uint64_t res; res = 0; CPU_FOREACH(i) { res += cpuid_to_pcpu[i]->pc_pm_save_cnt; } return (sysctl_handle_64(oidp, &res, 0, req)); } SYSCTL_PROC(_vm_pmap, OID_AUTO, pcid_save_cnt, CTLTYPE_U64 | CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, 0, pmap_pcid_save_cnt_proc, "QU", "Count of saved TLB context on switch"); static LIST_HEAD(, pmap_invl_gen) pmap_invl_gen_tracker = LIST_HEAD_INITIALIZER(&pmap_invl_gen_tracker); static struct mtx invl_gen_mtx; static u_long pmap_invl_gen = 0; /* Fake lock object to satisfy turnstiles interface. */ static struct lock_object invl_gen_ts = { .lo_name = "invlts", }; #define PMAP_ASSERT_NOT_IN_DI() \ KASSERT(curthread->td_md.md_invl_gen.gen == 0, ("DI already started")) /* * Start a new Delayed Invalidation (DI) block of code, executed by * the current thread. 
Within a DI block, the current thread may * destroy both the page table and PV list entries for a mapping and * then release the corresponding PV list lock before ensuring that * the mapping is flushed from the TLBs of any processors with the * pmap active. */ static void pmap_delayed_invl_started(void) { struct pmap_invl_gen *invl_gen; u_long currgen; invl_gen = &curthread->td_md.md_invl_gen; PMAP_ASSERT_NOT_IN_DI(); mtx_lock(&invl_gen_mtx); if (LIST_EMPTY(&pmap_invl_gen_tracker)) currgen = pmap_invl_gen; else currgen = LIST_FIRST(&pmap_invl_gen_tracker)->gen; invl_gen->gen = currgen + 1; LIST_INSERT_HEAD(&pmap_invl_gen_tracker, invl_gen, link); mtx_unlock(&invl_gen_mtx); } /* * Finish the DI block, previously started by the current thread. All * required TLB flushes for the pages marked by * pmap_delayed_invl_page() must be finished before this function is * called. * * This function works by bumping the global DI generation number to * the generation number of the current thread's DI, unless there is a * pending DI that started earlier. In the latter case, bumping the * global DI generation number would incorrectly signal that the * earlier DI had finished. Instead, this function bumps the earlier * DI's generation number to match the generation number of the * current thread's DI. */ static void pmap_delayed_invl_finished(void) { struct pmap_invl_gen *invl_gen, *next; struct turnstile *ts; invl_gen = &curthread->td_md.md_invl_gen; KASSERT(invl_gen->gen != 0, ("missed invl_started")); mtx_lock(&invl_gen_mtx); next = LIST_NEXT(invl_gen, link); if (next == NULL) { turnstile_chain_lock(&invl_gen_ts); ts = turnstile_lookup(&invl_gen_ts); pmap_invl_gen = invl_gen->gen; if (ts != NULL) { turnstile_broadcast(ts, TS_SHARED_QUEUE); turnstile_unpend(ts, TS_SHARED_LOCK); } turnstile_chain_unlock(&invl_gen_ts); } else { next->gen = invl_gen->gen; } LIST_REMOVE(invl_gen, link); mtx_unlock(&invl_gen_mtx); invl_gen->gen = 0; } #ifdef PV_STATS static long invl_wait; SYSCTL_LONG(_vm_pmap, OID_AUTO, invl_wait, CTLFLAG_RD, &invl_wait, 0, "Number of times DI invalidation blocked pmap_remove_all/write"); #endif static u_long * pmap_delayed_invl_genp(vm_page_t m) { return (&pv_invl_gen[pa_index(VM_PAGE_TO_PHYS(m)) % NPV_LIST_LOCKS]); } /* * Ensure that all currently executing DI blocks, that need to flush * TLB for the given page m, actually flushed the TLB at the time the * function returned. If the page m has an empty PV list and we call * pmap_delayed_invl_wait(), upon its return we know that no CPU has a * valid mapping for the page m in either its page table or TLB. * * This function works by blocking until the global DI generation * number catches up with the generation number associated with the * given page m and its PV list. Since this function's callers * typically own an object lock and sometimes own a page lock, it * cannot sleep. Instead, it blocks on a turnstile to relinquish the * processor. */ static void pmap_delayed_invl_wait(vm_page_t m) { struct thread *td; struct turnstile *ts; u_long *m_gen; #ifdef PV_STATS bool accounted = false; #endif td = curthread; m_gen = pmap_delayed_invl_genp(m); while (*m_gen > pmap_invl_gen) { #ifdef PV_STATS if (!accounted) { atomic_add_long(&invl_wait, 1); accounted = true; } #endif ts = turnstile_trywait(&invl_gen_ts); if (*m_gen > pmap_invl_gen) turnstile_wait(ts, NULL, TS_SHARED_QUEUE); else turnstile_cancel(ts); } } /* * Mark the page m's PV list as participating in the current thread's * DI block. 
Any threads concurrently using m's PV list to remove or * restrict all mappings to m will wait for the current thread's DI * block to complete before proceeding. * * The function works by setting the DI generation number for m's PV * list to at least the DI generation number of the current thread. * This forces a caller of pmap_delayed_invl_wait() to block until * current thread calls pmap_delayed_invl_finished(). */ static void pmap_delayed_invl_page(vm_page_t m) { u_long gen, *m_gen; rw_assert(VM_PAGE_TO_PV_LIST_LOCK(m), RA_WLOCKED); gen = curthread->td_md.md_invl_gen.gen; if (gen == 0) return; m_gen = pmap_delayed_invl_genp(m); if (*m_gen < gen) *m_gen = gen; } /* * Crashdump maps. */ static caddr_t crashdumpmap; static void free_pv_chunk(struct pv_chunk *pc); static void free_pv_entry(pmap_t pmap, pv_entry_t pv); static pv_entry_t get_pv_entry(pmap_t pmap, struct rwlock **lockp); static int popcnt_pc_map_pq(uint64_t *map); static vm_page_t reclaim_pv_chunk(pmap_t locked_pmap, struct rwlock **lockp); static void reserve_pv_entries(pmap_t pmap, int needed, struct rwlock **lockp); static void pmap_pv_demote_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa, struct rwlock **lockp); static boolean_t pmap_pv_insert_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa, struct rwlock **lockp); static void pmap_pv_promote_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa, struct rwlock **lockp); static void pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va); static pv_entry_t pmap_pvh_remove(struct md_page *pvh, pmap_t pmap, vm_offset_t va); static int pmap_change_attr_locked(vm_offset_t va, vm_size_t size, int mode); static boolean_t pmap_demote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va); static boolean_t pmap_demote_pde_locked(pmap_t pmap, pd_entry_t *pde, vm_offset_t va, struct rwlock **lockp); static boolean_t pmap_demote_pdpe(pmap_t pmap, pdp_entry_t *pdpe, vm_offset_t va); static boolean_t pmap_enter_pde(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, struct rwlock **lockp); static vm_page_t pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, vm_page_t mpte, struct rwlock **lockp); static void pmap_fill_ptp(pt_entry_t *firstpte, pt_entry_t newpte); static int pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte); static void pmap_kenter_attr(vm_offset_t va, vm_paddr_t pa, int mode); static vm_page_t pmap_lookup_pt_page(pmap_t pmap, vm_offset_t va); static void pmap_pde_attr(pd_entry_t *pde, int cache_bits, int mask); static void pmap_promote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va, struct rwlock **lockp); static boolean_t pmap_protect_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t sva, vm_prot_t prot); static void pmap_pte_attr(pt_entry_t *pte, int cache_bits, int mask); static int pmap_remove_pde(pmap_t pmap, pd_entry_t *pdq, vm_offset_t sva, struct spglist *free, struct rwlock **lockp); static int pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, vm_offset_t sva, pd_entry_t ptepde, struct spglist *free, struct rwlock **lockp); static void pmap_remove_pt_page(pmap_t pmap, vm_page_t mpte); static void pmap_remove_page(pmap_t pmap, vm_offset_t va, pd_entry_t *pde, struct spglist *free); static boolean_t pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va, vm_page_t m, struct rwlock **lockp); static void pmap_update_pde(pmap_t pmap, vm_offset_t va, pd_entry_t *pde, pd_entry_t newpde); static void pmap_update_pde_invalidate(pmap_t, vm_offset_t va, pd_entry_t pde); static vm_page_t _pmap_allocpte(pmap_t pmap, vm_pindex_t ptepindex, struct rwlock 
**lockp); static vm_page_t pmap_allocpde(pmap_t pmap, vm_offset_t va, struct rwlock **lockp); static vm_page_t pmap_allocpte(pmap_t pmap, vm_offset_t va, struct rwlock **lockp); static void _pmap_unwire_ptp(pmap_t pmap, vm_offset_t va, vm_page_t m, struct spglist *free); static int pmap_unuse_pt(pmap_t, vm_offset_t, pd_entry_t, struct spglist *); static vm_offset_t pmap_kmem_choose(vm_offset_t addr); /* * Move the kernel virtual free pointer to the next * 2MB. This is used to help improve performance * by using a large (2MB) page for much of the kernel * (.text, .data, .bss) */ static vm_offset_t pmap_kmem_choose(vm_offset_t addr) { vm_offset_t newaddr = addr; newaddr = roundup2(addr, NBPDR); return (newaddr); } /********************/ /* Inline functions */ /********************/ /* Return a non-clipped PD index for a given VA */ static __inline vm_pindex_t pmap_pde_pindex(vm_offset_t va) { return (va >> PDRSHIFT); } /* Return various clipped indexes for a given VA */ static __inline vm_pindex_t pmap_pte_index(vm_offset_t va) { return ((va >> PAGE_SHIFT) & ((1ul << NPTEPGSHIFT) - 1)); } static __inline vm_pindex_t pmap_pde_index(vm_offset_t va) { return ((va >> PDRSHIFT) & ((1ul << NPDEPGSHIFT) - 1)); } static __inline vm_pindex_t pmap_pdpe_index(vm_offset_t va) { return ((va >> PDPSHIFT) & ((1ul << NPDPEPGSHIFT) - 1)); } static __inline vm_pindex_t pmap_pml4e_index(vm_offset_t va) { return ((va >> PML4SHIFT) & ((1ul << NPML4EPGSHIFT) - 1)); } /* Return a pointer to the PML4 slot that corresponds to a VA */ static __inline pml4_entry_t * pmap_pml4e(pmap_t pmap, vm_offset_t va) { return (&pmap->pm_pml4[pmap_pml4e_index(va)]); } /* Return a pointer to the PDP slot that corresponds to a VA */ static __inline pdp_entry_t * pmap_pml4e_to_pdpe(pml4_entry_t *pml4e, vm_offset_t va) { pdp_entry_t *pdpe; pdpe = (pdp_entry_t *)PHYS_TO_DMAP(*pml4e & PG_FRAME); return (&pdpe[pmap_pdpe_index(va)]); } /* Return a pointer to the PDP slot that corresponds to a VA */ static __inline pdp_entry_t * pmap_pdpe(pmap_t pmap, vm_offset_t va) { pml4_entry_t *pml4e; pt_entry_t PG_V; PG_V = pmap_valid_bit(pmap); pml4e = pmap_pml4e(pmap, va); if ((*pml4e & PG_V) == 0) return (NULL); return (pmap_pml4e_to_pdpe(pml4e, va)); } /* Return a pointer to the PD slot that corresponds to a VA */ static __inline pd_entry_t * pmap_pdpe_to_pde(pdp_entry_t *pdpe, vm_offset_t va) { pd_entry_t *pde; pde = (pd_entry_t *)PHYS_TO_DMAP(*pdpe & PG_FRAME); return (&pde[pmap_pde_index(va)]); } /* Return a pointer to the PD slot that corresponds to a VA */ static __inline pd_entry_t * pmap_pde(pmap_t pmap, vm_offset_t va) { pdp_entry_t *pdpe; pt_entry_t PG_V; PG_V = pmap_valid_bit(pmap); pdpe = pmap_pdpe(pmap, va); if (pdpe == NULL || (*pdpe & PG_V) == 0) return (NULL); return (pmap_pdpe_to_pde(pdpe, va)); } /* Return a pointer to the PT slot that corresponds to a VA */ static __inline pt_entry_t * pmap_pde_to_pte(pd_entry_t *pde, vm_offset_t va) { pt_entry_t *pte; pte = (pt_entry_t *)PHYS_TO_DMAP(*pde & PG_FRAME); return (&pte[pmap_pte_index(va)]); } /* Return a pointer to the PT slot that corresponds to a VA */ static __inline pt_entry_t * pmap_pte(pmap_t pmap, vm_offset_t va) { pd_entry_t *pde; pt_entry_t PG_V; PG_V = pmap_valid_bit(pmap); pde = pmap_pde(pmap, va); if (pde == NULL || (*pde & PG_V) == 0) return (NULL); if ((*pde & PG_PS) != 0) /* compat with i386 pmap_pte() */ return ((pt_entry_t *)pde); return (pmap_pde_to_pte(pde, va)); } static __inline void pmap_resident_count_inc(pmap_t pmap, int count) { PMAP_LOCK_ASSERT(pmap, 
MA_OWNED); pmap->pm_stats.resident_count += count; } static __inline void pmap_resident_count_dec(pmap_t pmap, int count) { PMAP_LOCK_ASSERT(pmap, MA_OWNED); KASSERT(pmap->pm_stats.resident_count >= count, ("pmap %p resident count underflow %ld %d", pmap, pmap->pm_stats.resident_count, count)); pmap->pm_stats.resident_count -= count; } PMAP_INLINE pt_entry_t * vtopte(vm_offset_t va) { u_int64_t mask = ((1ul << (NPTEPGSHIFT + NPDEPGSHIFT + NPDPEPGSHIFT + NPML4EPGSHIFT)) - 1); KASSERT(va >= VM_MAXUSER_ADDRESS, ("vtopte on a uva/gpa 0x%0lx", va)); return (PTmap + ((va >> PAGE_SHIFT) & mask)); } static __inline pd_entry_t * vtopde(vm_offset_t va) { u_int64_t mask = ((1ul << (NPDEPGSHIFT + NPDPEPGSHIFT + NPML4EPGSHIFT)) - 1); KASSERT(va >= VM_MAXUSER_ADDRESS, ("vtopde on a uva/gpa 0x%0lx", va)); return (PDmap + ((va >> PDRSHIFT) & mask)); } static u_int64_t allocpages(vm_paddr_t *firstaddr, int n) { u_int64_t ret; ret = *firstaddr; bzero((void *)ret, n * PAGE_SIZE); *firstaddr += n * PAGE_SIZE; return (ret); } CTASSERT(powerof2(NDMPML4E)); /* number of kernel PDP slots */ #define NKPDPE(ptpgs) howmany(ptpgs, NPDEPG) static void nkpt_init(vm_paddr_t addr) { int pt_pages; #ifdef NKPT pt_pages = NKPT; #else pt_pages = howmany(addr, 1 << PDRSHIFT); pt_pages += NKPDPE(pt_pages); /* * Add some slop beyond the bare minimum required for bootstrapping * the kernel. * * This is quite important when allocating KVA for kernel modules. * The modules are required to be linked in the negative 2GB of * the address space. If we run out of KVA in this region then * pmap_growkernel() will need to allocate page table pages to map * the entire 512GB of KVA space which is an unnecessary tax on * physical memory. * * Secondly, device memory mapped as part of setting up the low- * level console(s) is taken from KVA, starting at virtual_avail. * This is because cninit() is called after pmap_bootstrap() but * before vm_init() and pmap_init(). 20MB for a frame buffer is * not uncommon. */ pt_pages += 32; /* 64MB additional slop. */ #endif nkpt = pt_pages; } static void create_pagetables(vm_paddr_t *firstaddr) { int i, j, ndm1g, nkpdpe; pt_entry_t *pt_p; pd_entry_t *pd_p; pdp_entry_t *pdp_p; pml4_entry_t *p4_p; /* Allocate page table pages for the direct map */ ndmpdp = howmany(ptoa(Maxmem), NBPDP); if (ndmpdp < 4) /* Minimum 4GB of dirmap */ ndmpdp = 4; ndmpdpphys = howmany(ndmpdp, NPDPEPG); if (ndmpdpphys > NDMPML4E) { /* * Each NDMPML4E allows 512 GB, so limit to that, * and then readjust ndmpdp and ndmpdpphys. */ printf("NDMPML4E limits system to %d GB\n", NDMPML4E * 512); Maxmem = atop(NDMPML4E * NBPML4); ndmpdpphys = NDMPML4E; ndmpdp = NDMPML4E * NPDEPG; } DMPDPphys = allocpages(firstaddr, ndmpdpphys); ndm1g = 0; if ((amd_feature & AMDID_PAGE1GB) != 0) ndm1g = ptoa(Maxmem) >> PDPSHIFT; if (ndm1g < ndmpdp) DMPDphys = allocpages(firstaddr, ndmpdp - ndm1g); dmaplimit = (vm_paddr_t)ndmpdp << PDPSHIFT; /* Allocate pages */ KPML4phys = allocpages(firstaddr, 1); KPDPphys = allocpages(firstaddr, NKPML4E); /* * Allocate the initial number of kernel page table pages required to * bootstrap. We defer this until after all memory-size dependent * allocations are done (e.g. direct map), so that we don't have to * build in too much slop in our estimate. * * Note that when NKPML4E > 1, we have an empty page underneath * all but the KPML4I'th one, so we need NKPML4E-1 extra (zeroed) * pages. (pmap_enter requires a PD page to exist for each KPML4E.) 
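* As a worked example (editorial illustration, not in the original comment): if the early allocations end at 128MB, nkpt_init() computes pt_pages = howmany(128MB, 2MB) = 64, adds NKPDPE(64) = 1 page directory page and 32 pages of slop, giving nkpt = 97.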
*/ nkpt_init(*firstaddr); nkpdpe = NKPDPE(nkpt); KPTphys = allocpages(firstaddr, nkpt); KPDphys = allocpages(firstaddr, nkpdpe); /* Fill in the underlying page table pages */ /* Nominally read-only (but really R/W) from zero to physfree */ /* XXX not fully used, underneath 2M pages */ pt_p = (pt_entry_t *)KPTphys; for (i = 0; ptoa(i) < *firstaddr; i++) pt_p[i] = ptoa(i) | X86_PG_RW | X86_PG_V | X86_PG_G; /* Now map the page tables at their location within PTmap */ pd_p = (pd_entry_t *)KPDphys; for (i = 0; i < nkpt; i++) pd_p[i] = (KPTphys + ptoa(i)) | X86_PG_RW | X86_PG_V; /* Map from zero to end of allocations under 2M pages */ /* This replaces some of the KPTphys entries above */ for (i = 0; (i << PDRSHIFT) < *firstaddr; i++) pd_p[i] = (i << PDRSHIFT) | X86_PG_RW | X86_PG_V | PG_PS | X86_PG_G; /* And connect up the PD to the PDP (leaving room for L4 pages) */ pdp_p = (pdp_entry_t *)(KPDPphys + ptoa(KPML4I - KPML4BASE)); for (i = 0; i < nkpdpe; i++) pdp_p[i + KPDPI] = (KPDphys + ptoa(i)) | X86_PG_RW | X86_PG_V | PG_U; /* * Now, set up the direct map region using 2MB and/or 1GB pages. If * the end of physical memory is not aligned to a 1GB page boundary, * then the residual physical memory is mapped with 2MB pages. Later, * if pmap_mapdev{_attr}() uses the direct map for non-write-back * memory, pmap_change_attr() will demote any 2MB or 1GB page mappings * that are partially used. */ pd_p = (pd_entry_t *)DMPDphys; for (i = NPDEPG * ndm1g, j = 0; i < NPDEPG * ndmpdp; i++, j++) { pd_p[j] = (vm_paddr_t)i << PDRSHIFT; /* Preset PG_M and PG_A because demotion expects it. */ pd_p[j] |= X86_PG_RW | X86_PG_V | PG_PS | X86_PG_G | X86_PG_M | X86_PG_A; } pdp_p = (pdp_entry_t *)DMPDPphys; for (i = 0; i < ndm1g; i++) { pdp_p[i] = (vm_paddr_t)i << PDPSHIFT; /* Preset PG_M and PG_A because demotion expects it. */ pdp_p[i] |= X86_PG_RW | X86_PG_V | PG_PS | X86_PG_G | X86_PG_M | X86_PG_A; } for (j = 0; i < ndmpdp; i++, j++) { pdp_p[i] = DMPDphys + ptoa(j); pdp_p[i] |= X86_PG_RW | X86_PG_V | PG_U; } /* And recursively map PML4 to itself in order to get PTmap */ p4_p = (pml4_entry_t *)KPML4phys; p4_p[PML4PML4I] = KPML4phys; p4_p[PML4PML4I] |= X86_PG_RW | X86_PG_V | PG_U; /* Connect the Direct Map slot(s) up to the PML4. */ for (i = 0; i < ndmpdpphys; i++) { p4_p[DMPML4I + i] = DMPDPphys + ptoa(i); p4_p[DMPML4I + i] |= X86_PG_RW | X86_PG_V | PG_U; } /* Connect the KVA slots up to the PML4 */ for (i = 0; i < NKPML4E; i++) { p4_p[KPML4BASE + i] = KPDPphys + ptoa(i); p4_p[KPML4BASE + i] |= X86_PG_RW | X86_PG_V | PG_U; } } /* * Bootstrap the system enough to run with virtual memory. * * On amd64 this is called after mapping has already been enabled * and just syncs the pmap module with what has already been done. * [We can't call it easily with mapping off since the kernel is not * mapped with PA == VA, hence we would have to relocate every address * from the linked base (virtual) address "KERNBASE" to the actual * (physical) address starting relative to 0] */ void pmap_bootstrap(vm_paddr_t *firstaddr) { vm_offset_t va; pt_entry_t *pte; int i; /* * Create an initial set of page tables to run the kernel in. */ create_pagetables(firstaddr); /* * Add a physical memory segment (vm_phys_seg) corresponding to the * preallocated kernel page table pages so that vm_page structures * representing these pages will be created. The vm_page structures * are required for promotion of the corresponding kernel virtual * addresses to superpage mappings. 
*/ vm_phys_add_seg(KPTphys, KPTphys + ptoa(nkpt)); virtual_avail = (vm_offset_t) KERNBASE + *firstaddr; virtual_avail = pmap_kmem_choose(virtual_avail); virtual_end = VM_MAX_KERNEL_ADDRESS; /* XXX do %cr0 as well */ load_cr4(rcr4() | CR4_PGE); load_cr3(KPML4phys); if (cpu_stdext_feature & CPUID_STDEXT_SMEP) load_cr4(rcr4() | CR4_SMEP); /* * Initialize the kernel pmap (which is statically allocated). */ PMAP_LOCK_INIT(kernel_pmap); kernel_pmap->pm_pml4 = (pdp_entry_t *)PHYS_TO_DMAP(KPML4phys); kernel_pmap->pm_cr3 = KPML4phys; CPU_FILL(&kernel_pmap->pm_active); /* don't allow deactivation */ TAILQ_INIT(&kernel_pmap->pm_pvchunk); kernel_pmap->pm_flags = pmap_flags; /* * Initialize the TLB invalidations generation number lock. */ mtx_init(&invl_gen_mtx, "invlgn", NULL, MTX_DEF); /* * Reserve some special page table entries/VA space for temporary * mapping of pages. */ #define SYSMAP(c, p, v, n) \ v = (c)va; va += ((n)*PAGE_SIZE); p = pte; pte += (n); va = virtual_avail; pte = vtopte(va); /* * Crashdump maps. The first page is reused as CMAP1 for the * memory test. */ SYSMAP(caddr_t, CMAP1, crashdumpmap, MAXDUMPPGS) CADDR1 = crashdumpmap; virtual_avail = va; /* Initialize the PAT MSR. */ pmap_init_pat(); /* Initialize TLB Context Id. */ TUNABLE_INT_FETCH("vm.pmap.pcid_enabled", &pmap_pcid_enabled); if ((cpu_feature2 & CPUID2_PCID) != 0 && pmap_pcid_enabled) { /* Check for INVPCID support */ invpcid_works = (cpu_stdext_feature & CPUID_STDEXT_INVPCID) != 0; for (i = 0; i < MAXCPU; i++) { kernel_pmap->pm_pcids[i].pm_pcid = PMAP_PCID_KERN; kernel_pmap->pm_pcids[i].pm_gen = 1; } __pcpu[0].pc_pcid_next = PMAP_PCID_KERN + 1; __pcpu[0].pc_pcid_gen = 1; /* * pcpu area for APs is zeroed during AP startup. * pc_pcid_next and pc_pcid_gen are initialized by AP * during pcpu setup. */ load_cr4(rcr4() | CR4_PCIDE); } else { pmap_pcid_enabled = 0; } } /* * Setup the PAT MSR. */ void pmap_init_pat(void) { int pat_table[PAT_INDEX_SIZE]; uint64_t pat_msr; u_long cr0, cr4; int i; /* Bail if this CPU doesn't implement PAT. */ if ((cpu_feature & CPUID_PAT) == 0) panic("no PAT??"); /* Set default PAT index table. */ for (i = 0; i < PAT_INDEX_SIZE; i++) pat_table[i] = -1; pat_table[PAT_WRITE_BACK] = 0; pat_table[PAT_WRITE_THROUGH] = 1; pat_table[PAT_UNCACHEABLE] = 3; pat_table[PAT_WRITE_COMBINING] = 3; pat_table[PAT_WRITE_PROTECTED] = 3; pat_table[PAT_UNCACHED] = 3; /* Initialize default PAT entries. */ pat_msr = PAT_VALUE(0, PAT_WRITE_BACK) | PAT_VALUE(1, PAT_WRITE_THROUGH) | PAT_VALUE(2, PAT_UNCACHED) | PAT_VALUE(3, PAT_UNCACHEABLE) | PAT_VALUE(4, PAT_WRITE_BACK) | PAT_VALUE(5, PAT_WRITE_THROUGH) | PAT_VALUE(6, PAT_UNCACHED) | PAT_VALUE(7, PAT_UNCACHEABLE); if (pat_works) { /* * Leave the indices 0-3 at the default of WB, WT, UC-, and UC. * Program 5 and 6 as WP and WC. * Leave 4 and 7 as WB and UC. */ pat_msr &= ~(PAT_MASK(5) | PAT_MASK(6)); pat_msr |= PAT_VALUE(5, PAT_WRITE_PROTECTED) | PAT_VALUE(6, PAT_WRITE_COMBINING); pat_table[PAT_UNCACHED] = 2; pat_table[PAT_WRITE_PROTECTED] = 5; pat_table[PAT_WRITE_COMBINING] = 6; } else { /* * Just replace PAT Index 2 with WC instead of UC-. */ pat_msr &= ~PAT_MASK(2); pat_msr |= PAT_VALUE(2, PAT_WRITE_COMBINING); pat_table[PAT_WRITE_COMBINING] = 2; } /* Disable PGE. */ cr4 = rcr4(); load_cr4(cr4 & ~CR4_PGE); /* Disable caches (CD = 1, NW = 0). */ cr0 = rcr0(); load_cr0((cr0 & ~CR0_NW) | CR0_CD); /* Flushes caches and TLBs. */ wbinvd(); invltlb(); /* Update PAT and index table. 
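* (Editorial illustration: in the pat_works case the MSR written below encodes 0=WB, 1=WT, 2=UC-, 3=UC, 4=WB, 5=WP, 6=WC, 7=UC, matching the pat_table updates above.)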
*/ wrmsr(MSR_PAT, pat_msr); for (i = 0; i < PAT_INDEX_SIZE; i++) pat_index[i] = pat_table[i]; /* Flush caches and TLBs again. */ wbinvd(); invltlb(); /* Restore caches and PGE. */ load_cr0(cr0); load_cr4(cr4); } /* * Initialize a vm_page's machine-dependent fields. */ void pmap_page_init(vm_page_t m) { TAILQ_INIT(&m->md.pv_list); m->md.pat_mode = PAT_WRITE_BACK; } /* * Initialize the pmap module. * Called by vm_init, to initialize any structures that the pmap * system needs to map virtual memory. */ void pmap_init(void) { struct pmap_preinit_mapping *ppim; vm_page_t mpte; vm_size_t s; int error, i, pv_npg; /* * Initialize the vm page array entries for the kernel pmap's * page table pages. */ for (i = 0; i < nkpt; i++) { mpte = PHYS_TO_VM_PAGE(KPTphys + (i << PAGE_SHIFT)); KASSERT(mpte >= vm_page_array && mpte < &vm_page_array[vm_page_array_size], ("pmap_init: page table page is out of range")); mpte->pindex = pmap_pde_pindex(KERNBASE) + i; mpte->phys_addr = KPTphys + (i << PAGE_SHIFT); } /* * If the kernel is running on a virtual machine, then it must assume * that MCA is enabled by the hypervisor. Moreover, the kernel must * be prepared for the hypervisor changing the vendor and family that * are reported by CPUID. Consequently, the workaround for AMD Family * 10h Erratum 383 is enabled if the processor's feature set does not * include at least one feature that is only supported by older Intel * or newer AMD processors. */ if (vm_guest != VM_GUEST_NO && (cpu_feature & CPUID_SS) == 0 && (cpu_feature2 & (CPUID2_SSSE3 | CPUID2_SSE41 | CPUID2_AESNI | CPUID2_AVX | CPUID2_XSAVE)) == 0 && (amd_feature2 & (AMDID2_XOP | AMDID2_FMA4)) == 0) workaround_erratum383 = 1; /* * Are large page mappings enabled? */ TUNABLE_INT_FETCH("vm.pmap.pg_ps_enabled", &pg_ps_enabled); if (pg_ps_enabled) { KASSERT(MAXPAGESIZES > 1 && pagesizes[1] == 0, ("pmap_init: can't assign to pagesizes[1]")); pagesizes[1] = NBPDR; } /* * Initialize the pv chunk list mutex. */ mtx_init(&pv_chunks_mutex, "pmap pv chunk list", NULL, MTX_DEF); /* * Initialize the pool of pv list locks. */ for (i = 0; i < NPV_LIST_LOCKS; i++) rw_init(&pv_list_locks[i], "pmap pv list"); /* * Calculate the size of the pv head table for superpages. */ pv_npg = howmany(vm_phys_segs[vm_phys_nsegs - 1].end, NBPDR); /* * Allocate memory for the pv head table for superpages. 
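* For example (editorial illustration): with physical memory ending at 16GB, pv_npg = howmany(16GB, NBPDR) = 8192, i.e. one struct md_page per 2MB frame.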
*/ s = (vm_size_t)(pv_npg * sizeof(struct md_page)); s = round_page(s); pv_table = (struct md_page *)kmem_malloc(kernel_arena, s, M_WAITOK | M_ZERO); for (i = 0; i < pv_npg; i++) TAILQ_INIT(&pv_table[i].pv_list); TAILQ_INIT(&pv_dummy.pv_list); pmap_initialized = 1; for (i = 0; i < PMAP_PREINIT_MAPPING_COUNT; i++) { ppim = pmap_preinit_mapping + i; if (ppim->va == 0) continue; /* Make the direct map consistent */ if (ppim->pa < dmaplimit && ppim->pa + ppim->sz < dmaplimit) { (void)pmap_change_attr(PHYS_TO_DMAP(ppim->pa), ppim->sz, ppim->mode); } if (!bootverbose) continue; printf("PPIM %u: PA=%#lx, VA=%#lx, size=%#lx, mode=%#x\n", i, ppim->pa, ppim->va, ppim->sz, ppim->mode); } mtx_init(&qframe_mtx, "qfrmlk", NULL, MTX_SPIN); error = vmem_alloc(kernel_arena, PAGE_SIZE, M_BESTFIT | M_WAITOK, (vmem_addr_t *)&qframe); if (error != 0) panic("qframe allocation failed"); } static SYSCTL_NODE(_vm_pmap, OID_AUTO, pde, CTLFLAG_RD, 0, "2MB page mapping counters"); static u_long pmap_pde_demotions; SYSCTL_ULONG(_vm_pmap_pde, OID_AUTO, demotions, CTLFLAG_RD, &pmap_pde_demotions, 0, "2MB page demotions"); static u_long pmap_pde_mappings; SYSCTL_ULONG(_vm_pmap_pde, OID_AUTO, mappings, CTLFLAG_RD, &pmap_pde_mappings, 0, "2MB page mappings"); static u_long pmap_pde_p_failures; SYSCTL_ULONG(_vm_pmap_pde, OID_AUTO, p_failures, CTLFLAG_RD, &pmap_pde_p_failures, 0, "2MB page promotion failures"); static u_long pmap_pde_promotions; SYSCTL_ULONG(_vm_pmap_pde, OID_AUTO, promotions, CTLFLAG_RD, &pmap_pde_promotions, 0, "2MB page promotions"); static SYSCTL_NODE(_vm_pmap, OID_AUTO, pdpe, CTLFLAG_RD, 0, "1GB page mapping counters"); static u_long pmap_pdpe_demotions; SYSCTL_ULONG(_vm_pmap_pdpe, OID_AUTO, demotions, CTLFLAG_RD, &pmap_pdpe_demotions, 0, "1GB page demotions"); /*************************************************** * Low level helper routines..... ***************************************************/ static pt_entry_t pmap_swap_pat(pmap_t pmap, pt_entry_t entry) { int x86_pat_bits = X86_PG_PTE_PAT | X86_PG_PDE_PAT; switch (pmap->pm_type) { case PT_X86: case PT_RVI: /* Verify that both PAT bits are not set at the same time */ KASSERT((entry & x86_pat_bits) != x86_pat_bits, ("Invalid PAT bits in entry %#lx", entry)); /* Swap the PAT bits if one of them is set */ if ((entry & x86_pat_bits) != 0) entry ^= x86_pat_bits; break; case PT_EPT: /* * Nothing to do - the memory attributes are represented * the same way for regular pages and superpages. */ break; default: panic("pmap_switch_pat_bits: bad pm_type %d", pmap->pm_type); } return (entry); } /* * Determine the appropriate bits to set in a PTE or PDE for a specified * caching mode. */ static int pmap_cache_bits(pmap_t pmap, int mode, boolean_t is_pde) { int cache_bits, pat_flag, pat_idx; if (mode < 0 || mode >= PAT_INDEX_SIZE || pat_index[mode] < 0) panic("Unknown caching mode %d\n", mode); switch (pmap->pm_type) { case PT_X86: case PT_RVI: /* The PAT bit is different for PTE's and PDE's. */ pat_flag = is_pde ? X86_PG_PDE_PAT : X86_PG_PTE_PAT; /* Map the caching mode to a PAT index. */ pat_idx = pat_index[mode]; /* Map the 3-bit index value into the PAT, PCD, and PWT bits. 
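* For example (editorial illustration): in the pat_works case PAT_WRITE_COMBINING maps to pat_idx 6 (binary 110), which sets the PAT and PCD bits and leaves PWT clear.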
*/ cache_bits = 0; if (pat_idx & 0x4) cache_bits |= pat_flag; if (pat_idx & 0x2) cache_bits |= PG_NC_PCD; if (pat_idx & 0x1) cache_bits |= PG_NC_PWT; break; case PT_EPT: cache_bits = EPT_PG_IGNORE_PAT | EPT_PG_MEMORY_TYPE(mode); break; default: panic("unsupported pmap type %d", pmap->pm_type); } return (cache_bits); } static int pmap_cache_mask(pmap_t pmap, boolean_t is_pde) { int mask; switch (pmap->pm_type) { case PT_X86: case PT_RVI: mask = is_pde ? X86_PG_PDE_CACHE : X86_PG_PTE_CACHE; break; case PT_EPT: mask = EPT_PG_IGNORE_PAT | EPT_PG_MEMORY_TYPE(0x7); break; default: panic("pmap_cache_mask: invalid pm_type %d", pmap->pm_type); } return (mask); } static __inline boolean_t pmap_ps_enabled(pmap_t pmap) { return (pg_ps_enabled && (pmap->pm_flags & PMAP_PDE_SUPERPAGE) != 0); } static void pmap_update_pde_store(pmap_t pmap, pd_entry_t *pde, pd_entry_t newpde) { switch (pmap->pm_type) { case PT_X86: break; case PT_RVI: case PT_EPT: /* * XXX * This is a little bogus since the generation number is * supposed to be bumped up when a region of the address * space is invalidated in the page tables. * * In this case the old PDE entry is valid but yet we want * to make sure that any mappings using the old entry are * invalidated in the TLB. * * The reason this works as expected is because we rendezvous * "all" host cpus and force any vcpu context to exit as a * side-effect. */ atomic_add_acq_long(&pmap->pm_eptgen, 1); break; default: panic("pmap_update_pde_store: bad pm_type %d", pmap->pm_type); } pde_store(pde, newpde); } /* * After changing the page size for the specified virtual address in the page * table, flush the corresponding entries from the processor's TLB. Only the * calling processor's TLB is affected. * * The calling thread must be pinned to a processor. */ static void pmap_update_pde_invalidate(pmap_t pmap, vm_offset_t va, pd_entry_t newpde) { pt_entry_t PG_G; if (pmap_type_guest(pmap)) return; KASSERT(pmap->pm_type == PT_X86, ("pmap_update_pde_invalidate: invalid type %d", pmap->pm_type)); PG_G = pmap_global_bit(pmap); if ((newpde & PG_PS) == 0) /* Demotion: flush a specific 2MB page mapping. */ invlpg(va); else if ((newpde & PG_G) == 0) /* * Promotion: flush every 4KB page mapping from the TLB * because there are too many to flush individually. */ invltlb(); else { /* * Promotion: flush every 4KB page mapping from the TLB, * including any global (PG_G) mappings. */ invltlb_glob(); } } #ifdef SMP /* * For SMP, these functions have to use the IPI mechanism for coherence. * * N.B.: Before calling any of the following TLB invalidation functions, * the calling processor must ensure that all stores updating a non- * kernel page table are globally performed. Otherwise, another * processor could cache an old, pre-update entry without being * invalidated. This can happen one of two ways: (1) The pmap becomes * active on another processor after its pm_active field is checked by * one of the following functions but before a store updating the page * table is globally performed. (2) The pmap becomes active on another * processor before its pm_active field is checked but due to * speculative loads one of the following functions still reads the * pmap as inactive on the other processor. * * The kernel page table is exempt because its pm_active field is * immutable. The kernel page table is always active on every * processor. */ /* * Interrupt the cpus that are executing in the guest context.
* This will force the vcpu to exit and the cached EPT mappings * will be invalidated by the host before the next vmresume. */ static __inline void pmap_invalidate_ept(pmap_t pmap) { int ipinum; sched_pin(); KASSERT(!CPU_ISSET(curcpu, &pmap->pm_active), ("pmap_invalidate_ept: absurd pm_active")); /* * The TLB mappings associated with a vcpu context are not * flushed each time a different vcpu is chosen to execute. * * This is in contrast with a process's vtop mappings that * are flushed from the TLB on each context switch. * * Therefore we need to do more than just a TLB shootdown on * the active cpus in 'pmap->pm_active'. To do this we keep * track of the number of invalidations performed on this pmap. * * Each vcpu keeps a cache of this counter and compares it * just before a vmresume. If the counter is out-of-date an * invept will be done to flush stale mappings from the TLB. */ atomic_add_acq_long(&pmap->pm_eptgen, 1); /* * Force the vcpu to exit and trap back into the hypervisor. */ ipinum = pmap->pm_flags & PMAP_NESTED_IPIMASK; ipi_selected(pmap->pm_active, ipinum); sched_unpin(); } void pmap_invalidate_page(pmap_t pmap, vm_offset_t va) { cpuset_t *mask; u_int cpuid, i; if (pmap_type_guest(pmap)) { pmap_invalidate_ept(pmap); return; } KASSERT(pmap->pm_type == PT_X86, ("pmap_invalidate_page: invalid type %d", pmap->pm_type)); sched_pin(); if (pmap == kernel_pmap) { invlpg(va); mask = &all_cpus; } else { cpuid = PCPU_GET(cpuid); if (pmap == PCPU_GET(curpmap)) invlpg(va); else if (pmap_pcid_enabled) pmap->pm_pcids[cpuid].pm_gen = 0; if (pmap_pcid_enabled) { CPU_FOREACH(i) { if (cpuid != i) pmap->pm_pcids[i].pm_gen = 0; } } mask = &pmap->pm_active; } smp_masked_invlpg(*mask, va); sched_unpin(); } /* 4k PTEs -- Chosen to exceed the total size of Broadwell L2 TLB */ #define PMAP_INVLPG_THRESHOLD (4 * 1024 * PAGE_SIZE) void pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) { cpuset_t *mask; vm_offset_t addr; u_int cpuid, i; if (eva - sva >= PMAP_INVLPG_THRESHOLD) { pmap_invalidate_all(pmap); return; } if (pmap_type_guest(pmap)) { pmap_invalidate_ept(pmap); return; } KASSERT(pmap->pm_type == PT_X86, ("pmap_invalidate_range: invalid type %d", pmap->pm_type)); sched_pin(); cpuid = PCPU_GET(cpuid); if (pmap == kernel_pmap) { for (addr = sva; addr < eva; addr += PAGE_SIZE) invlpg(addr); mask = &all_cpus; } else { if (pmap == PCPU_GET(curpmap)) { for (addr = sva; addr < eva; addr += PAGE_SIZE) invlpg(addr); } else if (pmap_pcid_enabled) { pmap->pm_pcids[cpuid].pm_gen = 0; } if (pmap_pcid_enabled) { CPU_FOREACH(i) { if (cpuid != i) pmap->pm_pcids[i].pm_gen = 0; } } mask = &pmap->pm_active; } smp_masked_invlpg_range(*mask, sva, eva); sched_unpin(); } void pmap_invalidate_all(pmap_t pmap) { cpuset_t *mask; struct invpcid_descr d; u_int cpuid, i; if (pmap_type_guest(pmap)) { pmap_invalidate_ept(pmap); return; } KASSERT(pmap->pm_type == PT_X86, ("pmap_invalidate_all: invalid type %d", pmap->pm_type)); sched_pin(); if (pmap == kernel_pmap) { if (pmap_pcid_enabled && invpcid_works) { bzero(&d, sizeof(d)); invpcid(&d, INVPCID_CTXGLOB); } else { invltlb_glob(); } mask = &all_cpus; } else { cpuid = PCPU_GET(cpuid); if (pmap == PCPU_GET(curpmap)) { if (pmap_pcid_enabled) { if (invpcid_works) { d.pcid = pmap->pm_pcids[cpuid].pm_pcid; d.pad = 0; d.addr = 0; invpcid(&d, INVPCID_CTX); } else { load_cr3(pmap->pm_cr3 | pmap->pm_pcids [PCPU_GET(cpuid)].pm_pcid); } } else { invltlb(); } } else if (pmap_pcid_enabled) { pmap->pm_pcids[cpuid].pm_gen = 0; } if (pmap_pcid_enabled) { CPU_FOREACH(i) { if 
(cpuid != i) pmap->pm_pcids[i].pm_gen = 0; } } mask = &pmap->pm_active; } smp_masked_invltlb(*mask, pmap); sched_unpin(); } void pmap_invalidate_cache(void) { sched_pin(); wbinvd(); smp_cache_flush(); sched_unpin(); } struct pde_action { cpuset_t invalidate; /* processors that invalidate their TLB */ pmap_t pmap; vm_offset_t va; pd_entry_t *pde; pd_entry_t newpde; u_int store; /* processor that updates the PDE */ }; static void pmap_update_pde_action(void *arg) { struct pde_action *act = arg; if (act->store == PCPU_GET(cpuid)) pmap_update_pde_store(act->pmap, act->pde, act->newpde); } static void pmap_update_pde_teardown(void *arg) { struct pde_action *act = arg; if (CPU_ISSET(PCPU_GET(cpuid), &act->invalidate)) pmap_update_pde_invalidate(act->pmap, act->va, act->newpde); } /* * Change the page size for the specified virtual address in a way that * prevents any possibility of the TLB ever having two entries that map the * same virtual address using different page sizes. This is the recommended * workaround for Erratum 383 on AMD Family 10h processors. It prevents a * machine check exception for a TLB state that is improperly diagnosed as a * hardware error. */ static void pmap_update_pde(pmap_t pmap, vm_offset_t va, pd_entry_t *pde, pd_entry_t newpde) { struct pde_action act; cpuset_t active, other_cpus; u_int cpuid; sched_pin(); cpuid = PCPU_GET(cpuid); other_cpus = all_cpus; CPU_CLR(cpuid, &other_cpus); if (pmap == kernel_pmap || pmap_type_guest(pmap)) active = all_cpus; else { active = pmap->pm_active; } if (CPU_OVERLAP(&active, &other_cpus)) { act.store = cpuid; act.invalidate = active; act.va = va; act.pmap = pmap; act.pde = pde; act.newpde = newpde; CPU_SET(cpuid, &active); smp_rendezvous_cpus(active, smp_no_rendevous_barrier, pmap_update_pde_action, pmap_update_pde_teardown, &act); } else { pmap_update_pde_store(pmap, pde, newpde); if (CPU_ISSET(cpuid, &active)) pmap_update_pde_invalidate(pmap, va, newpde); } sched_unpin(); } #else /* !SMP */ /* * Normal, non-SMP, invalidation functions. */ void pmap_invalidate_page(pmap_t pmap, vm_offset_t va) { if (pmap->pm_type == PT_RVI || pmap->pm_type == PT_EPT) { pmap->pm_eptgen++; return; } KASSERT(pmap->pm_type == PT_X86, ("pmap_invalidate_range: unknown type %d", pmap->pm_type)); if (pmap == kernel_pmap || pmap == PCPU_GET(curpmap)) invlpg(va); else if (pmap_pcid_enabled) pmap->pm_pcids[0].pm_gen = 0; } void pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) { vm_offset_t addr; if (pmap->pm_type == PT_RVI || pmap->pm_type == PT_EPT) { pmap->pm_eptgen++; return; } KASSERT(pmap->pm_type == PT_X86, ("pmap_invalidate_range: unknown type %d", pmap->pm_type)); if (pmap == kernel_pmap || pmap == PCPU_GET(curpmap)) { for (addr = sva; addr < eva; addr += PAGE_SIZE) invlpg(addr); } else if (pmap_pcid_enabled) { pmap->pm_pcids[0].pm_gen = 0; } } void pmap_invalidate_all(pmap_t pmap) { struct invpcid_descr d; if (pmap->pm_type == PT_RVI || pmap->pm_type == PT_EPT) { pmap->pm_eptgen++; return; } KASSERT(pmap->pm_type == PT_X86, ("pmap_invalidate_all: unknown type %d", pmap->pm_type)); if (pmap == kernel_pmap) { if (pmap_pcid_enabled && invpcid_works) { bzero(&d, sizeof(d)); invpcid(&d, INVPCID_CTXGLOB); } else { invltlb_glob(); } } else if (pmap == PCPU_GET(curpmap)) { if (pmap_pcid_enabled) { if (invpcid_works) { d.pcid = pmap->pm_pcids[0].pm_pcid; d.pad = 0; d.addr = 0; invpcid(&d, INVPCID_CTX); } else { load_cr3(pmap->pm_cr3 | pmap->pm_pcids[0]. 
pm_pcid); } } else { invltlb(); } } else if (pmap_pcid_enabled) { pmap->pm_pcids[0].pm_gen = 0; } } PMAP_INLINE void pmap_invalidate_cache(void) { wbinvd(); } static void pmap_update_pde(pmap_t pmap, vm_offset_t va, pd_entry_t *pde, pd_entry_t newpde) { pmap_update_pde_store(pmap, pde, newpde); if (pmap == kernel_pmap || pmap == PCPU_GET(curpmap)) pmap_update_pde_invalidate(pmap, va, newpde); else pmap->pm_pcids[0].pm_gen = 0; } #endif /* !SMP */ #define PMAP_CLFLUSH_THRESHOLD (2 * 1024 * 1024) void pmap_invalidate_cache_range(vm_offset_t sva, vm_offset_t eva, boolean_t force) { if (force) { sva &= ~(vm_offset_t)cpu_clflush_line_size; } else { KASSERT((sva & PAGE_MASK) == 0, ("pmap_invalidate_cache_range: sva not page-aligned")); KASSERT((eva & PAGE_MASK) == 0, ("pmap_invalidate_cache_range: eva not page-aligned")); } if ((cpu_feature & CPUID_SS) != 0 && !force) ; /* If "Self Snoop" is supported and allowed, do nothing. */ else if ((cpu_stdext_feature & CPUID_STDEXT_CLFLUSHOPT) != 0 && eva - sva < PMAP_CLFLUSH_THRESHOLD) { /* * XXX: Some CPUs fault, hang, or trash the local APIC * registers if we use CLFLUSH on the local APIC * range. The local APIC is always uncached, so we * don't need to flush for that range anyway. */ if (pmap_kextract(sva) == lapic_paddr) return; /* * Otherwise, do per-cache line flush. Use the mfence * instruction to insure that previous stores are * included in the write-back. The processor * propagates flush to other processors in the cache * coherence domain. */ mfence(); for (; sva < eva; sva += cpu_clflush_line_size) clflushopt(sva); mfence(); } else if ((cpu_feature & CPUID_CLFSH) != 0 && eva - sva < PMAP_CLFLUSH_THRESHOLD) { if (pmap_kextract(sva) == lapic_paddr) return; /* * Writes are ordered by CLFLUSH on Intel CPUs. */ if (cpu_vendor_id != CPU_VENDOR_INTEL) mfence(); for (; sva < eva; sva += cpu_clflush_line_size) clflush(sva); if (cpu_vendor_id != CPU_VENDOR_INTEL) mfence(); } else { /* * No targeted cache flush methods are supported by CPU, * or the supplied range is bigger than 2MB. * Globally invalidate cache. */ pmap_invalidate_cache(); } } /* * Remove the specified set of pages from the data and instruction caches. * * In contrast to pmap_invalidate_cache_range(), this function does not * rely on the CPU's self-snoop feature, because it is intended for use * when moving pages into a different cache domain. */ void pmap_invalidate_cache_pages(vm_page_t *pages, int count) { vm_offset_t daddr, eva; int i; bool useclflushopt; useclflushopt = (cpu_stdext_feature & CPUID_STDEXT_CLFLUSHOPT) != 0; if (count >= PMAP_CLFLUSH_THRESHOLD / PAGE_SIZE || ((cpu_feature & CPUID_CLFSH) == 0 && !useclflushopt)) pmap_invalidate_cache(); else { if (useclflushopt || cpu_vendor_id != CPU_VENDOR_INTEL) mfence(); for (i = 0; i < count; i++) { daddr = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(pages[i])); eva = daddr + PAGE_SIZE; for (; daddr < eva; daddr += cpu_clflush_line_size) { if (useclflushopt) clflushopt(daddr); else clflush(daddr); } } if (useclflushopt || cpu_vendor_id != CPU_VENDOR_INTEL) mfence(); } } /* * Routine: pmap_extract * Function: * Extract the physical page address associated * with the given map/virtual_address pair. 
*/ vm_paddr_t pmap_extract(pmap_t pmap, vm_offset_t va) { pdp_entry_t *pdpe; pd_entry_t *pde; pt_entry_t *pte, PG_V; vm_paddr_t pa; pa = 0; PG_V = pmap_valid_bit(pmap); PMAP_LOCK(pmap); pdpe = pmap_pdpe(pmap, va); if (pdpe != NULL && (*pdpe & PG_V) != 0) { if ((*pdpe & PG_PS) != 0) pa = (*pdpe & PG_PS_FRAME) | (va & PDPMASK); else { pde = pmap_pdpe_to_pde(pdpe, va); if ((*pde & PG_V) != 0) { if ((*pde & PG_PS) != 0) { pa = (*pde & PG_PS_FRAME) | (va & PDRMASK); } else { pte = pmap_pde_to_pte(pde, va); pa = (*pte & PG_FRAME) | (va & PAGE_MASK); } } } } PMAP_UNLOCK(pmap); return (pa); } /* * Routine: pmap_extract_and_hold * Function: * Atomically extract and hold the physical page * with the given pmap and virtual address pair * if that mapping permits the given protection. */ vm_page_t pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot) { pd_entry_t pde, *pdep; pt_entry_t pte, PG_RW, PG_V; vm_paddr_t pa; vm_page_t m; pa = 0; m = NULL; PG_RW = pmap_rw_bit(pmap); PG_V = pmap_valid_bit(pmap); PMAP_LOCK(pmap); retry: pdep = pmap_pde(pmap, va); if (pdep != NULL && (pde = *pdep)) { if (pde & PG_PS) { if ((pde & PG_RW) || (prot & VM_PROT_WRITE) == 0) { if (vm_page_pa_tryrelock(pmap, (pde & PG_PS_FRAME) | (va & PDRMASK), &pa)) goto retry; m = PHYS_TO_VM_PAGE((pde & PG_PS_FRAME) | (va & PDRMASK)); vm_page_hold(m); } } else { pte = *pmap_pde_to_pte(pdep, va); if ((pte & PG_V) && ((pte & PG_RW) || (prot & VM_PROT_WRITE) == 0)) { if (vm_page_pa_tryrelock(pmap, pte & PG_FRAME, &pa)) goto retry; m = PHYS_TO_VM_PAGE(pte & PG_FRAME); vm_page_hold(m); } } } PA_UNLOCK_COND(pa); PMAP_UNLOCK(pmap); return (m); } vm_paddr_t pmap_kextract(vm_offset_t va) { pd_entry_t pde; vm_paddr_t pa; if (va >= DMAP_MIN_ADDRESS && va < DMAP_MAX_ADDRESS) { pa = DMAP_TO_PHYS(va); } else { pde = *vtopde(va); if (pde & PG_PS) { pa = (pde & PG_PS_FRAME) | (va & PDRMASK); } else { /* * Beware of a concurrent promotion that changes the * PDE at this point! For example, vtopte() must not * be used to access the PTE because it would use the * new PDE. It is, however, safe to use the old PDE * because the page table page is preserved by the * promotion. */ pa = *pmap_pde_to_pte(&pde, va); pa = (pa & PG_FRAME) | (va & PAGE_MASK); } } return (pa); } /*************************************************** * Low level mapping routines..... ***************************************************/ /* * Add a wired page to the kva. * Note: not SMP coherent. */ PMAP_INLINE void pmap_kenter(vm_offset_t va, vm_paddr_t pa) { pt_entry_t *pte; pte = vtopte(va); pte_store(pte, pa | X86_PG_RW | X86_PG_V | X86_PG_G); } static __inline void pmap_kenter_attr(vm_offset_t va, vm_paddr_t pa, int mode) { pt_entry_t *pte; int cache_bits; pte = vtopte(va); cache_bits = pmap_cache_bits(kernel_pmap, mode, 0); pte_store(pte, pa | X86_PG_RW | X86_PG_V | X86_PG_G | cache_bits); } /* * Remove a page from the kernel pagetables. * Note: not SMP coherent. */ PMAP_INLINE void pmap_kremove(vm_offset_t va) { pt_entry_t *pte; pte = vtopte(va); pte_clear(pte); } /* * Used to map a range of physical addresses into kernel * virtual address space. * * The value passed in '*virt' is a suggested virtual address for * the mapping. Architectures which can support a direct-mapped * physical to virtual region can return the appropriate address * within that region, leaving '*virt' unchanged. Other * architectures should map the pages starting at '*virt' and * update '*virt' with the first usable address after the mapped * region. 
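* (Editorial note: amd64 takes the direct-mapped path described above; the body below simply returns PHYS_TO_DMAP(start) and leaves '*virt' unchanged.)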
*/ vm_offset_t pmap_map(vm_offset_t *virt, vm_paddr_t start, vm_paddr_t end, int prot) { return PHYS_TO_DMAP(start); } /* * Add a list of wired pages to the kva. * This routine is only used for temporary * kernel mappings that do not need to have * page modification or references recorded. * Note that old mappings are simply written * over. The page *must* be wired. * Note: SMP coherent. Uses a ranged shootdown IPI. */ void pmap_qenter(vm_offset_t sva, vm_page_t *ma, int count) { pt_entry_t *endpte, oldpte, pa, *pte; vm_page_t m; int cache_bits; oldpte = 0; pte = vtopte(sva); endpte = pte + count; while (pte < endpte) { m = *ma++; cache_bits = pmap_cache_bits(kernel_pmap, m->md.pat_mode, 0); pa = VM_PAGE_TO_PHYS(m) | cache_bits; if ((*pte & (PG_FRAME | X86_PG_PTE_CACHE)) != pa) { oldpte |= *pte; pte_store(pte, pa | X86_PG_G | X86_PG_RW | X86_PG_V); } pte++; } if (__predict_false((oldpte & X86_PG_V) != 0)) pmap_invalidate_range(kernel_pmap, sva, sva + count * PAGE_SIZE); } /* * This routine tears out page mappings from the * kernel -- it is meant only for temporary mappings. * Note: SMP coherent. Uses a ranged shootdown IPI. */ void pmap_qremove(vm_offset_t sva, int count) { vm_offset_t va; va = sva; while (count-- > 0) { KASSERT(va >= VM_MIN_KERNEL_ADDRESS, ("usermode va %lx", va)); pmap_kremove(va); va += PAGE_SIZE; } pmap_invalidate_range(kernel_pmap, sva, va); } /*************************************************** * Page table page management routines..... ***************************************************/ static __inline void pmap_free_zero_pages(struct spglist *free) { vm_page_t m; while ((m = SLIST_FIRST(free)) != NULL) { SLIST_REMOVE_HEAD(free, plinks.s.ss); /* Preserve the page's PG_ZERO setting. */ vm_page_free_toq(m); } } /* * Schedule the specified unused page table page to be freed. Specifically, * add the page to the specified list of pages that will be released to the * physical memory manager after the TLB has been updated. */ static __inline void pmap_add_delayed_free_list(vm_page_t m, struct spglist *free, boolean_t set_PG_ZERO) { if (set_PG_ZERO) m->flags |= PG_ZERO; else m->flags &= ~PG_ZERO; SLIST_INSERT_HEAD(free, m, plinks.s.ss); } /* * Inserts the specified page table page into the specified pmap's collection * of idle page table pages. Each of a pmap's page table pages is responsible * for mapping a distinct range of virtual addresses. The pmap's collection is * ordered by this virtual address range. */ static __inline int pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte) { PMAP_LOCK_ASSERT(pmap, MA_OWNED); return (vm_radix_insert(&pmap->pm_root, mpte)); } /* * Looks for a page table page mapping the specified virtual address in the * specified pmap's collection of idle page table pages. Returns NULL if there * is no page table page corresponding to the specified virtual address. */ static __inline vm_page_t pmap_lookup_pt_page(pmap_t pmap, vm_offset_t va) { PMAP_LOCK_ASSERT(pmap, MA_OWNED); return (vm_radix_lookup(&pmap->pm_root, pmap_pde_pindex(va))); } /* * Removes the specified page table page from the specified pmap's collection * of idle page table pages. The specified page table page must be a member of * the pmap's collection. */ static __inline void pmap_remove_pt_page(pmap_t pmap, vm_page_t mpte) { PMAP_LOCK_ASSERT(pmap, MA_OWNED); vm_radix_remove(&pmap->pm_root, mpte->pindex); } /* * Decrements a page table page's wire count, which is used to record the * number of valid page table entries within the page.
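* (A fully populated page table page thus has a wire count of NPTEPG, i.e. 512.)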
If the wire count * drops to zero, then the page table page is unmapped. Returns TRUE if the * page table page was unmapped and FALSE otherwise. */ static inline boolean_t pmap_unwire_ptp(pmap_t pmap, vm_offset_t va, vm_page_t m, struct spglist *free) { --m->wire_count; if (m->wire_count == 0) { _pmap_unwire_ptp(pmap, va, m, free); return (TRUE); } else return (FALSE); } static void _pmap_unwire_ptp(pmap_t pmap, vm_offset_t va, vm_page_t m, struct spglist *free) { PMAP_LOCK_ASSERT(pmap, MA_OWNED); /* * unmap the page table page */ if (m->pindex >= (NUPDE + NUPDPE)) { /* PDP page */ pml4_entry_t *pml4; pml4 = pmap_pml4e(pmap, va); *pml4 = 0; } else if (m->pindex >= NUPDE) { /* PD page */ pdp_entry_t *pdp; pdp = pmap_pdpe(pmap, va); *pdp = 0; } else { /* PTE page */ pd_entry_t *pd; pd = pmap_pde(pmap, va); *pd = 0; } pmap_resident_count_dec(pmap, 1); if (m->pindex < NUPDE) { /* We just released a PT, unhold the matching PD */ vm_page_t pdpg; pdpg = PHYS_TO_VM_PAGE(*pmap_pdpe(pmap, va) & PG_FRAME); pmap_unwire_ptp(pmap, va, pdpg, free); } if (m->pindex >= NUPDE && m->pindex < (NUPDE + NUPDPE)) { /* We just released a PD, unhold the matching PDP */ vm_page_t pdppg; pdppg = PHYS_TO_VM_PAGE(*pmap_pml4e(pmap, va) & PG_FRAME); pmap_unwire_ptp(pmap, va, pdppg, free); } /* * This is a release store so that the ordinary store unmapping * the page table page is globally performed before TLB shoot- * down is begun. */ atomic_subtract_rel_int(&vm_cnt.v_wire_count, 1); /* * Put page on a list so that it is released after * *ALL* TLB shootdown is done */ pmap_add_delayed_free_list(m, free, TRUE); } /* * After removing a page table entry, this routine is used to * conditionally free the page, and manage the hold/wire counts. */ static int pmap_unuse_pt(pmap_t pmap, vm_offset_t va, pd_entry_t ptepde, struct spglist *free) { vm_page_t mpte; if (va >= VM_MAXUSER_ADDRESS) return (0); KASSERT(ptepde != 0, ("pmap_unuse_pt: ptepde != 0")); mpte = PHYS_TO_VM_PAGE(ptepde & PG_FRAME); return (pmap_unwire_ptp(pmap, va, mpte, free)); } void pmap_pinit0(pmap_t pmap) { int i; PMAP_LOCK_INIT(pmap); pmap->pm_pml4 = (pml4_entry_t *)PHYS_TO_DMAP(KPML4phys); pmap->pm_cr3 = KPML4phys; pmap->pm_root.rt_root = 0; CPU_ZERO(&pmap->pm_active); TAILQ_INIT(&pmap->pm_pvchunk); bzero(&pmap->pm_stats, sizeof pmap->pm_stats); pmap->pm_flags = pmap_flags; CPU_FOREACH(i) { pmap->pm_pcids[i].pm_pcid = PMAP_PCID_NONE; pmap->pm_pcids[i].pm_gen = 0; } PCPU_SET(curpmap, kernel_pmap); pmap_activate(curthread); CPU_FILL(&kernel_pmap->pm_active); } /* * Initialize a preallocated and zeroed pmap structure, * such as one in a vmspace structure. */ int pmap_pinit_type(pmap_t pmap, enum pmap_type pm_type, int flags) { vm_page_t pml4pg; vm_paddr_t pml4phys; int i; /* * allocate the page directory page */ while ((pml4pg = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL) VM_WAIT; pml4phys = VM_PAGE_TO_PHYS(pml4pg); pmap->pm_pml4 = (pml4_entry_t *)PHYS_TO_DMAP(pml4phys); CPU_FOREACH(i) { pmap->pm_pcids[i].pm_pcid = PMAP_PCID_NONE; pmap->pm_pcids[i].pm_gen = 0; } pmap->pm_cr3 = ~0; /* initialize to an invalid value */ if ((pml4pg->flags & PG_ZERO) == 0) pagezero(pmap->pm_pml4); /* * Do not install the host kernel mappings in the nested page * tables. These mappings are meaningless in the guest physical * address space. */ if ((pmap->pm_type = pm_type) == PT_X86) { pmap->pm_cr3 = pml4phys; /* Wire in kernel global address entries. 
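* These are the NKPML4E kernel map slots, the ndmpdpphys direct map slots, and the recursive slot at PML4PML4I; the underlying page directory pointer pages are shared by all PT_X86 pmaps.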
*/ for (i = 0; i < NKPML4E; i++) { pmap->pm_pml4[KPML4BASE + i] = (KPDPphys + ptoa(i)) | X86_PG_RW | X86_PG_V | PG_U; } for (i = 0; i < ndmpdpphys; i++) { pmap->pm_pml4[DMPML4I + i] = (DMPDPphys + ptoa(i)) | X86_PG_RW | X86_PG_V | PG_U; } /* install self-referential address mapping entry(s) */ pmap->pm_pml4[PML4PML4I] = VM_PAGE_TO_PHYS(pml4pg) | X86_PG_V | X86_PG_RW | X86_PG_A | X86_PG_M; } pmap->pm_root.rt_root = 0; CPU_ZERO(&pmap->pm_active); TAILQ_INIT(&pmap->pm_pvchunk); bzero(&pmap->pm_stats, sizeof pmap->pm_stats); pmap->pm_flags = flags; pmap->pm_eptgen = 0; return (1); } int pmap_pinit(pmap_t pmap) { return (pmap_pinit_type(pmap, PT_X86, pmap_flags)); } /* * This routine is called if the desired page table page does not exist. * * If page table page allocation fails, this routine may sleep before * returning NULL. It sleeps only if a lock pointer was given. * * Note: If a page allocation fails at page table level two or three, * one or two pages may be held during the wait, only to be released * afterwards. This conservative approach is easily argued to avoid * race conditions. */ static vm_page_t _pmap_allocpte(pmap_t pmap, vm_pindex_t ptepindex, struct rwlock **lockp) { vm_page_t m, pdppg, pdpg; pt_entry_t PG_A, PG_M, PG_RW, PG_V; PMAP_LOCK_ASSERT(pmap, MA_OWNED); PG_A = pmap_accessed_bit(pmap); PG_M = pmap_modified_bit(pmap); PG_V = pmap_valid_bit(pmap); PG_RW = pmap_rw_bit(pmap); /* * Allocate a page table page. */ if ((m = vm_page_alloc(NULL, ptepindex, VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL) { if (lockp != NULL) { RELEASE_PV_LIST_LOCK(lockp); PMAP_UNLOCK(pmap); PMAP_ASSERT_NOT_IN_DI(); VM_WAIT; PMAP_LOCK(pmap); } /* * Indicate the need to retry. While waiting, the page table * page may have been allocated. */ return (NULL); } if ((m->flags & PG_ZERO) == 0) pmap_zero_page(m); /* * Map the pagetable page into the process address space, if * it isn't already there. */ if (ptepindex >= (NUPDE + NUPDPE)) { pml4_entry_t *pml4; vm_pindex_t pml4index; /* Wire up a new PDPE page */ pml4index = ptepindex - (NUPDE + NUPDPE); pml4 = &pmap->pm_pml4[pml4index]; *pml4 = VM_PAGE_TO_PHYS(m) | PG_U | PG_RW | PG_V | PG_A | PG_M; } else if (ptepindex >= NUPDE) { vm_pindex_t pml4index; vm_pindex_t pdpindex; pml4_entry_t *pml4; pdp_entry_t *pdp; /* Wire up a new PDE page */ pdpindex = ptepindex - NUPDE; pml4index = pdpindex >> NPML4EPGSHIFT; pml4 = &pmap->pm_pml4[pml4index]; if ((*pml4 & PG_V) == 0) { /* Have to allocate a new pdp, recurse */ if (_pmap_allocpte(pmap, NUPDE + NUPDPE + pml4index, lockp) == NULL) { --m->wire_count; atomic_subtract_int(&vm_cnt.v_wire_count, 1); vm_page_free_zero(m); return (NULL); } } else { /* Add reference to pdp page */ pdppg = PHYS_TO_VM_PAGE(*pml4 & PG_FRAME); pdppg->wire_count++; } pdp = (pdp_entry_t *)PHYS_TO_DMAP(*pml4 & PG_FRAME); /* Now find the pdp page */ pdp = &pdp[pdpindex & ((1ul << NPDPEPGSHIFT) - 1)]; *pdp = VM_PAGE_TO_PHYS(m) | PG_U | PG_RW | PG_V | PG_A | PG_M; } else { vm_pindex_t pml4index; vm_pindex_t pdpindex; pml4_entry_t *pml4; pdp_entry_t *pdp; pd_entry_t *pd; /* Wire up a new PTE page */ pdpindex = ptepindex >> NPDPEPGSHIFT; pml4index = pdpindex >> NPML4EPGSHIFT; /* First, find the pdp and check that it's valid.
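* The index arithmetic above mirrors the 512-entry (9-bit) fan-out of each level: pdpindex = ptepindex >> NPDPEPGSHIFT, pml4index = pdpindex >> NPML4EPGSHIFT.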
*/ pml4 = &pmap->pm_pml4[pml4index]; if ((*pml4 & PG_V) == 0) { /* Have to allocate a new pd, recurse */ if (_pmap_allocpte(pmap, NUPDE + pdpindex, lockp) == NULL) { --m->wire_count; atomic_subtract_int(&vm_cnt.v_wire_count, 1); vm_page_free_zero(m); return (NULL); } pdp = (pdp_entry_t *)PHYS_TO_DMAP(*pml4 & PG_FRAME); pdp = &pdp[pdpindex & ((1ul << NPDPEPGSHIFT) - 1)]; } else { pdp = (pdp_entry_t *)PHYS_TO_DMAP(*pml4 & PG_FRAME); pdp = &pdp[pdpindex & ((1ul << NPDPEPGSHIFT) - 1)]; if ((*pdp & PG_V) == 0) { /* Have to allocate a new pd, recurse */ if (_pmap_allocpte(pmap, NUPDE + pdpindex, lockp) == NULL) { --m->wire_count; atomic_subtract_int(&vm_cnt.v_wire_count, 1); vm_page_free_zero(m); return (NULL); } } else { /* Add reference to the pd page */ pdpg = PHYS_TO_VM_PAGE(*pdp & PG_FRAME); pdpg->wire_count++; } } pd = (pd_entry_t *)PHYS_TO_DMAP(*pdp & PG_FRAME); /* Now we know where the page directory page is */ pd = &pd[ptepindex & ((1ul << NPDEPGSHIFT) - 1)]; *pd = VM_PAGE_TO_PHYS(m) | PG_U | PG_RW | PG_V | PG_A | PG_M; } pmap_resident_count_inc(pmap, 1); return (m); } static vm_page_t pmap_allocpde(pmap_t pmap, vm_offset_t va, struct rwlock **lockp) { vm_pindex_t pdpindex, ptepindex; pdp_entry_t *pdpe, PG_V; vm_page_t pdpg; PG_V = pmap_valid_bit(pmap); retry: pdpe = pmap_pdpe(pmap, va); if (pdpe != NULL && (*pdpe & PG_V) != 0) { /* Add a reference to the pd page. */ pdpg = PHYS_TO_VM_PAGE(*pdpe & PG_FRAME); pdpg->wire_count++; } else { /* Allocate a pd page. */ ptepindex = pmap_pde_pindex(va); pdpindex = ptepindex >> NPDPEPGSHIFT; pdpg = _pmap_allocpte(pmap, NUPDE + pdpindex, lockp); if (pdpg == NULL && lockp != NULL) goto retry; } return (pdpg); } static vm_page_t pmap_allocpte(pmap_t pmap, vm_offset_t va, struct rwlock **lockp) { vm_pindex_t ptepindex; pd_entry_t *pd, PG_V; vm_page_t m; PG_V = pmap_valid_bit(pmap); /* * Calculate pagetable page index */ ptepindex = pmap_pde_pindex(va); retry: /* * Get the page directory entry */ pd = pmap_pde(pmap, va); /* * This supports switching from a 2MB page to a * normal 4K page. */ if (pd != NULL && (*pd & (PG_PS | PG_V)) == (PG_PS | PG_V)) { if (!pmap_demote_pde_locked(pmap, pd, va, lockp)) { /* * Invalidation of the 2MB page mapping may have caused * the deallocation of the underlying PD page. */ pd = NULL; } } /* * If the page table page is mapped, we just increment the * hold count, and activate it. */ if (pd != NULL && (*pd & PG_V) != 0) { m = PHYS_TO_VM_PAGE(*pd & PG_FRAME); m->wire_count++; } else { /* * Here if the pte page isn't mapped, or if it has been * deallocated. */ m = _pmap_allocpte(pmap, ptepindex, lockp); if (m == NULL && lockp != NULL) goto retry; } return (m); } /*************************************************** * Pmap allocation/deallocation routines. ***************************************************/ /* * Release any resources held by the given physical map. * Called when a pmap initialized by pmap_pinit is being released. * Should only be called if the map contains no valid mappings. 
*/ void pmap_release(pmap_t pmap) { vm_page_t m; int i; KASSERT(pmap->pm_stats.resident_count == 0, ("pmap_release: pmap resident count %ld != 0", pmap->pm_stats.resident_count)); KASSERT(vm_radix_is_empty(&pmap->pm_root), ("pmap_release: pmap has reserved page table page(s)")); KASSERT(CPU_EMPTY(&pmap->pm_active), ("releasing active pmap %p", pmap)); m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pmap->pm_pml4)); for (i = 0; i < NKPML4E; i++) /* KVA */ pmap->pm_pml4[KPML4BASE + i] = 0; for (i = 0; i < ndmpdpphys; i++)/* Direct Map */ pmap->pm_pml4[DMPML4I + i] = 0; pmap->pm_pml4[PML4PML4I] = 0; /* Recursive Mapping */ m->wire_count--; atomic_subtract_int(&vm_cnt.v_wire_count, 1); vm_page_free_zero(m); } static int kvm_size(SYSCTL_HANDLER_ARGS) { unsigned long ksize = VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS; return sysctl_handle_long(oidp, &ksize, 0, req); } SYSCTL_PROC(_vm, OID_AUTO, kvm_size, CTLTYPE_LONG|CTLFLAG_RD, 0, 0, kvm_size, "LU", "Size of KVM"); static int kvm_free(SYSCTL_HANDLER_ARGS) { unsigned long kfree = VM_MAX_KERNEL_ADDRESS - kernel_vm_end; return sysctl_handle_long(oidp, &kfree, 0, req); } SYSCTL_PROC(_vm, OID_AUTO, kvm_free, CTLTYPE_LONG|CTLFLAG_RD, 0, 0, kvm_free, "LU", "Amount of KVM free"); /* * grow the number of kernel page table entries, if needed */ void pmap_growkernel(vm_offset_t addr) { vm_paddr_t paddr; vm_page_t nkpg; pd_entry_t *pde, newpdir; pdp_entry_t *pdpe; mtx_assert(&kernel_map->system_mtx, MA_OWNED); /* * Return if "addr" is within the range of kernel page table pages * that were preallocated during pmap bootstrap. Moreover, leave * "kernel_vm_end" and the kernel page table as they were. * * The correctness of this action is based on the following * argument: vm_map_insert() allocates contiguous ranges of the * kernel virtual address space. It calls this function if a range * ends after "kernel_vm_end". If the kernel is mapped between * "kernel_vm_end" and "addr", then the range cannot begin at * "kernel_vm_end". In fact, its beginning address cannot be less * than the kernel. Thus, there is no immediate need to allocate * any new kernel page table pages between "kernel_vm_end" and * "KERNBASE". 
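* The loop below grows the map in 2MB (NBPDR) steps; when the covering PDP entry is invalid, a new page directory page is allocated first and the step is retried.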
*/ if (KERNBASE < addr && addr <= KERNBASE + nkpt * NBPDR) return; addr = roundup2(addr, NBPDR); if (addr - 1 >= kernel_map->max_offset) addr = kernel_map->max_offset; while (kernel_vm_end < addr) { pdpe = pmap_pdpe(kernel_pmap, kernel_vm_end); if ((*pdpe & X86_PG_V) == 0) { /* We need a new PDP entry */ nkpg = vm_page_alloc(NULL, kernel_vm_end >> PDPSHIFT, VM_ALLOC_INTERRUPT | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO); if (nkpg == NULL) panic("pmap_growkernel: no memory to grow kernel"); if ((nkpg->flags & PG_ZERO) == 0) pmap_zero_page(nkpg); paddr = VM_PAGE_TO_PHYS(nkpg); *pdpe = (pdp_entry_t)(paddr | X86_PG_V | X86_PG_RW | X86_PG_A | X86_PG_M); continue; /* try again */ } pde = pmap_pdpe_to_pde(pdpe, kernel_vm_end); if ((*pde & X86_PG_V) != 0) { kernel_vm_end = (kernel_vm_end + NBPDR) & ~PDRMASK; if (kernel_vm_end - 1 >= kernel_map->max_offset) { kernel_vm_end = kernel_map->max_offset; break; } continue; } nkpg = vm_page_alloc(NULL, pmap_pde_pindex(kernel_vm_end), VM_ALLOC_INTERRUPT | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO); if (nkpg == NULL) panic("pmap_growkernel: no memory to grow kernel"); if ((nkpg->flags & PG_ZERO) == 0) pmap_zero_page(nkpg); paddr = VM_PAGE_TO_PHYS(nkpg); newpdir = paddr | X86_PG_V | X86_PG_RW | X86_PG_A | X86_PG_M; pde_store(pde, newpdir); kernel_vm_end = (kernel_vm_end + NBPDR) & ~PDRMASK; if (kernel_vm_end - 1 >= kernel_map->max_offset) { kernel_vm_end = kernel_map->max_offset; break; } } } /*************************************************** * page management routines. ***************************************************/ CTASSERT(sizeof(struct pv_chunk) == PAGE_SIZE); CTASSERT(_NPCM == 3); CTASSERT(_NPCPV == 168); static __inline struct pv_chunk * pv_to_chunk(pv_entry_t pv) { return ((struct pv_chunk *)((uintptr_t)pv & ~(uintptr_t)PAGE_MASK)); } #define PV_PMAP(pv) (pv_to_chunk(pv)->pc_pmap) #define PC_FREE0 0xfffffffffffffffful #define PC_FREE1 0xfffffffffffffffful #define PC_FREE2 0x000000fffffffffful static const uint64_t pc_freemask[_NPCM] = { PC_FREE0, PC_FREE1, PC_FREE2 }; #ifdef PV_STATS static int pc_chunk_count, pc_chunk_allocs, pc_chunk_frees, pc_chunk_tryfail; SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_count, CTLFLAG_RD, &pc_chunk_count, 0, "Current number of pv entry chunks"); SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_allocs, CTLFLAG_RD, &pc_chunk_allocs, 0, "Current number of pv entry chunks allocated"); SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_frees, CTLFLAG_RD, &pc_chunk_frees, 0, "Current number of pv entry chunks frees"); SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_tryfail, CTLFLAG_RD, &pc_chunk_tryfail, 0, "Number of times tried to get a chunk page but failed."); static long pv_entry_frees, pv_entry_allocs, pv_entry_count; static int pv_entry_spare; SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_frees, CTLFLAG_RD, &pv_entry_frees, 0, "Current number of pv entry frees"); SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_allocs, CTLFLAG_RD, &pv_entry_allocs, 0, "Current number of pv entry allocs"); SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_count, CTLFLAG_RD, &pv_entry_count, 0, "Current number of pv entries"); SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_spare, CTLFLAG_RD, &pv_entry_spare, 0, "Current number of spare pv entries"); #endif /* * We are in a serious low memory condition. Resort to * drastic measures to free some pages so we can allocate * another pv entry chunk. * * Returns NULL if PV entries were reclaimed from the specified pmap. 
* * We do not, however, unmap 2mpages because subsequent accesses will * allocate per-page pv entries until repromotion occurs, thereby * exacerbating the shortage of free pv entries. */ static vm_page_t reclaim_pv_chunk(pmap_t locked_pmap, struct rwlock **lockp) { struct pch new_tail; struct pv_chunk *pc; struct md_page *pvh; pd_entry_t *pde; pmap_t pmap; pt_entry_t *pte, tpte; pt_entry_t PG_G, PG_A, PG_M, PG_RW; pv_entry_t pv; vm_offset_t va; vm_page_t m, m_pc; struct spglist free; uint64_t inuse; int bit, field, freed; PMAP_LOCK_ASSERT(locked_pmap, MA_OWNED); KASSERT(lockp != NULL, ("reclaim_pv_chunk: lockp is NULL")); pmap = NULL; m_pc = NULL; PG_G = PG_A = PG_M = PG_RW = 0; SLIST_INIT(&free); TAILQ_INIT(&new_tail); pmap_delayed_invl_started(); mtx_lock(&pv_chunks_mutex); while ((pc = TAILQ_FIRST(&pv_chunks)) != NULL && SLIST_EMPTY(&free)) { TAILQ_REMOVE(&pv_chunks, pc, pc_lru); mtx_unlock(&pv_chunks_mutex); if (pmap != pc->pc_pmap) { if (pmap != NULL) { pmap_invalidate_all(pmap); if (pmap != locked_pmap) PMAP_UNLOCK(pmap); } pmap_delayed_invl_finished(); pmap_delayed_invl_started(); pmap = pc->pc_pmap; /* Avoid deadlock and lock recursion. */ if (pmap > locked_pmap) { RELEASE_PV_LIST_LOCK(lockp); PMAP_LOCK(pmap); } else if (pmap != locked_pmap && !PMAP_TRYLOCK(pmap)) { pmap = NULL; TAILQ_INSERT_TAIL(&new_tail, pc, pc_lru); mtx_lock(&pv_chunks_mutex); continue; } PG_G = pmap_global_bit(pmap); PG_A = pmap_accessed_bit(pmap); PG_M = pmap_modified_bit(pmap); PG_RW = pmap_rw_bit(pmap); } /* * Destroy every non-wired, 4 KB page mapping in the chunk. */ freed = 0; for (field = 0; field < _NPCM; field++) { for (inuse = ~pc->pc_map[field] & pc_freemask[field]; inuse != 0; inuse &= ~(1UL << bit)) { bit = bsfq(inuse); pv = &pc->pc_pventry[field * 64 + bit]; va = pv->pv_va; pde = pmap_pde(pmap, va); if ((*pde & PG_PS) != 0) continue; pte = pmap_pde_to_pte(pde, va); if ((*pte & PG_W) != 0) continue; tpte = pte_load_clear(pte); if ((tpte & PG_G) != 0) pmap_invalidate_page(pmap, va); m = PHYS_TO_VM_PAGE(tpte & PG_FRAME); if ((tpte & (PG_M | PG_RW)) == (PG_M | PG_RW)) vm_page_dirty(m); if ((tpte & PG_A) != 0) vm_page_aflag_set(m, PGA_REFERENCED); CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m); TAILQ_REMOVE(&m->md.pv_list, pv, pv_next); m->md.pv_gen++; if (TAILQ_EMPTY(&m->md.pv_list) && (m->flags & PG_FICTITIOUS) == 0) { pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); if (TAILQ_EMPTY(&pvh->pv_list)) { vm_page_aflag_clear(m, PGA_WRITEABLE); } } pmap_delayed_invl_page(m); pc->pc_map[field] |= 1UL << bit; pmap_unuse_pt(pmap, va, *pde, &free); freed++; } } if (freed == 0) { TAILQ_INSERT_TAIL(&new_tail, pc, pc_lru); mtx_lock(&pv_chunks_mutex); continue; } /* Every freed mapping is for a 4 KB page. */ pmap_resident_count_dec(pmap, freed); PV_STAT(atomic_add_long(&pv_entry_frees, freed)); PV_STAT(atomic_add_int(&pv_entry_spare, freed)); PV_STAT(atomic_subtract_long(&pv_entry_count, freed)); TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); if (pc->pc_map[0] == PC_FREE0 && pc->pc_map[1] == PC_FREE1 && pc->pc_map[2] == PC_FREE2) { PV_STAT(atomic_subtract_int(&pv_entry_spare, _NPCPV)); PV_STAT(atomic_subtract_int(&pc_chunk_count, 1)); PV_STAT(atomic_add_int(&pc_chunk_frees, 1)); /* Entire chunk is free; return it. */ m_pc = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pc)); dump_drop_page(m_pc->phys_addr); mtx_lock(&pv_chunks_mutex); break; } TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list); TAILQ_INSERT_TAIL(&new_tail, pc, pc_lru); mtx_lock(&pv_chunks_mutex); /* One freed pv entry in locked_pmap is sufficient. 
*/ if (pmap == locked_pmap) break; } TAILQ_CONCAT(&pv_chunks, &new_tail, pc_lru); mtx_unlock(&pv_chunks_mutex); if (pmap != NULL) { pmap_invalidate_all(pmap); if (pmap != locked_pmap) PMAP_UNLOCK(pmap); } pmap_delayed_invl_finished(); if (m_pc == NULL && !SLIST_EMPTY(&free)) { m_pc = SLIST_FIRST(&free); SLIST_REMOVE_HEAD(&free, plinks.s.ss); /* Recycle a freed page table page. */ m_pc->wire_count = 1; atomic_add_int(&vm_cnt.v_wire_count, 1); } pmap_free_zero_pages(&free); return (m_pc); } /* * free the pv_entry back to the free list */ static void free_pv_entry(pmap_t pmap, pv_entry_t pv) { struct pv_chunk *pc; int idx, field, bit; PMAP_LOCK_ASSERT(pmap, MA_OWNED); PV_STAT(atomic_add_long(&pv_entry_frees, 1)); PV_STAT(atomic_add_int(&pv_entry_spare, 1)); PV_STAT(atomic_subtract_long(&pv_entry_count, 1)); pc = pv_to_chunk(pv); idx = pv - &pc->pc_pventry[0]; field = idx / 64; bit = idx % 64; pc->pc_map[field] |= 1ul << bit; if (pc->pc_map[0] != PC_FREE0 || pc->pc_map[1] != PC_FREE1 || pc->pc_map[2] != PC_FREE2) { /* 98% of the time, pc is already at the head of the list. */ if (__predict_false(pc != TAILQ_FIRST(&pmap->pm_pvchunk))) { TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list); } return; } TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); free_pv_chunk(pc); } static void free_pv_chunk(struct pv_chunk *pc) { vm_page_t m; mtx_lock(&pv_chunks_mutex); TAILQ_REMOVE(&pv_chunks, pc, pc_lru); mtx_unlock(&pv_chunks_mutex); PV_STAT(atomic_subtract_int(&pv_entry_spare, _NPCPV)); PV_STAT(atomic_subtract_int(&pc_chunk_count, 1)); PV_STAT(atomic_add_int(&pc_chunk_frees, 1)); /* entire chunk is free, return it */ m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pc)); dump_drop_page(m->phys_addr); vm_page_unwire(m, PQ_NONE); vm_page_free(m); } /* * Returns a new PV entry, allocating a new PV chunk from the system when * needed. If this PV chunk allocation fails and a PV list lock pointer was * given, a PV chunk is reclaimed from an arbitrary pmap. Otherwise, NULL is * returned. * * The given PV list lock may be released. 
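* Free slots are tracked by the three-word pc_map bitmap: 168 entries = 64 + 64 + 40 bits, which is why PC_FREE2 keeps only its low 40 bits set; an allocation takes the lowest set bit (bsfq), yielding entry index field * 64 + bit.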
*/ static pv_entry_t get_pv_entry(pmap_t pmap, struct rwlock **lockp) { int bit, field; pv_entry_t pv; struct pv_chunk *pc; vm_page_t m; PMAP_LOCK_ASSERT(pmap, MA_OWNED); PV_STAT(atomic_add_long(&pv_entry_allocs, 1)); retry: pc = TAILQ_FIRST(&pmap->pm_pvchunk); if (pc != NULL) { for (field = 0; field < _NPCM; field++) { if (pc->pc_map[field]) { bit = bsfq(pc->pc_map[field]); break; } } if (field < _NPCM) { pv = &pc->pc_pventry[field * 64 + bit]; pc->pc_map[field] &= ~(1ul << bit); /* If this was the last item, move it to tail */ if (pc->pc_map[0] == 0 && pc->pc_map[1] == 0 && pc->pc_map[2] == 0) { TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc, pc_list); } PV_STAT(atomic_add_long(&pv_entry_count, 1)); PV_STAT(atomic_subtract_int(&pv_entry_spare, 1)); return (pv); } } /* No free items, allocate another chunk */ m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED); if (m == NULL) { if (lockp == NULL) { PV_STAT(pc_chunk_tryfail++); return (NULL); } m = reclaim_pv_chunk(pmap, lockp); if (m == NULL) goto retry; } PV_STAT(atomic_add_int(&pc_chunk_count, 1)); PV_STAT(atomic_add_int(&pc_chunk_allocs, 1)); dump_add_page(m->phys_addr); pc = (void *)PHYS_TO_DMAP(m->phys_addr); pc->pc_pmap = pmap; pc->pc_map[0] = PC_FREE0 & ~1ul; /* preallocated bit 0 */ pc->pc_map[1] = PC_FREE1; pc->pc_map[2] = PC_FREE2; mtx_lock(&pv_chunks_mutex); TAILQ_INSERT_TAIL(&pv_chunks, pc, pc_lru); mtx_unlock(&pv_chunks_mutex); pv = &pc->pc_pventry[0]; TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list); PV_STAT(atomic_add_long(&pv_entry_count, 1)); PV_STAT(atomic_add_int(&pv_entry_spare, _NPCPV - 1)); return (pv); } /* * Returns the number of one bits within the given PV chunk map. * * The errata for Intel processors state that "POPCNT Instruction May * Take Longer to Execute Than Expected". It is believed that the * issue is the spurious dependency on the destination register. * Provide a hint to the register rename logic that the destination * value is overwritten, by clearing it, as suggested in the * optimization manual. It should be cheap for unaffected processors * as well. * * Reference numbers for the errata are * 4th Gen Core: HSD146 * 5th Gen Core: BDM85 * 6th Gen Core: SKL029 */ static int popcnt_pc_map_pq(uint64_t *map) { u_long result, tmp; __asm __volatile("xorl %k0,%k0;popcntq %2,%0;" "xorl %k1,%k1;popcntq %3,%1;addl %k1,%k0;" "xorl %k1,%k1;popcntq %4,%1;addl %k1,%k0" : "=&r" (result), "=&r" (tmp) : "m" (map[0]), "m" (map[1]), "m" (map[2])); return (result); } /* * Ensure that the number of spare PV entries in the specified pmap meets or * exceeds the given count, "needed". * * The given PV list lock may be released. */ static void reserve_pv_entries(pmap_t pmap, int needed, struct rwlock **lockp) { struct pch new_tail; struct pv_chunk *pc; int avail, free; vm_page_t m; PMAP_LOCK_ASSERT(pmap, MA_OWNED); KASSERT(lockp != NULL, ("reserve_pv_entries: lockp is NULL")); /* * Newly allocated PV chunks must be stored in a private list until * the required number of PV chunks have been allocated. Otherwise, * reclaim_pv_chunk() could recycle one of these chunks. In * contrast, these chunks must be added to the pmap upon allocation.
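* For example, demoting a 2MB mapping requires NPTEPG - 1 == 511 spare entries, i.e. up to four new chunks of _NPCPV == 168 entries each when no spares exist.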
*/ TAILQ_INIT(&new_tail); retry: avail = 0; TAILQ_FOREACH(pc, &pmap->pm_pvchunk, pc_list) { #ifndef __POPCNT__ if ((cpu_feature2 & CPUID2_POPCNT) == 0) bit_count((bitstr_t *)pc->pc_map, 0, sizeof(pc->pc_map) * NBBY, &free); else #endif free = popcnt_pc_map_pq(pc->pc_map); if (free == 0) break; avail += free; if (avail >= needed) break; } for (; avail < needed; avail += _NPCPV) { m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED); if (m == NULL) { m = reclaim_pv_chunk(pmap, lockp); if (m == NULL) goto retry; } PV_STAT(atomic_add_int(&pc_chunk_count, 1)); PV_STAT(atomic_add_int(&pc_chunk_allocs, 1)); dump_add_page(m->phys_addr); pc = (void *)PHYS_TO_DMAP(m->phys_addr); pc->pc_pmap = pmap; pc->pc_map[0] = PC_FREE0; pc->pc_map[1] = PC_FREE1; pc->pc_map[2] = PC_FREE2; TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list); TAILQ_INSERT_TAIL(&new_tail, pc, pc_lru); PV_STAT(atomic_add_int(&pv_entry_spare, _NPCPV)); } if (!TAILQ_EMPTY(&new_tail)) { mtx_lock(&pv_chunks_mutex); TAILQ_CONCAT(&pv_chunks, &new_tail, pc_lru); mtx_unlock(&pv_chunks_mutex); } } /* * First find and then remove the pv entry for the specified pmap and virtual * address from the specified pv list. Returns the pv entry if found and NULL * otherwise. This operation can be performed on pv lists for either 4KB or * 2MB page mappings. */ static __inline pv_entry_t pmap_pvh_remove(struct md_page *pvh, pmap_t pmap, vm_offset_t va) { pv_entry_t pv; TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) { if (pmap == PV_PMAP(pv) && va == pv->pv_va) { TAILQ_REMOVE(&pvh->pv_list, pv, pv_next); pvh->pv_gen++; break; } } return (pv); } /* * After demotion from a 2MB page mapping to 512 4KB page mappings, * destroy the pv entry for the 2MB page mapping and reinstantiate the pv * entries for each of the 4KB page mappings. */ static void pmap_pv_demote_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa, struct rwlock **lockp) { struct md_page *pvh; struct pv_chunk *pc; pv_entry_t pv; vm_offset_t va_last; vm_page_t m; int bit, field; PMAP_LOCK_ASSERT(pmap, MA_OWNED); KASSERT((pa & PDRMASK) == 0, ("pmap_pv_demote_pde: pa is not 2mpage aligned")); CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, pa); /* * Transfer the 2mpage's pv entry for this mapping to the first * page's pv list. Once this transfer begins, the pv list lock * must not be released until the last pv entry is reinstantiated. */ pvh = pa_to_pvh(pa); va = trunc_2mpage(va); pv = pmap_pvh_remove(pvh, pmap, va); KASSERT(pv != NULL, ("pmap_pv_demote_pde: pv not found")); m = PHYS_TO_VM_PAGE(pa); TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); m->md.pv_gen++; /* Instantiate the remaining NPTEPG - 1 pv entries. 
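* One entry is created for each 4KB page from va + PAGE_SIZE through va + NBPDR - PAGE_SIZE; the 2mpage's own entry was transferred to the first page above.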
*/ PV_STAT(atomic_add_long(&pv_entry_allocs, NPTEPG - 1)); va_last = va + NBPDR - PAGE_SIZE; for (;;) { pc = TAILQ_FIRST(&pmap->pm_pvchunk); KASSERT(pc->pc_map[0] != 0 || pc->pc_map[1] != 0 || pc->pc_map[2] != 0, ("pmap_pv_demote_pde: missing spare")); for (field = 0; field < _NPCM; field++) { while (pc->pc_map[field]) { bit = bsfq(pc->pc_map[field]); pc->pc_map[field] &= ~(1ul << bit); pv = &pc->pc_pventry[field * 64 + bit]; va += PAGE_SIZE; pv->pv_va = va; m++; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_pv_demote_pde: page %p is not managed", m)); TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); m->md.pv_gen++; if (va == va_last) goto out; } } TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc, pc_list); } out: if (pc->pc_map[0] == 0 && pc->pc_map[1] == 0 && pc->pc_map[2] == 0) { TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc, pc_list); } PV_STAT(atomic_add_long(&pv_entry_count, NPTEPG - 1)); PV_STAT(atomic_subtract_int(&pv_entry_spare, NPTEPG - 1)); } /* * After promotion from 512 4KB page mappings to a single 2MB page mapping, * replace the many pv entries for the 4KB page mappings by a single pv entry * for the 2MB page mapping. */ static void pmap_pv_promote_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa, struct rwlock **lockp) { struct md_page *pvh; pv_entry_t pv; vm_offset_t va_last; vm_page_t m; KASSERT((pa & PDRMASK) == 0, ("pmap_pv_promote_pde: pa is not 2mpage aligned")); CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, pa); /* * Transfer the first page's pv entry for this mapping to the 2mpage's * pv list. Aside from avoiding the cost of a call to get_pv_entry(), * a transfer avoids the possibility that get_pv_entry() calls * reclaim_pv_chunk() and that reclaim_pv_chunk() removes one of the * mappings that is being promoted. */ m = PHYS_TO_VM_PAGE(pa); va = trunc_2mpage(va); pv = pmap_pvh_remove(&m->md, pmap, va); KASSERT(pv != NULL, ("pmap_pv_promote_pde: pv not found")); pvh = pa_to_pvh(pa); TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_next); pvh->pv_gen++; /* Free the remaining NPTEPG - 1 pv entries. */ va_last = va + NBPDR - PAGE_SIZE; do { m++; va += PAGE_SIZE; pmap_pvh_free(&m->md, pmap, va); } while (va < va_last); } /* * First find and then destroy the pv entry for the specified pmap and virtual * address. This operation can be performed on pv lists for either 4KB or 2MB * page mappings. */ static void pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va) { pv_entry_t pv; pv = pmap_pvh_remove(pvh, pmap, va); KASSERT(pv != NULL, ("pmap_pvh_free: pv not found")); free_pv_entry(pmap, pv); } /* * Conditionally create the PV entry for a 4KB page mapping if the required * memory can be allocated without resorting to reclamation. */ static boolean_t pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va, vm_page_t m, struct rwlock **lockp) { pv_entry_t pv; PMAP_LOCK_ASSERT(pmap, MA_OWNED); /* Pass NULL instead of the lock pointer to disable reclamation. */ if ((pv = get_pv_entry(pmap, NULL)) != NULL) { pv->pv_va = va; CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m); TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); m->md.pv_gen++; return (TRUE); } else return (FALSE); } /* * Conditionally create the PV entry for a 2MB page mapping if the required * memory can be allocated without resorting to reclamation. 
*/ static boolean_t pmap_pv_insert_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa, struct rwlock **lockp) { struct md_page *pvh; pv_entry_t pv; PMAP_LOCK_ASSERT(pmap, MA_OWNED); /* Pass NULL instead of the lock pointer to disable reclamation. */ if ((pv = get_pv_entry(pmap, NULL)) != NULL) { pv->pv_va = va; CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, pa); pvh = pa_to_pvh(pa); TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_next); pvh->pv_gen++; return (TRUE); } else return (FALSE); } /* * Fills a page table page with mappings to consecutive physical pages. */ static void pmap_fill_ptp(pt_entry_t *firstpte, pt_entry_t newpte) { pt_entry_t *pte; for (pte = firstpte; pte < firstpte + NPTEPG; pte++) { *pte = newpte; newpte += PAGE_SIZE; } } /* * Tries to demote a 2MB page mapping. If demotion fails, the 2MB page * mapping is invalidated. */ static boolean_t pmap_demote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va) { struct rwlock *lock; boolean_t rv; lock = NULL; rv = pmap_demote_pde_locked(pmap, pde, va, &lock); if (lock != NULL) rw_wunlock(lock); return (rv); } static boolean_t pmap_demote_pde_locked(pmap_t pmap, pd_entry_t *pde, vm_offset_t va, struct rwlock **lockp) { pd_entry_t newpde, oldpde; pt_entry_t *firstpte, newpte; pt_entry_t PG_A, PG_G, PG_M, PG_RW, PG_V; vm_paddr_t mptepa; vm_page_t mpte; struct spglist free; int PG_PTE_CACHE; PG_G = pmap_global_bit(pmap); PG_A = pmap_accessed_bit(pmap); PG_M = pmap_modified_bit(pmap); PG_RW = pmap_rw_bit(pmap); PG_V = pmap_valid_bit(pmap); PG_PTE_CACHE = pmap_cache_mask(pmap, 0); PMAP_LOCK_ASSERT(pmap, MA_OWNED); oldpde = *pde; KASSERT((oldpde & (PG_PS | PG_V)) == (PG_PS | PG_V), ("pmap_demote_pde: oldpde is missing PG_PS and/or PG_V")); if ((oldpde & PG_A) != 0 && (mpte = pmap_lookup_pt_page(pmap, va)) != NULL) pmap_remove_pt_page(pmap, mpte); else { KASSERT((oldpde & PG_W) == 0, ("pmap_demote_pde: page table page for a wired mapping" " is missing")); /* * Invalidate the 2MB page mapping and return "failure" if the * mapping was never accessed or the allocation of the new * page table page fails. If the 2MB page mapping belongs to * the direct map region of the kernel's address space, then * the page allocation request specifies the highest possible * priority (VM_ALLOC_INTERRUPT). Otherwise, the priority is * normal. Page table pages are preallocated for every other * part of the kernel address space, so the direct map region * is the only part of the kernel address space that must be * handled here. */ if ((oldpde & PG_A) == 0 || (mpte = vm_page_alloc(NULL, pmap_pde_pindex(va), (va >= DMAP_MIN_ADDRESS && va < DMAP_MAX_ADDRESS ? VM_ALLOC_INTERRUPT : VM_ALLOC_NORMAL) | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED)) == NULL) { SLIST_INIT(&free); pmap_remove_pde(pmap, pde, trunc_2mpage(va), &free, lockp); pmap_invalidate_page(pmap, trunc_2mpage(va)); pmap_free_zero_pages(&free); CTR2(KTR_PMAP, "pmap_demote_pde: failure for va %#lx" " in pmap %p", va, pmap); return (FALSE); } if (va < VM_MAXUSER_ADDRESS) pmap_resident_count_inc(pmap, 1); } mptepa = VM_PAGE_TO_PHYS(mpte); firstpte = (pt_entry_t *)PHYS_TO_DMAP(mptepa); newpde = mptepa | PG_M | PG_A | (oldpde & PG_U) | PG_RW | PG_V; KASSERT((oldpde & PG_A) != 0, ("pmap_demote_pde: oldpde is missing PG_A")); KASSERT((oldpde & (PG_M | PG_RW)) != PG_RW, ("pmap_demote_pde: oldpde is missing PG_M")); newpte = oldpde & ~PG_PS; newpte = pmap_swap_pat(pmap, newpte); /* * If the page table page is new, initialize it. 
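* Initialization fills all NPTEPG slots with 4KB PTEs reproducing the 2MB mapping; pmap_fill_ptp() advances the physical frame by PAGE_SIZE per slot.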
*/ if (mpte->wire_count == 1) { mpte->wire_count = NPTEPG; pmap_fill_ptp(firstpte, newpte); } KASSERT((*firstpte & PG_FRAME) == (newpte & PG_FRAME), ("pmap_demote_pde: firstpte and newpte map different physical" " addresses")); /* * If the mapping has changed attributes, update the page table * entries. */ if ((*firstpte & PG_PTE_PROMOTE) != (newpte & PG_PTE_PROMOTE)) pmap_fill_ptp(firstpte, newpte); /* * The spare PV entries must be reserved prior to demoting the * mapping, that is, prior to changing the PDE. Otherwise, the state * of the PDE and the PV lists will be inconsistent, which can result * in reclaim_pv_chunk() attempting to remove a PV entry from the * wrong PV list and pmap_pv_demote_pde() failing to find the expected * PV entry for the 2MB page mapping that is being demoted. */ if ((oldpde & PG_MANAGED) != 0) reserve_pv_entries(pmap, NPTEPG - 1, lockp); /* * Demote the mapping. This pmap is locked. The old PDE has * PG_A set. If the old PDE has PG_RW set, it also has PG_M * set. Thus, there is no danger of a race with another * processor changing the setting of PG_A and/or PG_M between * the read above and the store below. */ if (workaround_erratum383) pmap_update_pde(pmap, va, pde, newpde); else pde_store(pde, newpde); /* * Invalidate a stale recursive mapping of the page table page. */ if (va >= VM_MAXUSER_ADDRESS) pmap_invalidate_page(pmap, (vm_offset_t)vtopte(va)); /* * Demote the PV entry. */ if ((oldpde & PG_MANAGED) != 0) pmap_pv_demote_pde(pmap, va, oldpde & PG_PS_FRAME, lockp); atomic_add_long(&pmap_pde_demotions, 1); CTR2(KTR_PMAP, "pmap_demote_pde: success for va %#lx" " in pmap %p", va, pmap); return (TRUE); } /* * pmap_remove_kernel_pde: Remove a kernel superpage mapping. */ static void pmap_remove_kernel_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va) { pd_entry_t newpde; vm_paddr_t mptepa; vm_page_t mpte; KASSERT(pmap == kernel_pmap, ("pmap %p is not kernel_pmap", pmap)); PMAP_LOCK_ASSERT(pmap, MA_OWNED); mpte = pmap_lookup_pt_page(pmap, va); if (mpte == NULL) panic("pmap_remove_kernel_pde: Missing pt page."); pmap_remove_pt_page(pmap, mpte); mptepa = VM_PAGE_TO_PHYS(mpte); newpde = mptepa | X86_PG_M | X86_PG_A | X86_PG_RW | X86_PG_V; /* * Initialize the page table page. */ pagezero((void *)PHYS_TO_DMAP(mptepa)); /* * Demote the mapping. */ if (workaround_erratum383) pmap_update_pde(pmap, va, pde, newpde); else pde_store(pde, newpde); /* * Invalidate a stale recursive mapping of the page table page. */ pmap_invalidate_page(pmap, (vm_offset_t)vtopte(va)); } /* * pmap_remove_pde: do the things to unmap a superpage in a process */ static int pmap_remove_pde(pmap_t pmap, pd_entry_t *pdq, vm_offset_t sva, struct spglist *free, struct rwlock **lockp) { struct md_page *pvh; pd_entry_t oldpde; vm_offset_t eva, va; vm_page_t m, mpte; pt_entry_t PG_G, PG_A, PG_M, PG_RW; PG_G = pmap_global_bit(pmap); PG_A = pmap_accessed_bit(pmap); PG_M = pmap_modified_bit(pmap); PG_RW = pmap_rw_bit(pmap); PMAP_LOCK_ASSERT(pmap, MA_OWNED); KASSERT((sva & PDRMASK) == 0, ("pmap_remove_pde: sva is not 2mpage aligned")); oldpde = pte_load_clear(pdq); if (oldpde & PG_W) pmap->pm_stats.wired_count -= NBPDR / PAGE_SIZE; /* * Machines that don't support invlpg also don't support * PG_G.
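* Hence, when PG_G is set here, the mapping belongs to the kernel pmap and a single targeted invalidation of the kernel mapping suffices, as below.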
*/ if (oldpde & PG_G) pmap_invalidate_page(kernel_pmap, sva); pmap_resident_count_dec(pmap, NBPDR / PAGE_SIZE); if (oldpde & PG_MANAGED) { CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, oldpde & PG_PS_FRAME); pvh = pa_to_pvh(oldpde & PG_PS_FRAME); pmap_pvh_free(pvh, pmap, sva); eva = sva + NBPDR; for (va = sva, m = PHYS_TO_VM_PAGE(oldpde & PG_PS_FRAME); va < eva; va += PAGE_SIZE, m++) { if ((oldpde & (PG_M | PG_RW)) == (PG_M | PG_RW)) vm_page_dirty(m); if (oldpde & PG_A) vm_page_aflag_set(m, PGA_REFERENCED); if (TAILQ_EMPTY(&m->md.pv_list) && TAILQ_EMPTY(&pvh->pv_list)) vm_page_aflag_clear(m, PGA_WRITEABLE); pmap_delayed_invl_page(m); } } if (pmap == kernel_pmap) { pmap_remove_kernel_pde(pmap, pdq, sva); } else { mpte = pmap_lookup_pt_page(pmap, sva); if (mpte != NULL) { pmap_remove_pt_page(pmap, mpte); pmap_resident_count_dec(pmap, 1); KASSERT(mpte->wire_count == NPTEPG, ("pmap_remove_pde: pte page wire count error")); mpte->wire_count = 0; pmap_add_delayed_free_list(mpte, free, FALSE); atomic_subtract_int(&vm_cnt.v_wire_count, 1); } } return (pmap_unuse_pt(pmap, sva, *pmap_pdpe(pmap, sva), free)); } /* * pmap_remove_pte: do the things to unmap a page in a process */ static int pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, vm_offset_t va, pd_entry_t ptepde, struct spglist *free, struct rwlock **lockp) { struct md_page *pvh; pt_entry_t oldpte, PG_A, PG_M, PG_RW; vm_page_t m; PG_A = pmap_accessed_bit(pmap); PG_M = pmap_modified_bit(pmap); PG_RW = pmap_rw_bit(pmap); PMAP_LOCK_ASSERT(pmap, MA_OWNED); oldpte = pte_load_clear(ptq); if (oldpte & PG_W) pmap->pm_stats.wired_count -= 1; pmap_resident_count_dec(pmap, 1); if (oldpte & PG_MANAGED) { m = PHYS_TO_VM_PAGE(oldpte & PG_FRAME); if ((oldpte & (PG_M | PG_RW)) == (PG_M | PG_RW)) vm_page_dirty(m); if (oldpte & PG_A) vm_page_aflag_set(m, PGA_REFERENCED); CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m); pmap_pvh_free(&m->md, pmap, va); if (TAILQ_EMPTY(&m->md.pv_list) && (m->flags & PG_FICTITIOUS) == 0) { pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); if (TAILQ_EMPTY(&pvh->pv_list)) vm_page_aflag_clear(m, PGA_WRITEABLE); } pmap_delayed_invl_page(m); } return (pmap_unuse_pt(pmap, va, ptepde, free)); } /* * Remove a single page from a process address space */ static void pmap_remove_page(pmap_t pmap, vm_offset_t va, pd_entry_t *pde, struct spglist *free) { struct rwlock *lock; pt_entry_t *pte, PG_V; PG_V = pmap_valid_bit(pmap); PMAP_LOCK_ASSERT(pmap, MA_OWNED); if ((*pde & PG_V) == 0) return; pte = pmap_pde_to_pte(pde, va); if ((*pte & PG_V) == 0) return; lock = NULL; pmap_remove_pte(pmap, pte, va, *pde, free, &lock); if (lock != NULL) rw_wunlock(lock); pmap_invalidate_page(pmap, va); } /* * Remove the given range of addresses from the specified map. * * It is assumed that the start and end are properly * rounded to the page size. */ void pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) { struct rwlock *lock; vm_offset_t va, va_next; pml4_entry_t *pml4e; pdp_entry_t *pdpe; pd_entry_t ptpaddr, *pde; pt_entry_t *pte, PG_G, PG_V; struct spglist free; int anyvalid; PG_G = pmap_global_bit(pmap); PG_V = pmap_valid_bit(pmap); /* * Perform an unsynchronized read. This is, however, safe. */ if (pmap->pm_stats.resident_count == 0) return; anyvalid = 0; SLIST_INIT(&free); pmap_delayed_invl_started(); PMAP_LOCK(pmap); /* * Special handling of removing one page: a very * common operation for which we can short-circuit some * code.
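* The one-page case below skips the full PML4/PDP/PD walk and the ranged-invalidation bookkeeping of the main loop.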
*/ if (sva + PAGE_SIZE == eva) { pde = pmap_pde(pmap, sva); if (pde && (*pde & PG_PS) == 0) { pmap_remove_page(pmap, sva, pde, &free); goto out; } } lock = NULL; for (; sva < eva; sva = va_next) { if (pmap->pm_stats.resident_count == 0) break; pml4e = pmap_pml4e(pmap, sva); if ((*pml4e & PG_V) == 0) { va_next = (sva + NBPML4) & ~PML4MASK; if (va_next < sva) va_next = eva; continue; } pdpe = pmap_pml4e_to_pdpe(pml4e, sva); if ((*pdpe & PG_V) == 0) { va_next = (sva + NBPDP) & ~PDPMASK; if (va_next < sva) va_next = eva; continue; } /* * Calculate index for next page table. */ va_next = (sva + NBPDR) & ~PDRMASK; if (va_next < sva) va_next = eva; pde = pmap_pdpe_to_pde(pdpe, sva); ptpaddr = *pde; /* * Weed out invalid mappings. */ if (ptpaddr == 0) continue; /* * Check for large page. */ if ((ptpaddr & PG_PS) != 0) { /* * Are we removing the entire large page? If not, * demote the mapping and fall through. */ if (sva + NBPDR == va_next && eva >= va_next) { /* * The TLB entry for a PG_G mapping is * invalidated by pmap_remove_pde(). */ if ((ptpaddr & PG_G) == 0) anyvalid = 1; pmap_remove_pde(pmap, pde, sva, &free, &lock); continue; } else if (!pmap_demote_pde_locked(pmap, pde, sva, &lock)) { /* The large page mapping was destroyed. */ continue; } else ptpaddr = *pde; } /* * Limit our scan to either the end of the va represented * by the current page table page, or to the end of the * range being removed. */ if (va_next > eva) va_next = eva; va = va_next; for (pte = pmap_pde_to_pte(pde, sva); sva != va_next; pte++, sva += PAGE_SIZE) { if (*pte == 0) { if (va != va_next) { pmap_invalidate_range(pmap, va, sva); va = va_next; } continue; } if ((*pte & PG_G) == 0) anyvalid = 1; else if (va == va_next) va = sva; if (pmap_remove_pte(pmap, pte, sva, ptpaddr, &free, &lock)) { sva += PAGE_SIZE; break; } } if (va != va_next) pmap_invalidate_range(pmap, va, sva); } if (lock != NULL) rw_wunlock(lock); out: if (anyvalid) pmap_invalidate_all(pmap); PMAP_UNLOCK(pmap); pmap_delayed_invl_finished(); pmap_free_zero_pages(&free); } /* * Routine: pmap_remove_all * Function: * Removes this physical page from * all physical maps in which it resides. * Reflects back modify bits to the pager. * * Notes: * Original versions of this routine were very * inefficient because they iteratively called * pmap_remove (slow...) */ void pmap_remove_all(vm_page_t m) { struct md_page *pvh; pv_entry_t pv; pmap_t pmap; struct rwlock *lock; pt_entry_t *pte, tpte, PG_A, PG_M, PG_RW; pd_entry_t *pde; vm_offset_t va; struct spglist free; int pvh_gen, md_gen; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_remove_all: page %p is not managed", m)); SLIST_INIT(&free); lock = VM_PAGE_TO_PV_LIST_LOCK(m); pvh = (m->flags & PG_FICTITIOUS) != 0 ? 
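/* fictitious pages have no 2mpage pv list; use the shared dummy head */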
&pv_dummy : pa_to_pvh(VM_PAGE_TO_PHYS(m)); retry: rw_wlock(lock); while ((pv = TAILQ_FIRST(&pvh->pv_list)) != NULL) { pmap = PV_PMAP(pv); if (!PMAP_TRYLOCK(pmap)) { pvh_gen = pvh->pv_gen; rw_wunlock(lock); PMAP_LOCK(pmap); rw_wlock(lock); if (pvh_gen != pvh->pv_gen) { rw_wunlock(lock); PMAP_UNLOCK(pmap); goto retry; } } va = pv->pv_va; pde = pmap_pde(pmap, va); (void)pmap_demote_pde_locked(pmap, pde, va, &lock); PMAP_UNLOCK(pmap); } while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) { pmap = PV_PMAP(pv); if (!PMAP_TRYLOCK(pmap)) { pvh_gen = pvh->pv_gen; md_gen = m->md.pv_gen; rw_wunlock(lock); PMAP_LOCK(pmap); rw_wlock(lock); if (pvh_gen != pvh->pv_gen || md_gen != m->md.pv_gen) { rw_wunlock(lock); PMAP_UNLOCK(pmap); goto retry; } } PG_A = pmap_accessed_bit(pmap); PG_M = pmap_modified_bit(pmap); PG_RW = pmap_rw_bit(pmap); pmap_resident_count_dec(pmap, 1); pde = pmap_pde(pmap, pv->pv_va); KASSERT((*pde & PG_PS) == 0, ("pmap_remove_all: found" " a 2mpage in page %p's pv list", m)); pte = pmap_pde_to_pte(pde, pv->pv_va); tpte = pte_load_clear(pte); if (tpte & PG_W) pmap->pm_stats.wired_count--; if (tpte & PG_A) vm_page_aflag_set(m, PGA_REFERENCED); /* * Update the vm_page_t clean and reference bits. */ if ((tpte & (PG_M | PG_RW)) == (PG_M | PG_RW)) vm_page_dirty(m); pmap_unuse_pt(pmap, pv->pv_va, *pde, &free); pmap_invalidate_page(pmap, pv->pv_va); TAILQ_REMOVE(&m->md.pv_list, pv, pv_next); m->md.pv_gen++; free_pv_entry(pmap, pv); PMAP_UNLOCK(pmap); } vm_page_aflag_clear(m, PGA_WRITEABLE); rw_wunlock(lock); pmap_delayed_invl_wait(m); pmap_free_zero_pages(&free); } /* * pmap_protect_pde: do the things to protect a 2mpage in a process */ static boolean_t pmap_protect_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t sva, vm_prot_t prot) { pd_entry_t newpde, oldpde; vm_offset_t eva, va; vm_page_t m; boolean_t anychanged; pt_entry_t PG_G, PG_M, PG_RW; PG_G = pmap_global_bit(pmap); PG_M = pmap_modified_bit(pmap); PG_RW = pmap_rw_bit(pmap); PMAP_LOCK_ASSERT(pmap, MA_OWNED); KASSERT((sva & PDRMASK) == 0, ("pmap_protect_pde: sva is not 2mpage aligned")); anychanged = FALSE; retry: oldpde = newpde = *pde; if (oldpde & PG_MANAGED) { eva = sva + NBPDR; for (va = sva, m = PHYS_TO_VM_PAGE(oldpde & PG_PS_FRAME); va < eva; va += PAGE_SIZE, m++) if ((oldpde & (PG_M | PG_RW)) == (PG_M | PG_RW)) vm_page_dirty(m); } if ((prot & VM_PROT_WRITE) == 0) newpde &= ~(PG_RW | PG_M); if ((prot & VM_PROT_EXECUTE) == 0) newpde |= pg_nx; if (newpde != oldpde) { if (!atomic_cmpset_long(pde, oldpde, newpde)) goto retry; if (oldpde & PG_G) pmap_invalidate_page(pmap, sva); else anychanged = TRUE; } return (anychanged); } /* * Set the physical protection on the * specified range of this map as requested. 
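* VM_PROT_NONE is delegated to pmap_remove(); a request that leaves both write and execute permission enabled requires no PTE change and returns at once.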
*/ void pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot) { vm_offset_t va_next; pml4_entry_t *pml4e; pdp_entry_t *pdpe; pd_entry_t ptpaddr, *pde; pt_entry_t *pte, PG_G, PG_M, PG_RW, PG_V; boolean_t anychanged; KASSERT((prot & ~VM_PROT_ALL) == 0, ("invalid prot %x", prot)); if (prot == VM_PROT_NONE) { pmap_remove(pmap, sva, eva); return; } if ((prot & (VM_PROT_WRITE|VM_PROT_EXECUTE)) == (VM_PROT_WRITE|VM_PROT_EXECUTE)) return; PG_G = pmap_global_bit(pmap); PG_M = pmap_modified_bit(pmap); PG_V = pmap_valid_bit(pmap); PG_RW = pmap_rw_bit(pmap); anychanged = FALSE; PMAP_LOCK(pmap); for (; sva < eva; sva = va_next) { pml4e = pmap_pml4e(pmap, sva); if ((*pml4e & PG_V) == 0) { va_next = (sva + NBPML4) & ~PML4MASK; if (va_next < sva) va_next = eva; continue; } pdpe = pmap_pml4e_to_pdpe(pml4e, sva); if ((*pdpe & PG_V) == 0) { va_next = (sva + NBPDP) & ~PDPMASK; if (va_next < sva) va_next = eva; continue; } va_next = (sva + NBPDR) & ~PDRMASK; if (va_next < sva) va_next = eva; pde = pmap_pdpe_to_pde(pdpe, sva); ptpaddr = *pde; /* * Weed out invalid mappings. */ if (ptpaddr == 0) continue; /* * Check for large page. */ if ((ptpaddr & PG_PS) != 0) { /* * Are we protecting the entire large page? If not, * demote the mapping and fall through. */ if (sva + NBPDR == va_next && eva >= va_next) { /* * The TLB entry for a PG_G mapping is * invalidated by pmap_protect_pde(). */ if (pmap_protect_pde(pmap, pde, sva, prot)) anychanged = TRUE; continue; } else if (!pmap_demote_pde(pmap, pde, sva)) { /* * The large page mapping was destroyed. */ continue; } } if (va_next > eva) va_next = eva; for (pte = pmap_pde_to_pte(pde, sva); sva != va_next; pte++, sva += PAGE_SIZE) { pt_entry_t obits, pbits; vm_page_t m; retry: obits = pbits = *pte; if ((pbits & PG_V) == 0) continue; if ((prot & VM_PROT_WRITE) == 0) { if ((pbits & (PG_MANAGED | PG_M | PG_RW)) == (PG_MANAGED | PG_M | PG_RW)) { m = PHYS_TO_VM_PAGE(pbits & PG_FRAME); vm_page_dirty(m); } pbits &= ~(PG_RW | PG_M); } if ((prot & VM_PROT_EXECUTE) == 0) pbits |= pg_nx; if (pbits != obits) { if (!atomic_cmpset_long(pte, obits, pbits)) goto retry; if (obits & PG_G) pmap_invalidate_page(pmap, sva); else anychanged = TRUE; } } } if (anychanged) pmap_invalidate_all(pmap); PMAP_UNLOCK(pmap); } /* * Tries to promote the 512, contiguous 4KB page mappings that are within a * single page table page (PTP) to a single 2MB page mapping. For promotion * to occur, two conditions must be met: (1) the 4KB page mappings must map * aligned, contiguous physical memory and (2) the 4KB page mappings must have * identical characteristics. */ static void pmap_promote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va, struct rwlock **lockp) { pd_entry_t newpde; pt_entry_t *firstpte, oldpte, pa, *pte; pt_entry_t PG_G, PG_A, PG_M, PG_RW, PG_V; vm_page_t mpte; int PG_PTE_CACHE; PG_A = pmap_accessed_bit(pmap); PG_G = pmap_global_bit(pmap); PG_M = pmap_modified_bit(pmap); PG_V = pmap_valid_bit(pmap); PG_RW = pmap_rw_bit(pmap); PG_PTE_CACHE = pmap_cache_mask(pmap, 0); PMAP_LOCK_ASSERT(pmap, MA_OWNED); /* * Examine the first PTE in the specified PTP. Abort if this PTE is * either invalid, unused, or does not map the first 4KB physical page * within a 2MB page. 
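* Concretely, (newpde & ((PG_FRAME & PDRMASK) | PG_A | PG_V)) must equal (PG_A | PG_V): valid, accessed, and with a 2MB-aligned physical frame.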
*/ firstpte = (pt_entry_t *)PHYS_TO_DMAP(*pde & PG_FRAME); setpde: newpde = *firstpte; if ((newpde & ((PG_FRAME & PDRMASK) | PG_A | PG_V)) != (PG_A | PG_V)) { atomic_add_long(&pmap_pde_p_failures, 1); CTR2(KTR_PMAP, "pmap_promote_pde: failure for va %#lx" " in pmap %p", va, pmap); return; } if ((newpde & (PG_M | PG_RW)) == PG_RW) { /* * When PG_M is already clear, PG_RW can be cleared without * a TLB invalidation. */ if (!atomic_cmpset_long(firstpte, newpde, newpde & ~PG_RW)) goto setpde; newpde &= ~PG_RW; } /* * Examine each of the other PTEs in the specified PTP. Abort if this * PTE maps an unexpected 4KB physical page or does not have identical * characteristics to the first PTE. */ pa = (newpde & (PG_PS_FRAME | PG_A | PG_V)) + NBPDR - PAGE_SIZE; for (pte = firstpte + NPTEPG - 1; pte > firstpte; pte--) { setpte: oldpte = *pte; if ((oldpte & (PG_FRAME | PG_A | PG_V)) != pa) { atomic_add_long(&pmap_pde_p_failures, 1); CTR2(KTR_PMAP, "pmap_promote_pde: failure for va %#lx" " in pmap %p", va, pmap); return; } if ((oldpte & (PG_M | PG_RW)) == PG_RW) { /* * When PG_M is already clear, PG_RW can be cleared * without a TLB invalidation. */ if (!atomic_cmpset_long(pte, oldpte, oldpte & ~PG_RW)) goto setpte; oldpte &= ~PG_RW; CTR2(KTR_PMAP, "pmap_promote_pde: protect for va %#lx" " in pmap %p", (oldpte & PG_FRAME & PDRMASK) | (va & ~PDRMASK), pmap); } if ((oldpte & PG_PTE_PROMOTE) != (newpde & PG_PTE_PROMOTE)) { atomic_add_long(&pmap_pde_p_failures, 1); CTR2(KTR_PMAP, "pmap_promote_pde: failure for va %#lx" " in pmap %p", va, pmap); return; } pa -= PAGE_SIZE; } /* * Save the page table page in its current state until the PDE * mapping the superpage is demoted by pmap_demote_pde() or * destroyed by pmap_remove_pde(). */ mpte = PHYS_TO_VM_PAGE(*pde & PG_FRAME); KASSERT(mpte >= vm_page_array && mpte < &vm_page_array[vm_page_array_size], ("pmap_promote_pde: page table page is out of range")); KASSERT(mpte->pindex == pmap_pde_pindex(va), ("pmap_promote_pde: page table page's pindex is wrong")); if (pmap_insert_pt_page(pmap, mpte)) { atomic_add_long(&pmap_pde_p_failures, 1); CTR2(KTR_PMAP, "pmap_promote_pde: failure for va %#lx in pmap %p", va, pmap); return; } /* * Promote the pv entries. */ if ((newpde & PG_MANAGED) != 0) pmap_pv_promote_pde(pmap, va, newpde & PG_PS_FRAME, lockp); /* * Propagate the PAT index to its proper position. */ newpde = pmap_swap_pat(pmap, newpde); /* * Map the superpage. */ if (workaround_erratum383) pmap_update_pde(pmap, va, pde, PG_PS | newpde); else pde_store(pde, PG_PS | newpde); atomic_add_long(&pmap_pde_promotions, 1); CTR2(KTR_PMAP, "pmap_promote_pde: success for va %#lx" " in pmap %p", va, pmap); } /* * Insert the given physical page (p) at * the specified virtual address (v) in the * target physical map with the protection requested. * * If specified, the page will be wired down, meaning * that the related pte can not be reclaimed. * * NB: This is the only routine which MAY NOT lazy-evaluate * or lose information. That is, this routine must actually * insert this page into the given map NOW. * * When destroying both a page table and PV entry, this function * performs the TLB invalidation before releasing the PV list * lock, so we do not need pmap_delayed_invl_page() calls here. 
*/ int pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, u_int flags, int8_t psind __unused) { struct rwlock *lock; pd_entry_t *pde; pt_entry_t *pte, PG_G, PG_A, PG_M, PG_RW, PG_V; pt_entry_t newpte, origpte; pv_entry_t pv; vm_paddr_t opa, pa; vm_page_t mpte, om; boolean_t nosleep; PG_A = pmap_accessed_bit(pmap); PG_G = pmap_global_bit(pmap); PG_M = pmap_modified_bit(pmap); PG_V = pmap_valid_bit(pmap); PG_RW = pmap_rw_bit(pmap); va = trunc_page(va); KASSERT(va <= VM_MAX_KERNEL_ADDRESS, ("pmap_enter: toobig")); KASSERT(va < UPT_MIN_ADDRESS || va >= UPT_MAX_ADDRESS, ("pmap_enter: invalid to pmap_enter page table pages (va: 0x%lx)", va)); KASSERT((m->oflags & VPO_UNMANAGED) != 0 || va < kmi.clean_sva || va >= kmi.clean_eva, ("pmap_enter: managed mapping within the clean submap")); if ((m->oflags & VPO_UNMANAGED) == 0 && !vm_page_xbusied(m)) VM_OBJECT_ASSERT_LOCKED(m->object); pa = VM_PAGE_TO_PHYS(m); newpte = (pt_entry_t)(pa | PG_A | PG_V); if ((flags & VM_PROT_WRITE) != 0) newpte |= PG_M; if ((prot & VM_PROT_WRITE) != 0) newpte |= PG_RW; KASSERT((newpte & (PG_M | PG_RW)) != PG_M, ("pmap_enter: flags includes VM_PROT_WRITE but prot doesn't")); if ((prot & VM_PROT_EXECUTE) == 0) newpte |= pg_nx; if ((flags & PMAP_ENTER_WIRED) != 0) newpte |= PG_W; if (va < VM_MAXUSER_ADDRESS) newpte |= PG_U; if (pmap == kernel_pmap) newpte |= PG_G; newpte |= pmap_cache_bits(pmap, m->md.pat_mode, 0); /* * Set modified bit gratuitously for writeable mappings if * the page is unmanaged. We do not want to take a fault * to do the dirty bit accounting for these mappings. */ if ((m->oflags & VPO_UNMANAGED) != 0) { if ((newpte & PG_RW) != 0) newpte |= PG_M; } mpte = NULL; lock = NULL; PMAP_LOCK(pmap); /* * In the case that a page table page is not * resident, we are creating it here. */ retry: pde = pmap_pde(pmap, va); if (pde != NULL && (*pde & PG_V) != 0 && ((*pde & PG_PS) == 0 || pmap_demote_pde_locked(pmap, pde, va, &lock))) { pte = pmap_pde_to_pte(pde, va); if (va < VM_MAXUSER_ADDRESS && mpte == NULL) { mpte = PHYS_TO_VM_PAGE(*pde & PG_FRAME); mpte->wire_count++; } } else if (va < VM_MAXUSER_ADDRESS) { /* * Here if the pte page isn't mapped, or if it has been * deallocated. */ nosleep = (flags & PMAP_ENTER_NOSLEEP) != 0; mpte = _pmap_allocpte(pmap, pmap_pde_pindex(va), nosleep ? NULL : &lock); if (mpte == NULL && nosleep) { if (lock != NULL) rw_wunlock(lock); PMAP_UNLOCK(pmap); return (KERN_RESOURCE_SHORTAGE); } goto retry; } else panic("pmap_enter: invalid page directory va=%#lx", va); origpte = *pte; /* * Is the specified virtual address already mapped? */ if ((origpte & PG_V) != 0) { /* * Wiring change, just update stats. We don't worry about * wiring PT pages as they remain resident as long as there * are valid mappings in them. Hence, if a user page is wired, * the PT page will be also. */ if ((newpte & PG_W) != 0 && (origpte & PG_W) == 0) pmap->pm_stats.wired_count++; else if ((newpte & PG_W) == 0 && (origpte & PG_W) != 0) pmap->pm_stats.wired_count--; /* * Remove the extra PT page reference. */ if (mpte != NULL) { mpte->wire_count--; KASSERT(mpte->wire_count > 0, ("pmap_enter: missing reference to page table page," " va: 0x%lx", va)); } /* * Has the physical page changed? */ opa = origpte & PG_FRAME; if (opa == pa) { /* * No, might be a protection or wiring change. 
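* If the old and new PTEs differ only in PG_M and/or PG_A, the entry is left untouched via the "unchanged" path below.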
*/ if ((origpte & PG_MANAGED) != 0) { newpte |= PG_MANAGED; if ((newpte & PG_RW) != 0) vm_page_aflag_set(m, PGA_WRITEABLE); } if (((origpte ^ newpte) & ~(PG_M | PG_A)) == 0) goto unchanged; goto validate; } } else { /* * Increment the counters. */ if ((newpte & PG_W) != 0) pmap->pm_stats.wired_count++; pmap_resident_count_inc(pmap, 1); } /* * Enter on the PV list if part of our managed memory. */ if ((m->oflags & VPO_UNMANAGED) == 0) { newpte |= PG_MANAGED; pv = get_pv_entry(pmap, &lock); pv->pv_va = va; CHANGE_PV_LIST_LOCK_TO_PHYS(&lock, pa); TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); m->md.pv_gen++; if ((newpte & PG_RW) != 0) vm_page_aflag_set(m, PGA_WRITEABLE); } /* * Update the PTE. */ if ((origpte & PG_V) != 0) { validate: origpte = pte_load_store(pte, newpte); opa = origpte & PG_FRAME; if (opa != pa) { if ((origpte & PG_MANAGED) != 0) { om = PHYS_TO_VM_PAGE(opa); if ((origpte & (PG_M | PG_RW)) == (PG_M | PG_RW)) vm_page_dirty(om); if ((origpte & PG_A) != 0) vm_page_aflag_set(om, PGA_REFERENCED); CHANGE_PV_LIST_LOCK_TO_PHYS(&lock, opa); pmap_pvh_free(&om->md, pmap, va); if ((om->aflags & PGA_WRITEABLE) != 0 && TAILQ_EMPTY(&om->md.pv_list) && ((om->flags & PG_FICTITIOUS) != 0 || TAILQ_EMPTY(&pa_to_pvh(opa)->pv_list))) vm_page_aflag_clear(om, PGA_WRITEABLE); } } else if ((newpte & PG_M) == 0 && (origpte & (PG_M | PG_RW)) == (PG_M | PG_RW)) { if ((origpte & PG_MANAGED) != 0) vm_page_dirty(m); /* * Although the PTE may still have PG_RW set, TLB * invalidation may nonetheless be required because * the PTE no longer has PG_M set. */ } else if ((origpte & PG_NX) != 0 || (newpte & PG_NX) == 0) { /* * This PTE change does not require TLB invalidation. */ goto unchanged; } if ((origpte & PG_A) != 0) pmap_invalidate_page(pmap, va); } else pte_store(pte, newpte); unchanged: /* * If both the page table page and the reservation are fully * populated, then attempt promotion. */ if ((mpte == NULL || mpte->wire_count == NPTEPG) && pmap_ps_enabled(pmap) && (m->flags & PG_FICTITIOUS) == 0 && vm_reserv_level_iffullpop(m) == 0) pmap_promote_pde(pmap, pde, va, &lock); if (lock != NULL) rw_wunlock(lock); PMAP_UNLOCK(pmap); return (KERN_SUCCESS); } /* * Tries to create a 2MB page mapping. Returns TRUE if successful and FALSE * otherwise. Fails if (1) a page table page cannot be allocated without * blocking, (2) a mapping already exists at the specified virtual address, or * (3) a pv entry cannot be allocated without reclaiming another pv entry. */ static boolean_t pmap_enter_pde(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, struct rwlock **lockp) { pd_entry_t *pde, newpde; pt_entry_t PG_V; vm_page_t mpde; struct spglist free; PG_V = pmap_valid_bit(pmap); PMAP_LOCK_ASSERT(pmap, MA_OWNED); if ((mpde = pmap_allocpde(pmap, va, NULL)) == NULL) { CTR2(KTR_PMAP, "pmap_enter_pde: failure for va %#lx" " in pmap %p", va, pmap); return (FALSE); } pde = (pd_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(mpde)); pde = &pde[pmap_pde_index(va)]; if ((*pde & PG_V) != 0) { KASSERT(mpde->wire_count > 1, ("pmap_enter_pde: mpde's wire count is too low")); mpde->wire_count--; CTR2(KTR_PMAP, "pmap_enter_pde: failure for va %#lx" " in pmap %p", va, pmap); return (FALSE); } newpde = VM_PAGE_TO_PHYS(m) | pmap_cache_bits(pmap, m->md.pat_mode, 1) | PG_PS | PG_V; if ((m->oflags & VPO_UNMANAGED) == 0) { newpde |= PG_MANAGED; /* * Abort this mapping if its PV entry could not be created. 
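 * pmap_pv_insert_pde() allocates the entry only if it
 * can do so without reclaiming an existing PV entry,
 * which is failure mode (3) in the function header
 * above.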
*/ if (!pmap_pv_insert_pde(pmap, va, VM_PAGE_TO_PHYS(m), lockp)) { SLIST_INIT(&free); if (pmap_unwire_ptp(pmap, va, mpde, &free)) { /* * Although "va" is not mapped, paging- * structure caches could nonetheless have * entries that refer to the freed page table * pages. Invalidate those entries. */ pmap_invalidate_page(pmap, va); pmap_free_zero_pages(&free); } CTR2(KTR_PMAP, "pmap_enter_pde: failure for va %#lx" " in pmap %p", va, pmap); return (FALSE); } } if ((prot & VM_PROT_EXECUTE) == 0) newpde |= pg_nx; if (va < VM_MAXUSER_ADDRESS) newpde |= PG_U; /* * Increment counters. */ pmap_resident_count_inc(pmap, NBPDR / PAGE_SIZE); /* * Map the superpage. */ pde_store(pde, newpde); atomic_add_long(&pmap_pde_mappings, 1); CTR2(KTR_PMAP, "pmap_enter_pde: success for va %#lx" " in pmap %p", va, pmap); return (TRUE); } /* * Maps a sequence of resident pages belonging to the same object. * The sequence begins with the given page m_start. This page is * mapped at the given virtual address start. Each subsequent page is * mapped at a virtual address that is offset from start by the same * amount as the page is offset from m_start within the object. The * last page in the sequence is the page with the largest offset from * m_start that can be mapped at a virtual address less than the given * virtual address end. Not every virtual page between start and end * is mapped; only those for which a resident page exists with the * corresponding offset from m_start are mapped. */ void pmap_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end, vm_page_t m_start, vm_prot_t prot) { struct rwlock *lock; vm_offset_t va; vm_page_t m, mpte; vm_pindex_t diff, psize; VM_OBJECT_ASSERT_LOCKED(m_start->object); psize = atop(end - start); mpte = NULL; m = m_start; lock = NULL; PMAP_LOCK(pmap); while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) { va = start + ptoa(diff); if ((va & PDRMASK) == 0 && va + NBPDR <= end && m->psind == 1 && pmap_ps_enabled(pmap) && pmap_enter_pde(pmap, va, m, prot, &lock)) m = &m[NBPDR / PAGE_SIZE - 1]; else mpte = pmap_enter_quick_locked(pmap, va, m, prot, mpte, &lock); m = TAILQ_NEXT(m, listq); } if (lock != NULL) rw_wunlock(lock); PMAP_UNLOCK(pmap); } /* * this code makes some *MAJOR* assumptions: * 1. Current pmap & pmap exists. * 2. Not wired. * 3. Read access. * 4. No page table pages. * but is *MUCH* faster than pmap_enter... */ void pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot) { struct rwlock *lock; lock = NULL; PMAP_LOCK(pmap); (void)pmap_enter_quick_locked(pmap, va, m, prot, NULL, &lock); if (lock != NULL) rw_wunlock(lock); PMAP_UNLOCK(pmap); } static vm_page_t pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, vm_page_t mpte, struct rwlock **lockp) { struct spglist free; pt_entry_t *pte, PG_V; vm_paddr_t pa; KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva || (m->oflags & VPO_UNMANAGED) != 0, ("pmap_enter_quick_locked: managed mapping within the clean submap")); PG_V = pmap_valid_bit(pmap); PMAP_LOCK_ASSERT(pmap, MA_OWNED); /* * In the case that a page table page is not * resident, we are creating it here. */ if (va < VM_MAXUSER_ADDRESS) { vm_pindex_t ptepindex; pd_entry_t *ptepa; /* * Calculate pagetable page index */ ptepindex = pmap_pde_pindex(va); if (mpte && (mpte->pindex == ptepindex)) { mpte->wire_count++; } else { /* * Get the page directory entry */ ptepa = pmap_pde(pmap, va); /* * If the page table page is mapped, we just increment * the hold count, and activate it. 
Otherwise, we * attempt to allocate a page table page. If this * attempt fails, we don't retry. Instead, we give up. */ if (ptepa && (*ptepa & PG_V) != 0) { if (*ptepa & PG_PS) return (NULL); mpte = PHYS_TO_VM_PAGE(*ptepa & PG_FRAME); mpte->wire_count++; } else { /* * Pass NULL instead of the PV list lock * pointer, because we don't intend to sleep. */ mpte = _pmap_allocpte(pmap, ptepindex, NULL); if (mpte == NULL) return (mpte); } } pte = (pt_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(mpte)); pte = &pte[pmap_pte_index(va)]; } else { mpte = NULL; pte = vtopte(va); } if (*pte) { if (mpte != NULL) { mpte->wire_count--; mpte = NULL; } return (mpte); } /* * Enter on the PV list if part of our managed memory. */ if ((m->oflags & VPO_UNMANAGED) == 0 && !pmap_try_insert_pv_entry(pmap, va, m, lockp)) { if (mpte != NULL) { SLIST_INIT(&free); if (pmap_unwire_ptp(pmap, va, mpte, &free)) { /* * Although "va" is not mapped, paging- * structure caches could nonetheless have * entries that refer to the freed page table * pages. Invalidate those entries. */ pmap_invalidate_page(pmap, va); pmap_free_zero_pages(&free); } mpte = NULL; } return (mpte); } /* * Increment counters */ pmap_resident_count_inc(pmap, 1); pa = VM_PAGE_TO_PHYS(m) | pmap_cache_bits(pmap, m->md.pat_mode, 0); if ((prot & VM_PROT_EXECUTE) == 0) pa |= pg_nx; /* * Now validate mapping with RO protection */ if ((m->oflags & VPO_UNMANAGED) != 0) pte_store(pte, pa | PG_V | PG_U); else pte_store(pte, pa | PG_V | PG_U | PG_MANAGED); return (mpte); } /* * Make a temporary mapping for a physical address. This is only intended * to be used for panic dumps. */ void * pmap_kenter_temporary(vm_paddr_t pa, int i) { vm_offset_t va; va = (vm_offset_t)crashdumpmap + (i * PAGE_SIZE); pmap_kenter(va, pa); invlpg(va); return ((void *)crashdumpmap); } /* * This code maps large physical mmap regions into the * processor address space. Note that some shortcuts * are taken, but the code works. */ void pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, vm_object_t object, vm_pindex_t pindex, vm_size_t size) { pd_entry_t *pde; pt_entry_t PG_A, PG_M, PG_RW, PG_V; vm_paddr_t pa, ptepa; vm_page_t p, pdpg; int pat_mode; PG_A = pmap_accessed_bit(pmap); PG_M = pmap_modified_bit(pmap); PG_V = pmap_valid_bit(pmap); PG_RW = pmap_rw_bit(pmap); VM_OBJECT_ASSERT_WLOCKED(object); KASSERT(object->type == OBJT_DEVICE || object->type == OBJT_SG, ("pmap_object_init_pt: non-device object")); if ((addr & (NBPDR - 1)) == 0 && (size & (NBPDR - 1)) == 0) { if (!pmap_ps_enabled(pmap)) return; if (!vm_object_populate(object, pindex, pindex + atop(size))) return; p = vm_page_lookup(object, pindex); KASSERT(p->valid == VM_PAGE_BITS_ALL, ("pmap_object_init_pt: invalid page %p", p)); pat_mode = p->md.pat_mode; /* * Abort the mapping if the first page is not physically * aligned to a 2MB page boundary. */ ptepa = VM_PAGE_TO_PHYS(p); if (ptepa & (NBPDR - 1)) return; /* * Skip the first page. Abort the mapping if the rest of * the pages are not physically contiguous or have differing * memory attributes. */ p = TAILQ_NEXT(p, listq); for (pa = ptepa + PAGE_SIZE; pa < ptepa + size; pa += PAGE_SIZE) { KASSERT(p->valid == VM_PAGE_BITS_ALL, ("pmap_object_init_pt: invalid page %p", p)); if (pa != VM_PAGE_TO_PHYS(p) || pat_mode != p->md.pat_mode) return; p = TAILQ_NEXT(p, listq); } /* * Map using 2MB pages. Since "ptepa" is 2M aligned and * "size" is a multiple of 2M, adding the PAT setting to "pa" * will not affect the termination of this loop. 
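 *
 * As a concrete illustration (numbers chosen only for this example):
 * with ptepa == 0x40000000 and size == 0x800000, the bound is
 * 0x40800000 and the loop runs four times.  The cache bits returned by
 * pmap_cache_bits() lie well below the 2MB frame, so "pa" crosses the
 * 2MB-aligned bound only through the NBPDR increments, never because
 * of those low-order bits.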
*/ PMAP_LOCK(pmap); for (pa = ptepa | pmap_cache_bits(pmap, pat_mode, 1); pa < ptepa + size; pa += NBPDR) { pdpg = pmap_allocpde(pmap, addr, NULL); if (pdpg == NULL) { /* * The creation of mappings below is only an * optimization. If a page directory page * cannot be allocated without blocking, * continue on to the next mapping rather than * blocking. */ addr += NBPDR; continue; } pde = (pd_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(pdpg)); pde = &pde[pmap_pde_index(addr)]; if ((*pde & PG_V) == 0) { pde_store(pde, pa | PG_PS | PG_M | PG_A | PG_U | PG_RW | PG_V); pmap_resident_count_inc(pmap, NBPDR / PAGE_SIZE); atomic_add_long(&pmap_pde_mappings, 1); } else { /* Continue on if the PDE is already valid. */ pdpg->wire_count--; KASSERT(pdpg->wire_count > 0, ("pmap_object_init_pt: missing reference " "to page directory page, va: 0x%lx", addr)); } addr += NBPDR; } PMAP_UNLOCK(pmap); } } /* * Clear the wired attribute from the mappings for the specified range of * addresses in the given pmap. Every valid mapping within that range * must have the wired attribute set. In contrast, invalid mappings * cannot have the wired attribute set, so they are ignored. * * The wired attribute of the page table entry is not a hardware * feature, so there is no need to invalidate any TLB entries. * Since pmap_demote_pde() for the wired entry must never fail, * pmap_delayed_invl_started()/finished() calls around the * function are not needed. */ void pmap_unwire(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) { vm_offset_t va_next; pml4_entry_t *pml4e; pdp_entry_t *pdpe; pd_entry_t *pde; pt_entry_t *pte, PG_V; PG_V = pmap_valid_bit(pmap); PMAP_LOCK(pmap); for (; sva < eva; sva = va_next) { pml4e = pmap_pml4e(pmap, sva); if ((*pml4e & PG_V) == 0) { va_next = (sva + NBPML4) & ~PML4MASK; if (va_next < sva) va_next = eva; continue; } pdpe = pmap_pml4e_to_pdpe(pml4e, sva); if ((*pdpe & PG_V) == 0) { va_next = (sva + NBPDP) & ~PDPMASK; if (va_next < sva) va_next = eva; continue; } va_next = (sva + NBPDR) & ~PDRMASK; if (va_next < sva) va_next = eva; pde = pmap_pdpe_to_pde(pdpe, sva); if ((*pde & PG_V) == 0) continue; if ((*pde & PG_PS) != 0) { if ((*pde & PG_W) == 0) panic("pmap_unwire: pde %#jx is missing PG_W", (uintmax_t)*pde); /* * Are we unwiring the entire large page? If not, * demote the mapping and fall through. */ if (sva + NBPDR == va_next && eva >= va_next) { atomic_clear_long(pde, PG_W); pmap->pm_stats.wired_count -= NBPDR / PAGE_SIZE; continue; } else if (!pmap_demote_pde(pmap, pde, sva)) panic("pmap_unwire: demotion failed"); } if (va_next > eva) va_next = eva; for (pte = pmap_pde_to_pte(pde, sva); sva != va_next; pte++, sva += PAGE_SIZE) { if ((*pte & PG_V) == 0) continue; if ((*pte & PG_W) == 0) panic("pmap_unwire: pte %#jx is missing PG_W", (uintmax_t)*pte); /* * PG_W must be cleared atomically. Although the pmap * lock synchronizes access to PG_W, another processor * could be setting PG_M and/or PG_A concurrently. */ atomic_clear_long(pte, PG_W); pmap->pm_stats.wired_count--; } } PMAP_UNLOCK(pmap); } /* * Copy the range specified by src_addr/len * from the source map to the range dst_addr/len * in the destination map. * * This routine is only advisory and need not do anything. 
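 *
 * (The expected caller is fork-style address space duplication, in
 * which dst_addr == src_addr; presumably this is why the code below
 * simply returns when the two addresses differ.)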
*/ void pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len, vm_offset_t src_addr) { struct rwlock *lock; struct spglist free; vm_offset_t addr; vm_offset_t end_addr = src_addr + len; vm_offset_t va_next; pt_entry_t PG_A, PG_M, PG_V; if (dst_addr != src_addr) return; if (dst_pmap->pm_type != src_pmap->pm_type) return; /* * EPT page table entries that require emulation of A/D bits are * sensitive to clearing the PG_A bit (aka EPT_PG_READ). Although * we clear PG_M (aka EPT_PG_WRITE) concomitantly, the PG_U bit * (aka EPT_PG_EXECUTE) could still be set. Since some EPT * implementations flag an EPT misconfiguration for exec-only * mappings we skip this function entirely for emulated pmaps. */ if (pmap_emulate_ad_bits(dst_pmap)) return; lock = NULL; if (dst_pmap < src_pmap) { PMAP_LOCK(dst_pmap); PMAP_LOCK(src_pmap); } else { PMAP_LOCK(src_pmap); PMAP_LOCK(dst_pmap); } PG_A = pmap_accessed_bit(dst_pmap); PG_M = pmap_modified_bit(dst_pmap); PG_V = pmap_valid_bit(dst_pmap); for (addr = src_addr; addr < end_addr; addr = va_next) { pt_entry_t *src_pte, *dst_pte; vm_page_t dstmpde, dstmpte, srcmpte; pml4_entry_t *pml4e; pdp_entry_t *pdpe; pd_entry_t srcptepaddr, *pde; KASSERT(addr < UPT_MIN_ADDRESS, ("pmap_copy: invalid to pmap_copy page tables")); pml4e = pmap_pml4e(src_pmap, addr); if ((*pml4e & PG_V) == 0) { va_next = (addr + NBPML4) & ~PML4MASK; if (va_next < addr) va_next = end_addr; continue; } pdpe = pmap_pml4e_to_pdpe(pml4e, addr); if ((*pdpe & PG_V) == 0) { va_next = (addr + NBPDP) & ~PDPMASK; if (va_next < addr) va_next = end_addr; continue; } va_next = (addr + NBPDR) & ~PDRMASK; if (va_next < addr) va_next = end_addr; pde = pmap_pdpe_to_pde(pdpe, addr); srcptepaddr = *pde; if (srcptepaddr == 0) continue; if (srcptepaddr & PG_PS) { if ((addr & PDRMASK) != 0 || addr + NBPDR > end_addr) continue; dstmpde = pmap_allocpde(dst_pmap, addr, NULL); if (dstmpde == NULL) break; pde = (pd_entry_t *) PHYS_TO_DMAP(VM_PAGE_TO_PHYS(dstmpde)); pde = &pde[pmap_pde_index(addr)]; if (*pde == 0 && ((srcptepaddr & PG_MANAGED) == 0 || pmap_pv_insert_pde(dst_pmap, addr, srcptepaddr & PG_PS_FRAME, &lock))) { *pde = srcptepaddr & ~PG_W; pmap_resident_count_inc(dst_pmap, NBPDR / PAGE_SIZE); atomic_add_long(&pmap_pde_mappings, 1); } else dstmpde->wire_count--; continue; } srcptepaddr &= PG_FRAME; srcmpte = PHYS_TO_VM_PAGE(srcptepaddr); KASSERT(srcmpte->wire_count > 0, ("pmap_copy: source page table page is unused")); if (va_next > end_addr) va_next = end_addr; src_pte = (pt_entry_t *)PHYS_TO_DMAP(srcptepaddr); src_pte = &src_pte[pmap_pte_index(addr)]; dstmpte = NULL; while (addr < va_next) { pt_entry_t ptetemp; ptetemp = *src_pte; /* * we only virtual copy managed pages */ if ((ptetemp & PG_MANAGED) != 0) { if (dstmpte != NULL && dstmpte->pindex == pmap_pde_pindex(addr)) dstmpte->wire_count++; else if ((dstmpte = pmap_allocpte(dst_pmap, addr, NULL)) == NULL) goto out; dst_pte = (pt_entry_t *) PHYS_TO_DMAP(VM_PAGE_TO_PHYS(dstmpte)); dst_pte = &dst_pte[pmap_pte_index(addr)]; if (*dst_pte == 0 && pmap_try_insert_pv_entry(dst_pmap, addr, PHYS_TO_VM_PAGE(ptetemp & PG_FRAME), &lock)) { /* * Clear the wired, modified, and * accessed (referenced) bits * during the copy. */ *dst_pte = ptetemp & ~(PG_W | PG_M | PG_A); pmap_resident_count_inc(dst_pmap, 1); } else { SLIST_INIT(&free); if (pmap_unwire_ptp(dst_pmap, addr, dstmpte, &free)) { /* * Although "addr" is not * mapped, paging-structure * caches could nonetheless * have entries that refer to * the freed page table pages. 
* Invalidate those entries. */ pmap_invalidate_page(dst_pmap, addr); pmap_free_zero_pages(&free); } goto out; } if (dstmpte->wire_count >= srcmpte->wire_count) break; } addr += PAGE_SIZE; src_pte++; } } out: if (lock != NULL) rw_wunlock(lock); PMAP_UNLOCK(src_pmap); PMAP_UNLOCK(dst_pmap); } /* - * pmap_zero_page zeros the specified hardware page by mapping - * the page into KVM and using bzero to clear its contents. + * Zero the specified hardware page. */ void pmap_zero_page(vm_page_t m) { vm_offset_t va = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)); pagezero((void *)va); } /* - * pmap_zero_page_area zeros the specified hardware page by mapping - * the page into KVM and using bzero to clear its contents. - * - * off and size may not cover an area beyond a single hardware page. + * Zero an area within a single hardware page. off and size must not + * cover an area beyond a single hardware page. */ void pmap_zero_page_area(vm_page_t m, int off, int size) { vm_offset_t va = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)); if (off == 0 && size == PAGE_SIZE) pagezero((void *)va); else bzero((char *)va + off, size); } /* - * pmap_zero_page_idle zeros the specified hardware page by mapping - * the page into KVM and using bzero to clear its contents. This - * is intended to be called from the vm_pagezero process only and - * outside of Giant. + * Zero the specified hardware page in a way that minimizes cache thrashing. + * This is intended to be called from the vm_pagezero process only and + * outside of Giant. */ void pmap_zero_page_idle(vm_page_t m) { vm_offset_t va = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)); - pagezero((void *)va); + sse2_pagezero((void *)va); } /* - * pmap_copy_page copies the specified (machine independent) - * page by mapping the page into virtual memory and using - * bcopy to copy the page, one machine dependent page at a - * time. + * Copy one hardware page to another. */ void pmap_copy_page(vm_page_t msrc, vm_page_t mdst) { vm_offset_t src = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(msrc)); vm_offset_t dst = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(mdst)); pagecopy((void *)src, (void *)dst); } int unmapped_buf_allowed = 1; void pmap_copy_pages(vm_page_t ma[], vm_offset_t a_offset, vm_page_t mb[], vm_offset_t b_offset, int xfersize) { void *a_cp, *b_cp; vm_page_t pages[2]; vm_offset_t vaddr[2], a_pg_offset, b_pg_offset; int cnt; boolean_t mapped; while (xfersize > 0) { a_pg_offset = a_offset & PAGE_MASK; pages[0] = ma[a_offset >> PAGE_SHIFT]; b_pg_offset = b_offset & PAGE_MASK; pages[1] = mb[b_offset >> PAGE_SHIFT]; cnt = min(xfersize, PAGE_SIZE - a_pg_offset); cnt = min(cnt, PAGE_SIZE - b_pg_offset); mapped = pmap_map_io_transient(pages, vaddr, 2, FALSE); a_cp = (char *)vaddr[0] + a_pg_offset; b_cp = (char *)vaddr[1] + b_pg_offset; bcopy(a_cp, b_cp, cnt); if (__predict_false(mapped)) pmap_unmap_io_transient(pages, vaddr, 2, FALSE); a_offset += cnt; b_offset += cnt; xfersize -= cnt; } } /* * Returns true if the pmap's pv is one of the first * 16 pvs linked to from this page. This count may * be changed upwards or downwards in the future; it * is only necessary that true be returned for a small * subset of pmaps for proper page aging.
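 *
 * (Both the page's own pv list and, for non-fictitious pages, the pv
 * list of the containing 2MB page are scanned below; "loops" counts
 * entries from the two lists against the same cap of 16.)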
*/ boolean_t pmap_page_exists_quick(pmap_t pmap, vm_page_t m) { struct md_page *pvh; struct rwlock *lock; pv_entry_t pv; int loops = 0; boolean_t rv; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_page_exists_quick: page %p is not managed", m)); rv = FALSE; lock = VM_PAGE_TO_PV_LIST_LOCK(m); rw_rlock(lock); TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { if (PV_PMAP(pv) == pmap) { rv = TRUE; break; } loops++; if (loops >= 16) break; } if (!rv && loops < 16 && (m->flags & PG_FICTITIOUS) == 0) { pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) { if (PV_PMAP(pv) == pmap) { rv = TRUE; break; } loops++; if (loops >= 16) break; } } rw_runlock(lock); return (rv); } /* * pmap_page_wired_mappings: * * Return the number of managed mappings to the given physical page * that are wired. */ int pmap_page_wired_mappings(vm_page_t m) { struct rwlock *lock; struct md_page *pvh; pmap_t pmap; pt_entry_t *pte; pv_entry_t pv; int count, md_gen, pvh_gen; if ((m->oflags & VPO_UNMANAGED) != 0) return (0); lock = VM_PAGE_TO_PV_LIST_LOCK(m); rw_rlock(lock); restart: count = 0; TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { pmap = PV_PMAP(pv); if (!PMAP_TRYLOCK(pmap)) { md_gen = m->md.pv_gen; rw_runlock(lock); PMAP_LOCK(pmap); rw_rlock(lock); if (md_gen != m->md.pv_gen) { PMAP_UNLOCK(pmap); goto restart; } } pte = pmap_pte(pmap, pv->pv_va); if ((*pte & PG_W) != 0) count++; PMAP_UNLOCK(pmap); } if ((m->flags & PG_FICTITIOUS) == 0) { pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) { pmap = PV_PMAP(pv); if (!PMAP_TRYLOCK(pmap)) { md_gen = m->md.pv_gen; pvh_gen = pvh->pv_gen; rw_runlock(lock); PMAP_LOCK(pmap); rw_rlock(lock); if (md_gen != m->md.pv_gen || pvh_gen != pvh->pv_gen) { PMAP_UNLOCK(pmap); goto restart; } } pte = pmap_pde(pmap, pv->pv_va); if ((*pte & PG_W) != 0) count++; PMAP_UNLOCK(pmap); } } rw_runlock(lock); return (count); } /* * Returns TRUE if the given page is mapped individually or as part of * a 2mpage. Otherwise, returns FALSE. */ boolean_t pmap_page_is_mapped(vm_page_t m) { struct rwlock *lock; boolean_t rv; if ((m->oflags & VPO_UNMANAGED) != 0) return (FALSE); lock = VM_PAGE_TO_PV_LIST_LOCK(m); rw_rlock(lock); rv = !TAILQ_EMPTY(&m->md.pv_list) || ((m->flags & PG_FICTITIOUS) == 0 && !TAILQ_EMPTY(&pa_to_pvh(VM_PAGE_TO_PHYS(m))->pv_list)); rw_runlock(lock); return (rv); } /* * Destroy all managed, non-wired mappings in the given user-space * pmap. This pmap cannot be active on any processor besides the * caller. * * This function cannot be applied to the kernel pmap. Moreover, it * is not intended for general use. It is only to be used during * process termination. Consequently, it can be implemented in ways * that make it faster than pmap_remove(). First, it can more quickly * destroy mappings by iterating over the pmap's collection of PV * entries, rather than searching the page table. Second, it doesn't * have to test and clear the page table entries atomically, because * no processor is currently accessing the user address space. In * particular, a page table entry's dirty bit won't change state once * this function starts. */ void pmap_remove_pages(pmap_t pmap) { pd_entry_t ptepde; pt_entry_t *pte, tpte; pt_entry_t PG_M, PG_RW, PG_V; struct spglist free; vm_page_t m, mpte, mt; pv_entry_t pv; struct md_page *pvh; struct pv_chunk *pc, *npc; struct rwlock *lock; int64_t bit; uint64_t inuse, bitmask; int allfree, field, freed, idx; boolean_t superpage; vm_paddr_t pa; /* * Assert that the given pmap is only active on the current * CPU. 
Unfortunately, we cannot block another CPU from * activating the pmap while this function is executing. */ KASSERT(pmap == PCPU_GET(curpmap), ("non-current pmap %p", pmap)); #ifdef INVARIANTS { cpuset_t other_cpus; other_cpus = all_cpus; critical_enter(); CPU_CLR(PCPU_GET(cpuid), &other_cpus); CPU_AND(&other_cpus, &pmap->pm_active); critical_exit(); KASSERT(CPU_EMPTY(&other_cpus), ("pmap active %p", pmap)); } #endif lock = NULL; PG_M = pmap_modified_bit(pmap); PG_V = pmap_valid_bit(pmap); PG_RW = pmap_rw_bit(pmap); SLIST_INIT(&free); PMAP_LOCK(pmap); TAILQ_FOREACH_SAFE(pc, &pmap->pm_pvchunk, pc_list, npc) { allfree = 1; freed = 0; for (field = 0; field < _NPCM; field++) { inuse = ~pc->pc_map[field] & pc_freemask[field]; while (inuse != 0) { bit = bsfq(inuse); bitmask = 1UL << bit; idx = field * 64 + bit; pv = &pc->pc_pventry[idx]; inuse &= ~bitmask; pte = pmap_pdpe(pmap, pv->pv_va); ptepde = *pte; pte = pmap_pdpe_to_pde(pte, pv->pv_va); tpte = *pte; if ((tpte & (PG_PS | PG_V)) == PG_V) { superpage = FALSE; ptepde = tpte; pte = (pt_entry_t *)PHYS_TO_DMAP(tpte & PG_FRAME); pte = &pte[pmap_pte_index(pv->pv_va)]; tpte = *pte; } else { /* * Keep track whether 'tpte' is a * superpage explicitly instead of * relying on PG_PS being set. * * This is because PG_PS is numerically * identical to PG_PTE_PAT and thus a * regular page could be mistaken for * a superpage. */ superpage = TRUE; } if ((tpte & PG_V) == 0) { panic("bad pte va %lx pte %lx", pv->pv_va, tpte); } /* * We cannot remove wired pages from a process' mapping at this time */ if (tpte & PG_W) { allfree = 0; continue; } if (superpage) pa = tpte & PG_PS_FRAME; else pa = tpte & PG_FRAME; m = PHYS_TO_VM_PAGE(pa); KASSERT(m->phys_addr == pa, ("vm_page_t %p phys_addr mismatch %016jx %016jx", m, (uintmax_t)m->phys_addr, (uintmax_t)tpte)); KASSERT((m->flags & PG_FICTITIOUS) != 0 || m < &vm_page_array[vm_page_array_size], ("pmap_remove_pages: bad tpte %#jx", (uintmax_t)tpte)); pte_clear(pte); /* * Update the vm_page_t clean/reference bits. 
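 * A PTE is treated as dirty only when PG_M and PG_RW
 * are both set.  The paths that write-protect a
 * mapping clear the two bits together, so a PG_M bit
 * observed without PG_RW is presumably stale and is
 * ignored here.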
*/ if ((tpte & (PG_M | PG_RW)) == (PG_M | PG_RW)) { if (superpage) { for (mt = m; mt < &m[NBPDR / PAGE_SIZE]; mt++) vm_page_dirty(mt); } else vm_page_dirty(m); } CHANGE_PV_LIST_LOCK_TO_VM_PAGE(&lock, m); /* Mark free */ pc->pc_map[field] |= bitmask; if (superpage) { pmap_resident_count_dec(pmap, NBPDR / PAGE_SIZE); pvh = pa_to_pvh(tpte & PG_PS_FRAME); TAILQ_REMOVE(&pvh->pv_list, pv, pv_next); pvh->pv_gen++; if (TAILQ_EMPTY(&pvh->pv_list)) { for (mt = m; mt < &m[NBPDR / PAGE_SIZE]; mt++) if ((mt->aflags & PGA_WRITEABLE) != 0 && TAILQ_EMPTY(&mt->md.pv_list)) vm_page_aflag_clear(mt, PGA_WRITEABLE); } mpte = pmap_lookup_pt_page(pmap, pv->pv_va); if (mpte != NULL) { pmap_remove_pt_page(pmap, mpte); pmap_resident_count_dec(pmap, 1); KASSERT(mpte->wire_count == NPTEPG, ("pmap_remove_pages: pte page wire count error")); mpte->wire_count = 0; pmap_add_delayed_free_list(mpte, &free, FALSE); atomic_subtract_int(&vm_cnt.v_wire_count, 1); } } else { pmap_resident_count_dec(pmap, 1); TAILQ_REMOVE(&m->md.pv_list, pv, pv_next); m->md.pv_gen++; if ((m->aflags & PGA_WRITEABLE) != 0 && TAILQ_EMPTY(&m->md.pv_list) && (m->flags & PG_FICTITIOUS) == 0) { pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); if (TAILQ_EMPTY(&pvh->pv_list)) vm_page_aflag_clear(m, PGA_WRITEABLE); } } pmap_unuse_pt(pmap, pv->pv_va, ptepde, &free); freed++; } } PV_STAT(atomic_add_long(&pv_entry_frees, freed)); PV_STAT(atomic_add_int(&pv_entry_spare, freed)); PV_STAT(atomic_subtract_long(&pv_entry_count, freed)); if (allfree) { TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); free_pv_chunk(pc); } } if (lock != NULL) rw_wunlock(lock); pmap_invalidate_all(pmap); PMAP_UNLOCK(pmap); pmap_free_zero_pages(&free); } static boolean_t pmap_page_test_mappings(vm_page_t m, boolean_t accessed, boolean_t modified) { struct rwlock *lock; pv_entry_t pv; struct md_page *pvh; pt_entry_t *pte, mask; pt_entry_t PG_A, PG_M, PG_RW, PG_V; pmap_t pmap; int md_gen, pvh_gen; boolean_t rv; rv = FALSE; lock = VM_PAGE_TO_PV_LIST_LOCK(m); rw_rlock(lock); restart: TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { pmap = PV_PMAP(pv); if (!PMAP_TRYLOCK(pmap)) { md_gen = m->md.pv_gen; rw_runlock(lock); PMAP_LOCK(pmap); rw_rlock(lock); if (md_gen != m->md.pv_gen) { PMAP_UNLOCK(pmap); goto restart; } } pte = pmap_pte(pmap, pv->pv_va); mask = 0; if (modified) { PG_M = pmap_modified_bit(pmap); PG_RW = pmap_rw_bit(pmap); mask |= PG_RW | PG_M; } if (accessed) { PG_A = pmap_accessed_bit(pmap); PG_V = pmap_valid_bit(pmap); mask |= PG_V | PG_A; } rv = (*pte & mask) == mask; PMAP_UNLOCK(pmap); if (rv) goto out; } if ((m->flags & PG_FICTITIOUS) == 0) { pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) { pmap = PV_PMAP(pv); if (!PMAP_TRYLOCK(pmap)) { md_gen = m->md.pv_gen; pvh_gen = pvh->pv_gen; rw_runlock(lock); PMAP_LOCK(pmap); rw_rlock(lock); if (md_gen != m->md.pv_gen || pvh_gen != pvh->pv_gen) { PMAP_UNLOCK(pmap); goto restart; } } pte = pmap_pde(pmap, pv->pv_va); mask = 0; if (modified) { PG_M = pmap_modified_bit(pmap); PG_RW = pmap_rw_bit(pmap); mask |= PG_RW | PG_M; } if (accessed) { PG_A = pmap_accessed_bit(pmap); PG_V = pmap_valid_bit(pmap); mask |= PG_V | PG_A; } rv = (*pte & mask) == mask; PMAP_UNLOCK(pmap); if (rv) goto out; } } out: rw_runlock(lock); return (rv); } /* * pmap_is_modified: * * Return whether or not the specified physical page was modified * in any physical maps. 
*/ boolean_t pmap_is_modified(vm_page_t m) { KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_is_modified: page %p is not managed", m)); /* * If the page is not exclusive busied, then PGA_WRITEABLE cannot be * concurrently set while the object is locked. Thus, if PGA_WRITEABLE * is clear, no PTEs can have PG_M set. */ VM_OBJECT_ASSERT_WLOCKED(m->object); if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0) return (FALSE); return (pmap_page_test_mappings(m, FALSE, TRUE)); } /* * pmap_is_prefaultable: * * Return whether or not the specified virtual address is eligible * for prefault. */ boolean_t pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr) { pd_entry_t *pde; pt_entry_t *pte, PG_V; boolean_t rv; PG_V = pmap_valid_bit(pmap); rv = FALSE; PMAP_LOCK(pmap); pde = pmap_pde(pmap, addr); if (pde != NULL && (*pde & (PG_PS | PG_V)) == PG_V) { pte = pmap_pde_to_pte(pde, addr); rv = (*pte & PG_V) == 0; } PMAP_UNLOCK(pmap); return (rv); } /* * pmap_is_referenced: * * Return whether or not the specified physical page was referenced * in any physical maps. */ boolean_t pmap_is_referenced(vm_page_t m) { KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_is_referenced: page %p is not managed", m)); return (pmap_page_test_mappings(m, TRUE, FALSE)); } /* * Clear the write and modified bits in each of the given page's mappings. */ void pmap_remove_write(vm_page_t m) { struct md_page *pvh; pmap_t pmap; struct rwlock *lock; pv_entry_t next_pv, pv; pd_entry_t *pde; pt_entry_t oldpte, *pte, PG_M, PG_RW; vm_offset_t va; int pvh_gen, md_gen; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_remove_write: page %p is not managed", m)); /* * If the page is not exclusive busied, then PGA_WRITEABLE cannot be * set by another thread while the object is locked. Thus, * if PGA_WRITEABLE is clear, no page table entries need updating. */ VM_OBJECT_ASSERT_WLOCKED(m->object); if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0) return; lock = VM_PAGE_TO_PV_LIST_LOCK(m); pvh = (m->flags & PG_FICTITIOUS) != 0 ? 
&pv_dummy : pa_to_pvh(VM_PAGE_TO_PHYS(m)); retry_pv_loop: rw_wlock(lock); TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_next, next_pv) { pmap = PV_PMAP(pv); if (!PMAP_TRYLOCK(pmap)) { pvh_gen = pvh->pv_gen; rw_wunlock(lock); PMAP_LOCK(pmap); rw_wlock(lock); if (pvh_gen != pvh->pv_gen) { PMAP_UNLOCK(pmap); rw_wunlock(lock); goto retry_pv_loop; } } PG_RW = pmap_rw_bit(pmap); va = pv->pv_va; pde = pmap_pde(pmap, va); if ((*pde & PG_RW) != 0) (void)pmap_demote_pde_locked(pmap, pde, va, &lock); KASSERT(lock == VM_PAGE_TO_PV_LIST_LOCK(m), ("inconsistent pv lock %p %p for page %p", lock, VM_PAGE_TO_PV_LIST_LOCK(m), m)); PMAP_UNLOCK(pmap); } TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { pmap = PV_PMAP(pv); if (!PMAP_TRYLOCK(pmap)) { pvh_gen = pvh->pv_gen; md_gen = m->md.pv_gen; rw_wunlock(lock); PMAP_LOCK(pmap); rw_wlock(lock); if (pvh_gen != pvh->pv_gen || md_gen != m->md.pv_gen) { PMAP_UNLOCK(pmap); rw_wunlock(lock); goto retry_pv_loop; } } PG_M = pmap_modified_bit(pmap); PG_RW = pmap_rw_bit(pmap); pde = pmap_pde(pmap, pv->pv_va); KASSERT((*pde & PG_PS) == 0, ("pmap_remove_write: found a 2mpage in page %p's pv list", m)); pte = pmap_pde_to_pte(pde, pv->pv_va); retry: oldpte = *pte; if (oldpte & PG_RW) { if (!atomic_cmpset_long(pte, oldpte, oldpte & ~(PG_RW | PG_M))) goto retry; if ((oldpte & PG_M) != 0) vm_page_dirty(m); pmap_invalidate_page(pmap, pv->pv_va); } PMAP_UNLOCK(pmap); } rw_wunlock(lock); vm_page_aflag_clear(m, PGA_WRITEABLE); pmap_delayed_invl_wait(m); } static __inline boolean_t safe_to_clear_referenced(pmap_t pmap, pt_entry_t pte) { if (!pmap_emulate_ad_bits(pmap)) return (TRUE); KASSERT(pmap->pm_type == PT_EPT, ("invalid pm_type %d", pmap->pm_type)); /* * XWR = 010 or 110 will cause an unconditional EPT misconfiguration * so we don't let the referenced (aka EPT_PG_READ) bit to be cleared * if the EPT_PG_WRITE bit is set. */ if ((pte & EPT_PG_WRITE) != 0) return (FALSE); /* * XWR = 100 is allowed only if the PMAP_SUPPORTS_EXEC_ONLY is set. */ if ((pte & EPT_PG_EXECUTE) == 0 || ((pmap->pm_flags & PMAP_SUPPORTS_EXEC_ONLY) != 0)) return (TRUE); else return (FALSE); } #define PMAP_TS_REFERENCED_MAX 5 /* * pmap_ts_referenced: * * Return a count of reference bits for a page, clearing those bits. * It is not necessary for every reference bit to be cleared, but it * is necessary that 0 only be returned when there are truly no * reference bits set. * * XXX: The exact number of bits to check and clear is a matter that * should be tested and standardized at some point in the future for * optimal aging of shared pages. * * A DI block is not needed within this function, because * invalidations are performed before the PV list lock is * released. */ int pmap_ts_referenced(vm_page_t m) { struct md_page *pvh; pv_entry_t pv, pvf; pmap_t pmap; struct rwlock *lock; pd_entry_t oldpde, *pde; pt_entry_t *pte, PG_A; vm_offset_t va; vm_paddr_t pa; int cleared, md_gen, not_cleared, pvh_gen; struct spglist free; boolean_t demoted; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_ts_referenced: page %p is not managed", m)); SLIST_INIT(&free); cleared = 0; pa = VM_PAGE_TO_PHYS(m); lock = PHYS_TO_PV_LIST_LOCK(pa); pvh = (m->flags & PG_FICTITIOUS) != 0 ? 
&pv_dummy : pa_to_pvh(pa); rw_wlock(lock); retry: not_cleared = 0; if ((pvf = TAILQ_FIRST(&pvh->pv_list)) == NULL) goto small_mappings; pv = pvf; do { if (pvf == NULL) pvf = pv; pmap = PV_PMAP(pv); if (!PMAP_TRYLOCK(pmap)) { pvh_gen = pvh->pv_gen; rw_wunlock(lock); PMAP_LOCK(pmap); rw_wlock(lock); if (pvh_gen != pvh->pv_gen) { PMAP_UNLOCK(pmap); goto retry; } } PG_A = pmap_accessed_bit(pmap); va = pv->pv_va; pde = pmap_pde(pmap, pv->pv_va); oldpde = *pde; if ((*pde & PG_A) != 0) { /* * Since this reference bit is shared by 512 4KB * pages, it should not be cleared every time it is * tested. Apply a simple "hash" function on the * physical page number, the virtual superpage number, * and the pmap address to select one 4KB page out of * the 512 on which testing the reference bit will * result in clearing that reference bit. This * function is designed to avoid the selection of the * same 4KB page for every 2MB page mapping. * * On demotion, a mapping that hasn't been referenced * is simply destroyed. To avoid the possibility of a * subsequent page fault on a demoted wired mapping, * always leave its reference bit set. Moreover, * since the superpage is wired, the current state of * its reference bit won't affect page replacement. */ if ((((pa >> PAGE_SHIFT) ^ (pv->pv_va >> PDRSHIFT) ^ (uintptr_t)pmap) & (NPTEPG - 1)) == 0 && (*pde & PG_W) == 0) { if (safe_to_clear_referenced(pmap, oldpde)) { atomic_clear_long(pde, PG_A); pmap_invalidate_page(pmap, pv->pv_va); demoted = FALSE; } else if (pmap_demote_pde_locked(pmap, pde, pv->pv_va, &lock)) { /* * Remove the mapping to a single page * so that a subsequent access may * repromote. Since the underlying * page table page is fully populated, * this removal never frees a page * table page. */ demoted = TRUE; va += VM_PAGE_TO_PHYS(m) - (oldpde & PG_PS_FRAME); pte = pmap_pde_to_pte(pde, va); pmap_remove_pte(pmap, pte, va, *pde, NULL, &lock); pmap_invalidate_page(pmap, va); } else demoted = TRUE; if (demoted) { /* * The superpage mapping was removed * entirely and therefore 'pv' is no * longer valid. */ if (pvf == pv) pvf = NULL; pv = NULL; } cleared++; KASSERT(lock == VM_PAGE_TO_PV_LIST_LOCK(m), ("inconsistent pv lock %p %p for page %p", lock, VM_PAGE_TO_PV_LIST_LOCK(m), m)); } else not_cleared++; } PMAP_UNLOCK(pmap); /* Rotate the PV list if it has more than one entry. */ if (pv != NULL && TAILQ_NEXT(pv, pv_next) != NULL) { TAILQ_REMOVE(&pvh->pv_list, pv, pv_next); TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_next); pvh->pv_gen++; } if (cleared + not_cleared >= PMAP_TS_REFERENCED_MAX) goto out; } while ((pv = TAILQ_FIRST(&pvh->pv_list)) != pvf); small_mappings: if ((pvf = TAILQ_FIRST(&m->md.pv_list)) == NULL) goto out; pv = pvf; do { if (pvf == NULL) pvf = pv; pmap = PV_PMAP(pv); if (!PMAP_TRYLOCK(pmap)) { pvh_gen = pvh->pv_gen; md_gen = m->md.pv_gen; rw_wunlock(lock); PMAP_LOCK(pmap); rw_wlock(lock); if (pvh_gen != pvh->pv_gen || md_gen != m->md.pv_gen) { PMAP_UNLOCK(pmap); goto retry; } } PG_A = pmap_accessed_bit(pmap); pde = pmap_pde(pmap, pv->pv_va); KASSERT((*pde & PG_PS) == 0, ("pmap_ts_referenced: found a 2mpage in page %p's pv list", m)); pte = pmap_pde_to_pte(pde, pv->pv_va); if ((*pte & PG_A) != 0) { if (safe_to_clear_referenced(pmap, *pte)) { atomic_clear_long(pte, PG_A); pmap_invalidate_page(pmap, pv->pv_va); cleared++; } else if ((*pte & PG_W) == 0) { /* * Wired pages cannot be paged out so * doing accessed bit emulation for * them is wasted effort. We do the * hard work for unwired pages only. 
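 * Removing the 4KB mapping is what emulates the
 * clearing: the next access faults, and re-entering
 * the mapping sets the accessed state again, just as
 * the hardware would re-set a cleared PG_A.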
*/ pmap_remove_pte(pmap, pte, pv->pv_va, *pde, &free, &lock); pmap_invalidate_page(pmap, pv->pv_va); cleared++; if (pvf == pv) pvf = NULL; pv = NULL; KASSERT(lock == VM_PAGE_TO_PV_LIST_LOCK(m), ("inconsistent pv lock %p %p for page %p", lock, VM_PAGE_TO_PV_LIST_LOCK(m), m)); } else not_cleared++; } PMAP_UNLOCK(pmap); /* Rotate the PV list if it has more than one entry. */ if (pv != NULL && TAILQ_NEXT(pv, pv_next) != NULL) { TAILQ_REMOVE(&m->md.pv_list, pv, pv_next); TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); m->md.pv_gen++; } } while ((pv = TAILQ_FIRST(&m->md.pv_list)) != pvf && cleared + not_cleared < PMAP_TS_REFERENCED_MAX); out: rw_wunlock(lock); pmap_free_zero_pages(&free); return (cleared + not_cleared); } /* * Apply the given advice to the specified range of addresses within the * given pmap. Depending on the advice, clear the referenced and/or * modified flags in each mapping and set the mapped page's dirty field. */ void pmap_advise(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, int advice) { struct rwlock *lock; pml4_entry_t *pml4e; pdp_entry_t *pdpe; pd_entry_t oldpde, *pde; pt_entry_t *pte, PG_A, PG_G, PG_M, PG_RW, PG_V; vm_offset_t va_next; vm_page_t m; boolean_t anychanged; if (advice != MADV_DONTNEED && advice != MADV_FREE) return; /* * A/D bit emulation requires an alternate code path when clearing * the modified and accessed bits below. Since this function is * advisory in nature we skip it entirely for pmaps that require * A/D bit emulation. */ if (pmap_emulate_ad_bits(pmap)) return; PG_A = pmap_accessed_bit(pmap); PG_G = pmap_global_bit(pmap); PG_M = pmap_modified_bit(pmap); PG_V = pmap_valid_bit(pmap); PG_RW = pmap_rw_bit(pmap); anychanged = FALSE; pmap_delayed_invl_started(); PMAP_LOCK(pmap); for (; sva < eva; sva = va_next) { pml4e = pmap_pml4e(pmap, sva); if ((*pml4e & PG_V) == 0) { va_next = (sva + NBPML4) & ~PML4MASK; if (va_next < sva) va_next = eva; continue; } pdpe = pmap_pml4e_to_pdpe(pml4e, sva); if ((*pdpe & PG_V) == 0) { va_next = (sva + NBPDP) & ~PDPMASK; if (va_next < sva) va_next = eva; continue; } va_next = (sva + NBPDR) & ~PDRMASK; if (va_next < sva) va_next = eva; pde = pmap_pdpe_to_pde(pdpe, sva); oldpde = *pde; if ((oldpde & PG_V) == 0) continue; else if ((oldpde & PG_PS) != 0) { if ((oldpde & PG_MANAGED) == 0) continue; lock = NULL; if (!pmap_demote_pde_locked(pmap, pde, sva, &lock)) { if (lock != NULL) rw_wunlock(lock); /* * The large page mapping was destroyed. */ continue; } /* * Unless the page mappings are wired, remove the * mapping to a single page so that a subsequent * access may repromote. Since the underlying page * table page is fully populated, this removal never * frees a page table page. */ if ((oldpde & PG_W) == 0) { pte = pmap_pde_to_pte(pde, sva); KASSERT((*pte & PG_V) != 0, ("pmap_advise: invalid PTE")); pmap_remove_pte(pmap, pte, sva, *pde, NULL, &lock); anychanged = TRUE; } if (lock != NULL) rw_wunlock(lock); } if (va_next > eva) va_next = eva; for (pte = pmap_pde_to_pte(pde, sva); sva != va_next; pte++, sva += PAGE_SIZE) { if ((*pte & (PG_MANAGED | PG_V)) != (PG_MANAGED | PG_V)) continue; else if ((*pte & (PG_M | PG_RW)) == (PG_M | PG_RW)) { if (advice == MADV_DONTNEED) { /* * Future calls to pmap_is_modified() * can be avoided by making the page * dirty now. 
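 * vm_page_dirty() records the modification in the
 * vm_page before the atomic_clear_long() below
 * discards PG_M from the PTE.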
*/ m = PHYS_TO_VM_PAGE(*pte & PG_FRAME); vm_page_dirty(m); } atomic_clear_long(pte, PG_M | PG_A); } else if ((*pte & PG_A) != 0) atomic_clear_long(pte, PG_A); else continue; if ((*pte & PG_G) != 0) pmap_invalidate_page(pmap, sva); else anychanged = TRUE; } } if (anychanged) pmap_invalidate_all(pmap); PMAP_UNLOCK(pmap); pmap_delayed_invl_finished(); } /* * Clear the modify bits on the specified physical page. */ void pmap_clear_modify(vm_page_t m) { struct md_page *pvh; pmap_t pmap; pv_entry_t next_pv, pv; pd_entry_t oldpde, *pde; pt_entry_t oldpte, *pte, PG_M, PG_RW, PG_V; struct rwlock *lock; vm_offset_t va; int md_gen, pvh_gen; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_clear_modify: page %p is not managed", m)); VM_OBJECT_ASSERT_WLOCKED(m->object); KASSERT(!vm_page_xbusied(m), ("pmap_clear_modify: page %p is exclusive busied", m)); /* * If the page is not PGA_WRITEABLE, then no PTEs can have PG_M set. * If the object containing the page is locked and the page is not * exclusive busied, then PGA_WRITEABLE cannot be concurrently set. */ if ((m->aflags & PGA_WRITEABLE) == 0) return; pvh = (m->flags & PG_FICTITIOUS) != 0 ? &pv_dummy : pa_to_pvh(VM_PAGE_TO_PHYS(m)); lock = VM_PAGE_TO_PV_LIST_LOCK(m); rw_wlock(lock); restart: TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_next, next_pv) { pmap = PV_PMAP(pv); if (!PMAP_TRYLOCK(pmap)) { pvh_gen = pvh->pv_gen; rw_wunlock(lock); PMAP_LOCK(pmap); rw_wlock(lock); if (pvh_gen != pvh->pv_gen) { PMAP_UNLOCK(pmap); goto restart; } } PG_M = pmap_modified_bit(pmap); PG_V = pmap_valid_bit(pmap); PG_RW = pmap_rw_bit(pmap); va = pv->pv_va; pde = pmap_pde(pmap, va); oldpde = *pde; if ((oldpde & PG_RW) != 0) { if (pmap_demote_pde_locked(pmap, pde, va, &lock)) { if ((oldpde & PG_W) == 0) { /* * Write protect the mapping to a * single page so that a subsequent * write access may repromote. */ va += VM_PAGE_TO_PHYS(m) - (oldpde & PG_PS_FRAME); pte = pmap_pde_to_pte(pde, va); oldpte = *pte; if ((oldpte & PG_V) != 0) { while (!atomic_cmpset_long(pte, oldpte, oldpte & ~(PG_M | PG_RW))) oldpte = *pte; vm_page_dirty(m); pmap_invalidate_page(pmap, va); } } } } PMAP_UNLOCK(pmap); } TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { pmap = PV_PMAP(pv); if (!PMAP_TRYLOCK(pmap)) { md_gen = m->md.pv_gen; pvh_gen = pvh->pv_gen; rw_wunlock(lock); PMAP_LOCK(pmap); rw_wlock(lock); if (pvh_gen != pvh->pv_gen || md_gen != m->md.pv_gen) { PMAP_UNLOCK(pmap); goto restart; } } PG_M = pmap_modified_bit(pmap); PG_RW = pmap_rw_bit(pmap); pde = pmap_pde(pmap, pv->pv_va); KASSERT((*pde & PG_PS) == 0, ("pmap_clear_modify: found" " a 2mpage in page %p's pv list", m)); pte = pmap_pde_to_pte(pde, pv->pv_va); if ((*pte & (PG_M | PG_RW)) == (PG_M | PG_RW)) { atomic_clear_long(pte, PG_M); pmap_invalidate_page(pmap, pv->pv_va); } PMAP_UNLOCK(pmap); } rw_wunlock(lock); } /* * Miscellaneous support routines follow */ /* Adjust the cache mode for a 4KB page mapped via a PTE. */ static __inline void pmap_pte_attr(pt_entry_t *pte, int cache_bits, int mask) { u_int opte, npte; /* * The cache mode bits are all in the low 32-bits of the * PTE, so we can just spin on updating the low 32-bits. */ do { opte = *(u_int *)pte; npte = opte & ~mask; npte |= cache_bits; } while (npte != opte && !atomic_cmpset_int((u_int *)pte, opte, npte)); } /* Adjust the cache mode for a 2MB page mapped via a PDE. */ static __inline void pmap_pde_attr(pd_entry_t *pde, int cache_bits, int mask) { u_int opde, npde; /* * The cache mode bits are all in the low 32-bits of the * PDE, so we can just spin on updating the low 32-bits. 
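 *
 * (For reference: on x86 the PDE cache-control bits are PG_NC_PWT
 * (bit 3), PG_NC_PCD (bit 4), and PG_PDE_PAT (bit 12), all within the
 * low 32 bits, which is what makes the 32-bit compare-and-swap below
 * sufficient.)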
*/ do { opde = *(u_int *)pde; npde = opde & ~mask; npde |= cache_bits; } while (npde != opde && !atomic_cmpset_int((u_int *)pde, opde, npde)); } /* * Map a set of physical memory pages into the kernel virtual * address space. Return a pointer to where it is mapped. This * routine is intended to be used for mapping device memory, * NOT real memory. */ void * pmap_mapdev_attr(vm_paddr_t pa, vm_size_t size, int mode) { struct pmap_preinit_mapping *ppim; vm_offset_t va, offset; vm_size_t tmpsize; int i; offset = pa & PAGE_MASK; size = round_page(offset + size); pa = trunc_page(pa); if (!pmap_initialized) { va = 0; for (i = 0; i < PMAP_PREINIT_MAPPING_COUNT; i++) { ppim = pmap_preinit_mapping + i; if (ppim->va == 0) { ppim->pa = pa; ppim->sz = size; ppim->mode = mode; ppim->va = virtual_avail; virtual_avail += size; va = ppim->va; break; } } if (va == 0) panic("%s: too many preinit mappings", __func__); } else { /* * If we have a preinit mapping, re-use it. */ for (i = 0; i < PMAP_PREINIT_MAPPING_COUNT; i++) { ppim = pmap_preinit_mapping + i; if (ppim->pa == pa && ppim->sz == size && ppim->mode == mode) return ((void *)(ppim->va + offset)); } /* * If the specified range of physical addresses fits within * the direct map window, use the direct map. */ if (pa < dmaplimit && pa + size < dmaplimit) { va = PHYS_TO_DMAP(pa); if (!pmap_change_attr(va, size, mode)) return ((void *)(va + offset)); } va = kva_alloc(size); if (va == 0) panic("%s: Couldn't allocate KVA", __func__); } for (tmpsize = 0; tmpsize < size; tmpsize += PAGE_SIZE) pmap_kenter_attr(va + tmpsize, pa + tmpsize, mode); pmap_invalidate_range(kernel_pmap, va, va + tmpsize); pmap_invalidate_cache_range(va, va + tmpsize, FALSE); return ((void *)(va + offset)); } void * pmap_mapdev(vm_paddr_t pa, vm_size_t size) { return (pmap_mapdev_attr(pa, size, PAT_UNCACHEABLE)); } void * pmap_mapbios(vm_paddr_t pa, vm_size_t size) { return (pmap_mapdev_attr(pa, size, PAT_WRITE_BACK)); } void pmap_unmapdev(vm_offset_t va, vm_size_t size) { struct pmap_preinit_mapping *ppim; vm_offset_t offset; int i; /* If we gave a direct map region in pmap_mapdev, do nothing */ if (va >= DMAP_MIN_ADDRESS && va < DMAP_MAX_ADDRESS) return; offset = va & PAGE_MASK; size = round_page(offset + size); va = trunc_page(va); for (i = 0; i < PMAP_PREINIT_MAPPING_COUNT; i++) { ppim = pmap_preinit_mapping + i; if (ppim->va == va && ppim->sz == size) { if (pmap_initialized) return; ppim->pa = 0; ppim->va = 0; ppim->sz = 0; ppim->mode = 0; if (va + size == virtual_avail) virtual_avail = va; return; } } if (pmap_initialized) kva_free(va, size); } /* * Tries to demote a 1GB page mapping. 
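 * On success, the 1GB PDPE is replaced by a freshly allocated page
 * directory page whose 512 PDEs each map a 2MB slice of the original
 * 1GB frame with the same protection and attribute bits.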
*/ static boolean_t pmap_demote_pdpe(pmap_t pmap, pdp_entry_t *pdpe, vm_offset_t va) { pdp_entry_t newpdpe, oldpdpe; pd_entry_t *firstpde, newpde, *pde; pt_entry_t PG_A, PG_M, PG_RW, PG_V; vm_paddr_t mpdepa; vm_page_t mpde; PG_A = pmap_accessed_bit(pmap); PG_M = pmap_modified_bit(pmap); PG_V = pmap_valid_bit(pmap); PG_RW = pmap_rw_bit(pmap); PMAP_LOCK_ASSERT(pmap, MA_OWNED); oldpdpe = *pdpe; KASSERT((oldpdpe & (PG_PS | PG_V)) == (PG_PS | PG_V), ("pmap_demote_pdpe: oldpdpe is missing PG_PS and/or PG_V")); if ((mpde = vm_page_alloc(NULL, va >> PDPSHIFT, VM_ALLOC_INTERRUPT | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED)) == NULL) { CTR2(KTR_PMAP, "pmap_demote_pdpe: failure for va %#lx" " in pmap %p", va, pmap); return (FALSE); } mpdepa = VM_PAGE_TO_PHYS(mpde); firstpde = (pd_entry_t *)PHYS_TO_DMAP(mpdepa); newpdpe = mpdepa | PG_M | PG_A | (oldpdpe & PG_U) | PG_RW | PG_V; KASSERT((oldpdpe & PG_A) != 0, ("pmap_demote_pdpe: oldpdpe is missing PG_A")); KASSERT((oldpdpe & (PG_M | PG_RW)) != PG_RW, ("pmap_demote_pdpe: oldpdpe is missing PG_M")); newpde = oldpdpe; /* * Initialize the page directory page. */ for (pde = firstpde; pde < firstpde + NPDEPG; pde++) { *pde = newpde; newpde += NBPDR; } /* * Demote the mapping. */ *pdpe = newpdpe; /* * Invalidate a stale recursive mapping of the page directory page. */ pmap_invalidate_page(pmap, (vm_offset_t)vtopde(va)); pmap_pdpe_demotions++; CTR2(KTR_PMAP, "pmap_demote_pdpe: success for va %#lx" " in pmap %p", va, pmap); return (TRUE); } /* * Sets the memory attribute for the specified page. */ void pmap_page_set_memattr(vm_page_t m, vm_memattr_t ma) { m->md.pat_mode = ma; /* * If "m" is a normal page, update its direct mapping. This update * can be relied upon to perform any cache operations that are * required for data coherence. */ if ((m->flags & PG_FICTITIOUS) == 0 && pmap_change_attr(PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)), PAGE_SIZE, m->md.pat_mode)) panic("memory attribute change on the direct map failed"); } /* * Changes the specified virtual address range's memory type to that given by * the parameter "mode". The specified virtual address range must be * completely contained within either the direct map or the kernel map. If * the virtual address range is contained within the kernel map, then the * memory type for each of the corresponding ranges of the direct map is also * changed. (The corresponding ranges of the direct map are those ranges that * map the same physical pages as the specified virtual address range.) These * changes to the direct map are necessary because Intel describes the * behavior of their processors as "undefined" if two or more mappings to the * same physical page have different memory types. * * Returns zero if the change completed successfully, and either EINVAL or * ENOMEM if the change failed. Specifically, EINVAL is returned if some part * of the virtual address range was not mapped, and ENOMEM is returned if * there was insufficient memory available to complete the change. In the * latter case, the memory type may have been changed on some part of the * virtual address range or the direct map. 
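 *
 * Illustrative (hypothetical) use: a driver that has mapped a frame
 * buffer at "va" might switch it to write-combining with
 *
 *	error = pmap_change_attr(va, size, PAT_WRITE_COMBINING);
 *
 * subject to the containment requirement described above.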
*/ int pmap_change_attr(vm_offset_t va, vm_size_t size, int mode) { int error; PMAP_LOCK(kernel_pmap); error = pmap_change_attr_locked(va, size, mode); PMAP_UNLOCK(kernel_pmap); return (error); } static int pmap_change_attr_locked(vm_offset_t va, vm_size_t size, int mode) { vm_offset_t base, offset, tmpva; vm_paddr_t pa_start, pa_end, pa_end1; pdp_entry_t *pdpe; pd_entry_t *pde; pt_entry_t *pte; int cache_bits_pte, cache_bits_pde, error; boolean_t changed; PMAP_LOCK_ASSERT(kernel_pmap, MA_OWNED); base = trunc_page(va); offset = va & PAGE_MASK; size = round_page(offset + size); /* * Only supported on kernel virtual addresses, including the direct * map but excluding the recursive map. */ if (base < DMAP_MIN_ADDRESS) return (EINVAL); cache_bits_pde = pmap_cache_bits(kernel_pmap, mode, 1); cache_bits_pte = pmap_cache_bits(kernel_pmap, mode, 0); changed = FALSE; /* * Pages that aren't mapped aren't supported. Also break down 2MB pages * into 4KB pages if required. */ for (tmpva = base; tmpva < base + size; ) { pdpe = pmap_pdpe(kernel_pmap, tmpva); if (pdpe == NULL || *pdpe == 0) return (EINVAL); if (*pdpe & PG_PS) { /* * If the current 1GB page already has the required * memory type, then we need not demote this page. Just * increment tmpva to the next 1GB page frame. */ if ((*pdpe & X86_PG_PDE_CACHE) == cache_bits_pde) { tmpva = trunc_1gpage(tmpva) + NBPDP; continue; } /* * If the current offset aligns with a 1GB page frame * and there is at least 1GB left within the range, then * we need not break down this page into 2MB pages. */ if ((tmpva & PDPMASK) == 0 && tmpva + PDPMASK < base + size) { tmpva += NBPDP; continue; } if (!pmap_demote_pdpe(kernel_pmap, pdpe, tmpva)) return (ENOMEM); } pde = pmap_pdpe_to_pde(pdpe, tmpva); if (*pde == 0) return (EINVAL); if (*pde & PG_PS) { /* * If the current 2MB page already has the required * memory type, then we need not demote this page. Just * increment tmpva to the next 2MB page frame. */ if ((*pde & X86_PG_PDE_CACHE) == cache_bits_pde) { tmpva = trunc_2mpage(tmpva) + NBPDR; continue; } /* * If the current offset aligns with a 2MB page frame * and there is at least 2MB left within the range, then * we need not break down this page into 4KB pages. */ if ((tmpva & PDRMASK) == 0 && tmpva + PDRMASK < base + size) { tmpva += NBPDR; continue; } if (!pmap_demote_pde(kernel_pmap, pde, tmpva)) return (ENOMEM); } pte = pmap_pde_to_pte(pde, tmpva); if (*pte == 0) return (EINVAL); tmpva += PAGE_SIZE; } error = 0; /* * Ok, all the pages exist, so run through them updating their * cache mode if required. */ pa_start = pa_end = 0; for (tmpva = base; tmpva < base + size; ) { pdpe = pmap_pdpe(kernel_pmap, tmpva); if (*pdpe & PG_PS) { if ((*pdpe & X86_PG_PDE_CACHE) != cache_bits_pde) { pmap_pde_attr(pdpe, cache_bits_pde, X86_PG_PDE_CACHE); changed = TRUE; } if (tmpva >= VM_MIN_KERNEL_ADDRESS && (*pdpe & PG_PS_FRAME) < dmaplimit) { if (pa_start == pa_end) { /* Start physical address run. */ pa_start = *pdpe & PG_PS_FRAME; pa_end = pa_start + NBPDP; } else if (pa_end == (*pdpe & PG_PS_FRAME)) pa_end += NBPDP; else { /* Run ended, update direct map. */ error = pmap_change_attr_locked( PHYS_TO_DMAP(pa_start), pa_end - pa_start, mode); if (error != 0) break; /* Start physical address run. 
*/ pa_start = *pdpe & PG_PS_FRAME; pa_end = pa_start + NBPDP; } } tmpva = trunc_1gpage(tmpva) + NBPDP; continue; } pde = pmap_pdpe_to_pde(pdpe, tmpva); if (*pde & PG_PS) { if ((*pde & X86_PG_PDE_CACHE) != cache_bits_pde) { pmap_pde_attr(pde, cache_bits_pde, X86_PG_PDE_CACHE); changed = TRUE; } if (tmpva >= VM_MIN_KERNEL_ADDRESS && (*pde & PG_PS_FRAME) < dmaplimit) { if (pa_start == pa_end) { /* Start physical address run. */ pa_start = *pde & PG_PS_FRAME; pa_end = pa_start + NBPDR; } else if (pa_end == (*pde & PG_PS_FRAME)) pa_end += NBPDR; else { /* Run ended, update direct map. */ error = pmap_change_attr_locked( PHYS_TO_DMAP(pa_start), pa_end - pa_start, mode); if (error != 0) break; /* Start physical address run. */ pa_start = *pde & PG_PS_FRAME; pa_end = pa_start + NBPDR; } } tmpva = trunc_2mpage(tmpva) + NBPDR; } else { pte = pmap_pde_to_pte(pde, tmpva); if ((*pte & X86_PG_PTE_CACHE) != cache_bits_pte) { pmap_pte_attr(pte, cache_bits_pte, X86_PG_PTE_CACHE); changed = TRUE; } if (tmpva >= VM_MIN_KERNEL_ADDRESS && (*pte & PG_PS_FRAME) < dmaplimit) { if (pa_start == pa_end) { /* Start physical address run. */ pa_start = *pte & PG_FRAME; pa_end = pa_start + PAGE_SIZE; } else if (pa_end == (*pte & PG_FRAME)) pa_end += PAGE_SIZE; else { /* Run ended, update direct map. */ error = pmap_change_attr_locked( PHYS_TO_DMAP(pa_start), pa_end - pa_start, mode); if (error != 0) break; /* Start physical address run. */ pa_start = *pte & PG_FRAME; pa_end = pa_start + PAGE_SIZE; } } tmpva += PAGE_SIZE; } } if (error == 0 && pa_start != pa_end && pa_start < dmaplimit) { pa_end1 = MIN(pa_end, dmaplimit); if (pa_start != pa_end1) error = pmap_change_attr_locked(PHYS_TO_DMAP(pa_start), pa_end1 - pa_start, mode); } /* * Flush CPU caches if required to make sure any data isn't cached that * shouldn't be, etc. */ if (changed) { pmap_invalidate_range(kernel_pmap, base, tmpva); pmap_invalidate_cache_range(base, tmpva, FALSE); } return (error); } /* * Demotes any mapping within the direct map region that covers more than the * specified range of physical addresses. This range's size must be a power * of two and its starting address must be a multiple of its size. Since the * demotion does not change any attributes of the mapping, a TLB invalidation * is not mandatory. The caller may, however, request a TLB invalidation. 
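 *
 * For example (illustrative values), pmap_demote_DMAP(pa, NBPDR, FALSE)
 * with a 2MB-aligned "pa" breaks up a covering 1GB mapping into 2MB
 * mappings but leaves an exact 2MB mapping alone; a "len" of PAGE_SIZE
 * additionally breaks a covering 2MB mapping into 4KB pages.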
*/ void pmap_demote_DMAP(vm_paddr_t base, vm_size_t len, boolean_t invalidate) { pdp_entry_t *pdpe; pd_entry_t *pde; vm_offset_t va; boolean_t changed; if (len == 0) return; KASSERT(powerof2(len), ("pmap_demote_DMAP: len is not a power of 2")); KASSERT((base & (len - 1)) == 0, ("pmap_demote_DMAP: base is not a multiple of len")); if (len < NBPDP && base < dmaplimit) { va = PHYS_TO_DMAP(base); changed = FALSE; PMAP_LOCK(kernel_pmap); pdpe = pmap_pdpe(kernel_pmap, va); if ((*pdpe & X86_PG_V) == 0) panic("pmap_demote_DMAP: invalid PDPE"); if ((*pdpe & PG_PS) != 0) { if (!pmap_demote_pdpe(kernel_pmap, pdpe, va)) panic("pmap_demote_DMAP: PDPE failed"); changed = TRUE; } if (len < NBPDR) { pde = pmap_pdpe_to_pde(pdpe, va); if ((*pde & X86_PG_V) == 0) panic("pmap_demote_DMAP: invalid PDE"); if ((*pde & PG_PS) != 0) { if (!pmap_demote_pde(kernel_pmap, pde, va)) panic("pmap_demote_DMAP: PDE failed"); changed = TRUE; } } if (changed && invalidate) pmap_invalidate_page(kernel_pmap, va); PMAP_UNLOCK(kernel_pmap); } } /* * perform the pmap work for mincore */ int pmap_mincore(pmap_t pmap, vm_offset_t addr, vm_paddr_t *locked_pa) { pd_entry_t *pdep; pt_entry_t pte, PG_A, PG_M, PG_RW, PG_V; vm_paddr_t pa; int val; PG_A = pmap_accessed_bit(pmap); PG_M = pmap_modified_bit(pmap); PG_V = pmap_valid_bit(pmap); PG_RW = pmap_rw_bit(pmap); PMAP_LOCK(pmap); retry: pdep = pmap_pde(pmap, addr); if (pdep != NULL && (*pdep & PG_V)) { if (*pdep & PG_PS) { pte = *pdep; /* Compute the physical address of the 4KB page. */ pa = ((*pdep & PG_PS_FRAME) | (addr & PDRMASK)) & PG_FRAME; val = MINCORE_SUPER; } else { pte = *pmap_pde_to_pte(pdep, addr); pa = pte & PG_FRAME; val = 0; } } else { pte = 0; pa = 0; val = 0; } if ((pte & PG_V) != 0) { val |= MINCORE_INCORE; if ((pte & (PG_M | PG_RW)) == (PG_M | PG_RW)) val |= MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER; if ((pte & PG_A) != 0) val |= MINCORE_REFERENCED | MINCORE_REFERENCED_OTHER; } if ((val & (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER)) != (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER) && (pte & (PG_MANAGED | PG_V)) == (PG_MANAGED | PG_V)) { /* Ensure that "PHYS_TO_VM_PAGE(pa)->object" doesn't change. 
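 * vm_page_pa_tryrelock() returns nonzero when it had to drop the pmap
 * lock to acquire the page lock, in which case the PTE read above may
 * be stale and the lookup is retried.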
*/ if (vm_page_pa_tryrelock(pmap, pa, locked_pa)) goto retry; } else PA_UNLOCK_COND(*locked_pa); PMAP_UNLOCK(pmap); return (val); } static uint64_t pmap_pcid_alloc(pmap_t pmap, u_int cpuid) { uint32_t gen, new_gen, pcid_next; CRITICAL_ASSERT(curthread); gen = PCPU_GET(pcid_gen); if (pmap->pm_pcids[cpuid].pm_pcid == PMAP_PCID_KERN || pmap->pm_pcids[cpuid].pm_gen == gen) return (CR3_PCID_SAVE); pcid_next = PCPU_GET(pcid_next); KASSERT(pcid_next <= PMAP_PCID_OVERMAX, ("cpu %d pcid_next %#x", cpuid, pcid_next)); if (pcid_next == PMAP_PCID_OVERMAX) { new_gen = gen + 1; if (new_gen == 0) new_gen = 1; PCPU_SET(pcid_gen, new_gen); pcid_next = PMAP_PCID_KERN + 1; } else { new_gen = gen; } pmap->pm_pcids[cpuid].pm_pcid = pcid_next; pmap->pm_pcids[cpuid].pm_gen = new_gen; PCPU_SET(pcid_next, pcid_next + 1); return (0); } void pmap_activate_sw(struct thread *td) { pmap_t oldpmap, pmap; uint64_t cached, cr3; u_int cpuid; oldpmap = PCPU_GET(curpmap); pmap = vmspace_pmap(td->td_proc->p_vmspace); if (oldpmap == pmap) return; cpuid = PCPU_GET(cpuid); #ifdef SMP CPU_SET_ATOMIC(cpuid, &pmap->pm_active); #else CPU_SET(cpuid, &pmap->pm_active); #endif cr3 = rcr3(); if (pmap_pcid_enabled) { cached = pmap_pcid_alloc(pmap, cpuid); KASSERT(pmap->pm_pcids[cpuid].pm_pcid >= 0 && pmap->pm_pcids[cpuid].pm_pcid < PMAP_PCID_OVERMAX, ("pmap %p cpu %d pcid %#x", pmap, cpuid, pmap->pm_pcids[cpuid].pm_pcid)); KASSERT(pmap->pm_pcids[cpuid].pm_pcid != PMAP_PCID_KERN || pmap == kernel_pmap, ("non-kernel pmap thread %p pmap %p cpu %d pcid %#x", td, pmap, cpuid, pmap->pm_pcids[cpuid].pm_pcid)); if (!cached || (cr3 & ~CR3_PCID_MASK) != pmap->pm_cr3) { load_cr3(pmap->pm_cr3 | pmap->pm_pcids[cpuid].pm_pcid | cached); if (cached) PCPU_INC(pm_save_cnt); } } else if (cr3 != pmap->pm_cr3) { load_cr3(pmap->pm_cr3); } PCPU_SET(curpmap, pmap); #ifdef SMP CPU_CLR_ATOMIC(cpuid, &oldpmap->pm_active); #else CPU_CLR(cpuid, &oldpmap->pm_active); #endif } void pmap_activate(struct thread *td) { critical_enter(); pmap_activate_sw(td); critical_exit(); } void pmap_sync_icache(pmap_t pm, vm_offset_t va, vm_size_t sz) { } /* * Increase the starting virtual address of the given mapping if a * different alignment might result in more superpage mappings. 
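/*
 * User-space sketch of the generation-count trick in pmap_pcid_alloc()
 * above: when the PCID namespace wraps, bumping the per-CPU generation
 * invalidates every cached (pcid, gen) pair at once instead of walking
 * all pmaps.  Generation zero is reserved to mean "never valid".  All
 * names below are illustrative, not the kernel's.
 */
#include <stdint.h>

#define	EX_ID_MAX	4095		/* last allocatable ID */

struct cached_id {
	uint32_t	id;
	uint32_t	gen;	/* valid only while it matches cpu_gen */
};

static uint32_t cpu_gen = 1, next_id = 1;

static uint32_t
id_alloc(struct cached_id *c)
{
	if (c->gen == cpu_gen)
		return (c->id);			/* cached ID still valid */
	if (next_id > EX_ID_MAX) {
		if (++cpu_gen == 0)		/* skip the reserved gen */
			cpu_gen = 1;
		next_id = 1;			/* all cached pairs stale */
	}
	c->id = next_id++;
	c->gen = cpu_gen;
	return (c->id);
}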
*/ void pmap_align_superpage(vm_object_t object, vm_ooffset_t offset, vm_offset_t *addr, vm_size_t size) { vm_offset_t superpage_offset; if (size < NBPDR) return; if (object != NULL && (object->flags & OBJ_COLORED) != 0) offset += ptoa(object->pg_color); superpage_offset = offset & PDRMASK; if (size - ((NBPDR - superpage_offset) & PDRMASK) < NBPDR || (*addr & PDRMASK) == superpage_offset) return; if ((*addr & PDRMASK) < superpage_offset) *addr = (*addr & ~PDRMASK) + superpage_offset; else *addr = ((*addr + PDRMASK) & ~PDRMASK) + superpage_offset; } #ifdef INVARIANTS static unsigned long num_dirty_emulations; SYSCTL_ULONG(_vm_pmap, OID_AUTO, num_dirty_emulations, CTLFLAG_RW, &num_dirty_emulations, 0, NULL); static unsigned long num_accessed_emulations; SYSCTL_ULONG(_vm_pmap, OID_AUTO, num_accessed_emulations, CTLFLAG_RW, &num_accessed_emulations, 0, NULL); static unsigned long num_superpage_accessed_emulations; SYSCTL_ULONG(_vm_pmap, OID_AUTO, num_superpage_accessed_emulations, CTLFLAG_RW, &num_superpage_accessed_emulations, 0, NULL); static unsigned long ad_emulation_superpage_promotions; SYSCTL_ULONG(_vm_pmap, OID_AUTO, ad_emulation_superpage_promotions, CTLFLAG_RW, &ad_emulation_superpage_promotions, 0, NULL); #endif /* INVARIANTS */ int pmap_emulate_accessed_dirty(pmap_t pmap, vm_offset_t va, int ftype) { int rv; struct rwlock *lock; vm_page_t m, mpte; pd_entry_t *pde; pt_entry_t *pte, PG_A, PG_M, PG_RW, PG_V; KASSERT(ftype == VM_PROT_READ || ftype == VM_PROT_WRITE, ("pmap_emulate_accessed_dirty: invalid fault type %d", ftype)); if (!pmap_emulate_ad_bits(pmap)) return (-1); PG_A = pmap_accessed_bit(pmap); PG_M = pmap_modified_bit(pmap); PG_V = pmap_valid_bit(pmap); PG_RW = pmap_rw_bit(pmap); rv = -1; lock = NULL; PMAP_LOCK(pmap); pde = pmap_pde(pmap, va); if (pde == NULL || (*pde & PG_V) == 0) goto done; if ((*pde & PG_PS) != 0) { if (ftype == VM_PROT_READ) { #ifdef INVARIANTS atomic_add_long(&num_superpage_accessed_emulations, 1); #endif *pde |= PG_A; rv = 0; } goto done; } pte = pmap_pde_to_pte(pde, va); if ((*pte & PG_V) == 0) goto done; if (ftype == VM_PROT_WRITE) { if ((*pte & PG_RW) == 0) goto done; /* * Set the modified and accessed bits simultaneously. * * Intel EPT PTEs that do software emulation of A/D bits map * PG_A and PG_M to EPT_PG_READ and EPT_PG_WRITE respectively. * An EPT misconfiguration is triggered if the PTE is writable * but not readable (WR=10). This is avoided by setting PG_A * and PG_M simultaneously. 
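/*
 * The core of pmap_align_superpage() above as a standalone sketch: shift
 * the mapping's start address so that its offset within a 2MB page equals
 * the backing object's offset, letting the interior of the mapping be
 * promoted to superpages.  The size checks done by the real function are
 * omitted here.
 */
#include <stdint.h>

#define	EX_NBPDR	(1ULL << 21)		/* bytes in a 2MB page */

static uint64_t
align_superpage(uint64_t addr, uint64_t offset)
{
	uint64_t mask, spoff;

	mask = EX_NBPDR - 1;
	spoff = offset & mask;
	if ((addr & mask) == spoff)
		return (addr);			/* already congruent */
	if ((addr & mask) < spoff)
		return ((addr & ~mask) + spoff);
	return (((addr + mask) & ~mask) + spoff);
}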
*/ *pte |= PG_M | PG_A; } else { *pte |= PG_A; } /* try to promote the mapping */ if (va < VM_MAXUSER_ADDRESS) mpte = PHYS_TO_VM_PAGE(*pde & PG_FRAME); else mpte = NULL; m = PHYS_TO_VM_PAGE(*pte & PG_FRAME); if ((mpte == NULL || mpte->wire_count == NPTEPG) && pmap_ps_enabled(pmap) && (m->flags & PG_FICTITIOUS) == 0 && vm_reserv_level_iffullpop(m) == 0) { pmap_promote_pde(pmap, pde, va, &lock); #ifdef INVARIANTS atomic_add_long(&ad_emulation_superpage_promotions, 1); #endif } #ifdef INVARIANTS if (ftype == VM_PROT_WRITE) atomic_add_long(&num_dirty_emulations, 1); else atomic_add_long(&num_accessed_emulations, 1); #endif rv = 0; /* success */ done: if (lock != NULL) rw_wunlock(lock); PMAP_UNLOCK(pmap); return (rv); } void pmap_get_mapping(pmap_t pmap, vm_offset_t va, uint64_t *ptr, int *num) { pml4_entry_t *pml4; pdp_entry_t *pdp; pd_entry_t *pde; pt_entry_t *pte, PG_V; int idx; idx = 0; PG_V = pmap_valid_bit(pmap); PMAP_LOCK(pmap); pml4 = pmap_pml4e(pmap, va); ptr[idx++] = *pml4; if ((*pml4 & PG_V) == 0) goto done; pdp = pmap_pml4e_to_pdpe(pml4, va); ptr[idx++] = *pdp; if ((*pdp & PG_V) == 0 || (*pdp & PG_PS) != 0) goto done; pde = pmap_pdpe_to_pde(pdp, va); ptr[idx++] = *pde; if ((*pde & PG_V) == 0 || (*pde & PG_PS) != 0) goto done; pte = pmap_pde_to_pte(pde, va); ptr[idx++] = *pte; done: PMAP_UNLOCK(pmap); *num = idx; } /** * Get the kernel virtual address of a set of physical pages. If there are * physical addresses not covered by the DMAP perform a transient mapping * that will be removed when calling pmap_unmap_io_transient. * * \param page The pages the caller wishes to obtain the virtual * address on the kernel memory map. * \param vaddr On return contains the kernel virtual memory address * of the pages passed in the page parameter. * \param count Number of pages passed in. * \param can_fault TRUE if the thread using the mapped pages can take * page faults, FALSE otherwise. * * \returns TRUE if the caller must call pmap_unmap_io_transient when * finished or FALSE otherwise. * */ boolean_t pmap_map_io_transient(vm_page_t page[], vm_offset_t vaddr[], int count, boolean_t can_fault) { vm_paddr_t paddr; boolean_t needs_mapping; pt_entry_t *pte; int cache_bits, error, i; /* * Allocate any KVA space that we need, this is done in a separate * loop to prevent calling vmem_alloc while pinned. */ needs_mapping = FALSE; for (i = 0; i < count; i++) { paddr = VM_PAGE_TO_PHYS(page[i]); if (__predict_false(paddr >= dmaplimit)) { error = vmem_alloc(kernel_arena, PAGE_SIZE, M_BESTFIT | M_WAITOK, &vaddr[i]); KASSERT(error == 0, ("vmem_alloc failed: %d", error)); needs_mapping = TRUE; } else { vaddr[i] = PHYS_TO_DMAP(paddr); } } /* Exit early if everything is covered by the DMAP */ if (!needs_mapping) return (FALSE); /* * NB: The sequence of updating a page table followed by accesses * to the corresponding pages used in the !DMAP case is subject to * the situation described in the "AMD64 Architecture Programmer's * Manual Volume 2: System Programming" rev. 3.23, "7.3.1 Special * Coherency Considerations". Therefore, issuing the INVLPG right * after modifying the PTE bits is crucial. */ if (!can_fault) sched_pin(); for (i = 0; i < count; i++) { paddr = VM_PAGE_TO_PHYS(page[i]); if (paddr >= dmaplimit) { if (can_fault) { /* * Slow path, since we can get page faults * while mappings are active don't pin the * thread to the CPU and instead add a global * mapping visible to all CPUs. 
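/*
 * Illustrative helpers for the four-level walk that pmap_get_mapping()
 * above performs: each paging level consumes nine bits of the virtual
 * address, from the PML4 (bits 47-39) down to the page table (bits 20-12).
 * These are plain bit extractions, not the kernel's accessor macros.
 */
#include <stdint.h>

static unsigned pml4_index(uint64_t va) { return ((va >> 39) & 0x1ff); }
static unsigned pdp_index(uint64_t va)  { return ((va >> 30) & 0x1ff); }
static unsigned pd_index(uint64_t va)   { return ((va >> 21) & 0x1ff); }
static unsigned pt_index(uint64_t va)   { return ((va >> 12) & 0x1ff); }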
*/ pmap_qenter(vaddr[i], &page[i], 1); } else { pte = vtopte(vaddr[i]); cache_bits = pmap_cache_bits(kernel_pmap, page[i]->md.pat_mode, 0); pte_store(pte, paddr | X86_PG_RW | X86_PG_V | cache_bits); invlpg(vaddr[i]); } } } return (needs_mapping); } void pmap_unmap_io_transient(vm_page_t page[], vm_offset_t vaddr[], int count, boolean_t can_fault) { vm_paddr_t paddr; int i; if (!can_fault) sched_unpin(); for (i = 0; i < count; i++) { paddr = VM_PAGE_TO_PHYS(page[i]); if (paddr >= dmaplimit) { if (can_fault) pmap_qremove(vaddr[i], 1); vmem_free(kernel_arena, vaddr[i], PAGE_SIZE); } } } vm_offset_t pmap_quick_enter_page(vm_page_t m) { vm_paddr_t paddr; paddr = VM_PAGE_TO_PHYS(m); if (paddr < dmaplimit) return (PHYS_TO_DMAP(paddr)); mtx_lock_spin(&qframe_mtx); KASSERT(*vtopte(qframe) == 0, ("qframe busy")); pte_store(vtopte(qframe), paddr | X86_PG_RW | X86_PG_V | X86_PG_A | X86_PG_M | pmap_cache_bits(kernel_pmap, m->md.pat_mode, 0)); return (qframe); } void pmap_quick_remove_page(vm_offset_t addr) { if (addr != qframe) return; pte_store(vtopte(qframe), 0); invlpg(qframe); mtx_unlock_spin(&qframe_mtx); } #include "opt_ddb.h" #ifdef DDB #include DB_SHOW_COMMAND(pte, pmap_print_pte) { pmap_t pmap; pml4_entry_t *pml4; pdp_entry_t *pdp; pd_entry_t *pde; pt_entry_t *pte, PG_V; vm_offset_t va; if (have_addr) { va = (vm_offset_t)addr; pmap = PCPU_GET(curpmap); /* XXX */ } else { db_printf("show pte addr\n"); return; } PG_V = pmap_valid_bit(pmap); pml4 = pmap_pml4e(pmap, va); db_printf("VA %#016lx pml4e %#016lx", va, *pml4); if ((*pml4 & PG_V) == 0) { db_printf("\n"); return; } pdp = pmap_pml4e_to_pdpe(pml4, va); db_printf(" pdpe %#016lx", *pdp); if ((*pdp & PG_V) == 0 || (*pdp & PG_PS) != 0) { db_printf("\n"); return; } pde = pmap_pdpe_to_pde(pdp, va); db_printf(" pde %#016lx", *pde); if ((*pde & PG_V) == 0 || (*pde & PG_PS) != 0) { db_printf("\n"); return; } pte = pmap_pde_to_pte(pde, va); db_printf(" pte %#016lx\n", *pte); } DB_SHOW_COMMAND(phys2dmap, pmap_phys2dmap) { vm_paddr_t a; if (have_addr) { a = (vm_paddr_t)addr; db_printf("0x%jx\n", (uintmax_t)PHYS_TO_DMAP(a)); } else { db_printf("show phys2dmap addr\n"); } } #endif Index: projects/clang390-import/sys/amd64/amd64/support.S =================================================================== --- projects/clang390-import/sys/amd64/amd64/support.S (revision 305016) +++ projects/clang390-import/sys/amd64/amd64/support.S (revision 305017) @@ -1,789 +1,813 @@ /*- * Copyright (c) 2003 Peter Wemm. * Copyright (c) 1993 The Regents of the University of California. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
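/*
 * The sse2_pagezero() routine added to support.S below zeroes a page with
 * MOVNTI non-temporal stores, so the zeroes bypass the cache instead of
 * evicting useful lines, and ends with SFENCE because non-temporal stores
 * are weakly ordered.  A user-space sketch of the same idea with compiler
 * intrinsics, assuming a 4096-byte page:
 */
#include <emmintrin.h>

#define	EX_PAGE_SIZE	4096

static void
stream_pagezero(void *page)
{
	long long *p = page;
	int i;

	for (i = 0; i < EX_PAGE_SIZE / 8; i += 4) {
		_mm_stream_si64(p + i, 0);	/* movnti */
		_mm_stream_si64(p + i + 1, 0);
		_mm_stream_si64(p + i + 2, 0);
		_mm_stream_si64(p + i + 3, 0);
	}
	_mm_sfence();		/* make the NT stores globally visible */
}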
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include "opt_ddb.h" #include #include #include "assym.s" .text /* * bcopy family * void bzero(void *buf, u_int len) */ /* done */ ENTRY(bzero) PUSH_FRAME_POINTER movq %rsi,%rcx xorl %eax,%eax shrq $3,%rcx cld rep stosq movq %rsi,%rcx andq $7,%rcx rep stosb POP_FRAME_POINTER ret END(bzero) /* Address: %rdi */ ENTRY(pagezero) PUSH_FRAME_POINTER movq $PAGE_SIZE/8,%rcx xorl %eax,%eax rep stosq POP_FRAME_POINTER ret END(pagezero) +/* Address: %rdi */ +ENTRY(sse2_pagezero) + PUSH_FRAME_POINTER + movq $-PAGE_SIZE,%rdx + subq %rdx,%rdi + xorl %eax,%eax + jmp 1f + /* + * The loop takes 29 bytes. Ensure that it doesn't cross a 32-byte + * cache line. + */ + .p2align 5,0x90 +1: + movnti %rax,(%rdi,%rdx) + movnti %rax,8(%rdi,%rdx) + movnti %rax,16(%rdi,%rdx) + movnti %rax,24(%rdi,%rdx) + addq $32,%rdx + jne 1b + sfence + POP_FRAME_POINTER + ret +END(sse2_pagezero) + ENTRY(bcmp) PUSH_FRAME_POINTER movq %rdx,%rcx shrq $3,%rcx cld /* compare forwards */ repe cmpsq jne 1f movq %rdx,%rcx andq $7,%rcx repe cmpsb 1: setne %al movsbl %al,%eax POP_FRAME_POINTER ret END(bcmp) /* * bcopy(src, dst, cnt) * rdi, rsi, rdx * ws@tools.de (Wolfgang Solfrank, TooLs GmbH) +49-228-985800 */ ENTRY(bcopy) PUSH_FRAME_POINTER xchgq %rsi,%rdi movq %rdx,%rcx movq %rdi,%rax subq %rsi,%rax cmpq %rcx,%rax /* overlapping && src < dst? */ jb 1f shrq $3,%rcx /* copy by 64-bit words */ cld /* nope, copy forwards */ rep movsq movq %rdx,%rcx andq $7,%rcx /* any bytes left? */ rep movsb POP_FRAME_POINTER ret /* ALIGN_TEXT */ 1: addq %rcx,%rdi /* copy backwards */ addq %rcx,%rsi decq %rdi decq %rsi andq $7,%rcx /* any fractional bytes? */ std rep movsb movq %rdx,%rcx /* copy remainder by 32-bit words */ shrq $3,%rcx subq $7,%rsi subq $7,%rdi rep movsq cld POP_FRAME_POINTER ret END(bcopy) /* * Note: memcpy does not support overlapping copies */ ENTRY(memcpy) PUSH_FRAME_POINTER movq %rdi,%rax movq %rdx,%rcx shrq $3,%rcx /* copy by 64-bit words */ cld /* copy forwards */ rep movsq movq %rdx,%rcx andq $7,%rcx /* any bytes left? */ rep movsb POP_FRAME_POINTER ret END(memcpy) /* * pagecopy(%rdi=from, %rsi=to) */ ENTRY(pagecopy) PUSH_FRAME_POINTER movq $-PAGE_SIZE,%rax movq %rax,%rdx subq %rax,%rdi subq %rax,%rsi 1: prefetchnta (%rdi,%rax) addq $64,%rax jne 1b 2: movq (%rdi,%rdx),%rax movnti %rax,(%rsi,%rdx) movq 8(%rdi,%rdx),%rax movnti %rax,8(%rsi,%rdx) movq 16(%rdi,%rdx),%rax movnti %rax,16(%rsi,%rdx) movq 24(%rdi,%rdx),%rax movnti %rax,24(%rsi,%rdx) addq $32,%rdx jne 2b sfence POP_FRAME_POINTER ret END(pagecopy) /* fillw(pat, base, cnt) */ /* %rdi,%rsi, %rdx */ ENTRY(fillw) PUSH_FRAME_POINTER movq %rdi,%rax movq %rsi,%rdi movq %rdx,%rcx cld rep stosw POP_FRAME_POINTER ret END(fillw) /*****************************************************************************/ /* copyout and fubyte family */ /*****************************************************************************/ /* * Access user memory from inside the kernel. These routines should be * the only places that do this. 
* * These routines set curpcb->pcb_onfault for the time they execute. When a * protection violation occurs inside the functions, the trap handler * returns to *curpcb->pcb_onfault instead of the function. */ /* * copyout(from_kernel, to_user, len) - MP SAFE * %rdi, %rsi, %rdx */ ENTRY(copyout) PUSH_FRAME_POINTER movq PCPU(CURPCB),%rax movq $copyout_fault,PCB_ONFAULT(%rax) testq %rdx,%rdx /* anything to do? */ jz done_copyout /* * Check explicitly for non-user addresses. If 486 write protection * is being used, this check is essential because we are in kernel * mode so the h/w does not provide any protection against writing * kernel addresses. */ /* * First, prevent address wrapping. */ movq %rsi,%rax addq %rdx,%rax jc copyout_fault /* * XXX STOP USING VM_MAXUSER_ADDRESS. * It is an end address, not a max, so every time it is used correctly it * looks like there is an off by one error, and of course it caused an off * by one error in several places. */ movq $VM_MAXUSER_ADDRESS,%rcx cmpq %rcx,%rax ja copyout_fault xchgq %rdi,%rsi /* bcopy(%rsi, %rdi, %rdx) */ movq %rdx,%rcx shrq $3,%rcx cld rep movsq movb %dl,%cl andb $7,%cl rep movsb done_copyout: xorl %eax,%eax movq PCPU(CURPCB),%rdx movq %rax,PCB_ONFAULT(%rdx) POP_FRAME_POINTER ret ALIGN_TEXT copyout_fault: movq PCPU(CURPCB),%rdx movq $0,PCB_ONFAULT(%rdx) movq $EFAULT,%rax POP_FRAME_POINTER ret END(copyout) /* * copyin(from_user, to_kernel, len) - MP SAFE * %rdi, %rsi, %rdx */ ENTRY(copyin) PUSH_FRAME_POINTER movq PCPU(CURPCB),%rax movq $copyin_fault,PCB_ONFAULT(%rax) testq %rdx,%rdx /* anything to do? */ jz done_copyin /* * make sure address is valid */ movq %rdi,%rax addq %rdx,%rax jc copyin_fault movq $VM_MAXUSER_ADDRESS,%rcx cmpq %rcx,%rax ja copyin_fault xchgq %rdi,%rsi movq %rdx,%rcx movb %cl,%al shrq $3,%rcx /* copy longword-wise */ cld rep movsq movb %al,%cl andb $7,%cl /* copy remaining bytes */ rep movsb done_copyin: xorl %eax,%eax movq PCPU(CURPCB),%rdx movq %rax,PCB_ONFAULT(%rdx) POP_FRAME_POINTER ret ALIGN_TEXT copyin_fault: movq PCPU(CURPCB),%rdx movq $0,PCB_ONFAULT(%rdx) movq $EFAULT,%rax POP_FRAME_POINTER ret END(copyin) /* * casueword32. Compare and set user integer. Returns -1 on fault, * 0 if access was successful. Old value is written to *oldp. * dst = %rdi, old = %esi, oldp = %rdx, new = %ecx */ ENTRY(casueword32) PUSH_FRAME_POINTER movq PCPU(CURPCB),%r8 movq $fusufault,PCB_ONFAULT(%r8) movq $VM_MAXUSER_ADDRESS-4,%rax cmpq %rax,%rdi /* verify address is valid */ ja fusufault movl %esi,%eax /* old */ #ifdef SMP lock #endif cmpxchgl %ecx,(%rdi) /* new = %ecx */ /* * The old value is in %eax. If the store succeeded it will be the * value we expected (old) from before the store, otherwise it will * be the current value. Save %eax into %esi to prepare the return * value. */ movl %eax,%esi xorl %eax,%eax movq %rax,PCB_ONFAULT(%r8) /* * Access the oldp after the pcb_onfault is cleared, to correctly * catch corrupted pointer. */ movl %esi,(%rdx) /* oldp = %rdx */ POP_FRAME_POINTER ret END(casueword32) /* * casueword. Compare and set user long. Returns -1 on fault, * 0 if access was successful. Old value is written to *oldp. * dst = %rdi, old = %rsi, oldp = %rdx, new = %rcx */ ENTRY(casueword) PUSH_FRAME_POINTER movq PCPU(CURPCB),%r8 movq $fusufault,PCB_ONFAULT(%r8) movq $VM_MAXUSER_ADDRESS-4,%rax cmpq %rax,%rdi /* verify address is valid */ ja fusufault movq %rsi,%rax /* old */ #ifdef SMP lock #endif cmpxchgq %rcx,(%rdi) /* new = %rcx */ /* * The old value is in %rax. 
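/*
 * The two-step validity test in copyout()/copyin() above, rendered in C:
 * reject ranges whose end wraps past zero, then ranges extending beyond
 * the user address space.  VM_MAXUSER_ADDRESS is an end address, so
 * "end == limit" is still acceptable; the value below is illustrative.
 */
#include <stdbool.h>
#include <stdint.h>

#define	EX_VM_MAXUSER_ADDRESS	0x0000800000000000ULL

static bool
user_range_ok(uint64_t uaddr, uint64_t len)
{
	uint64_t end = uaddr + len;

	if (end < uaddr)
		return (false);		/* address wrapped (jc in asm) */
	return (end <= EX_VM_MAXUSER_ADDRESS);	/* else ja faults */
}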
If the store succeeded it will be the * value we expected (old) from before the store, otherwise it will * be the current value. */ movq %rax,%rsi xorl %eax,%eax movq %rax,PCB_ONFAULT(%r8) movq %rsi,(%rdx) POP_FRAME_POINTER ret END(casueword) /* * Fetch (load) a 64-bit word, a 32-bit word, a 16-bit word, or an 8-bit * byte from user memory. * addr = %rdi, valp = %rsi */ ALTENTRY(fueword64) ENTRY(fueword) PUSH_FRAME_POINTER movq PCPU(CURPCB),%rcx movq $fusufault,PCB_ONFAULT(%rcx) movq $VM_MAXUSER_ADDRESS-8,%rax cmpq %rax,%rdi /* verify address is valid */ ja fusufault xorl %eax,%eax movq (%rdi),%r11 movq %rax,PCB_ONFAULT(%rcx) movq %r11,(%rsi) POP_FRAME_POINTER ret END(fueword64) END(fueword) ENTRY(fueword32) PUSH_FRAME_POINTER movq PCPU(CURPCB),%rcx movq $fusufault,PCB_ONFAULT(%rcx) movq $VM_MAXUSER_ADDRESS-4,%rax cmpq %rax,%rdi /* verify address is valid */ ja fusufault xorl %eax,%eax movl (%rdi),%r11d movq %rax,PCB_ONFAULT(%rcx) movl %r11d,(%rsi) POP_FRAME_POINTER ret END(fueword32) /* * fuswintr() and suswintr() are specialized variants of fuword16() and * suword16(), respectively. They are called from the profiling code, * potentially at interrupt time. If they fail, that's okay; good things * will happen later. They always fail for now, until the trap code is * able to deal with this. */ ALTENTRY(suswintr) ENTRY(fuswintr) movq $-1,%rax ret END(suswintr) END(fuswintr) ENTRY(fuword16) PUSH_FRAME_POINTER movq PCPU(CURPCB),%rcx movq $fusufault,PCB_ONFAULT(%rcx) movq $VM_MAXUSER_ADDRESS-2,%rax cmpq %rax,%rdi ja fusufault movzwl (%rdi),%eax movq $0,PCB_ONFAULT(%rcx) POP_FRAME_POINTER ret END(fuword16) ENTRY(fubyte) PUSH_FRAME_POINTER movq PCPU(CURPCB),%rcx movq $fusufault,PCB_ONFAULT(%rcx) movq $VM_MAXUSER_ADDRESS-1,%rax cmpq %rax,%rdi ja fusufault movzbl (%rdi),%eax movq $0,PCB_ONFAULT(%rcx) POP_FRAME_POINTER ret END(fubyte) ALIGN_TEXT fusufault: movq PCPU(CURPCB),%rcx xorl %eax,%eax movq %rax,PCB_ONFAULT(%rcx) decq %rax POP_FRAME_POINTER ret /* * Store a 64-bit word, a 32-bit word, a 16-bit word, or an 8-bit byte to * user memory. All these functions are MPSAFE. 
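/*
 * The casueword32() contract above, sketched with a C11 atomic standing
 * in for user memory: attempt to replace *p == old with new, always hand
 * back the value actually observed, and let the caller infer success by
 * comparing *oldp with old.  Faults obviously cannot be modeled here.
 */
#include <stdatomic.h>
#include <stdint.h>

static int
casuword32_sketch(_Atomic uint32_t *p, uint32_t old, uint32_t *oldp,
    uint32_t new)
{
	uint32_t expected = old;

	atomic_compare_exchange_strong(p, &expected, new);
	*oldp = expected;	/* equals old exactly when the CAS won */
	return (0);		/* the real routine returns -1 on fault */
}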
* addr = %rdi, value = %rsi */ ALTENTRY(suword64) ENTRY(suword) PUSH_FRAME_POINTER movq PCPU(CURPCB),%rcx movq $fusufault,PCB_ONFAULT(%rcx) movq $VM_MAXUSER_ADDRESS-8,%rax cmpq %rax,%rdi /* verify address validity */ ja fusufault movq %rsi,(%rdi) xorl %eax,%eax movq PCPU(CURPCB),%rcx movq %rax,PCB_ONFAULT(%rcx) POP_FRAME_POINTER ret END(suword64) END(suword) ENTRY(suword32) PUSH_FRAME_POINTER movq PCPU(CURPCB),%rcx movq $fusufault,PCB_ONFAULT(%rcx) movq $VM_MAXUSER_ADDRESS-4,%rax cmpq %rax,%rdi /* verify address validity */ ja fusufault movl %esi,(%rdi) xorl %eax,%eax movq PCPU(CURPCB),%rcx movq %rax,PCB_ONFAULT(%rcx) POP_FRAME_POINTER ret END(suword32) ENTRY(suword16) PUSH_FRAME_POINTER movq PCPU(CURPCB),%rcx movq $fusufault,PCB_ONFAULT(%rcx) movq $VM_MAXUSER_ADDRESS-2,%rax cmpq %rax,%rdi /* verify address validity */ ja fusufault movw %si,(%rdi) xorl %eax,%eax movq PCPU(CURPCB),%rcx /* restore trashed register */ movq %rax,PCB_ONFAULT(%rcx) POP_FRAME_POINTER ret END(suword16) ENTRY(subyte) PUSH_FRAME_POINTER movq PCPU(CURPCB),%rcx movq $fusufault,PCB_ONFAULT(%rcx) movq $VM_MAXUSER_ADDRESS-1,%rax cmpq %rax,%rdi /* verify address validity */ ja fusufault movl %esi,%eax movb %al,(%rdi) xorl %eax,%eax movq PCPU(CURPCB),%rcx /* restore trashed register */ movq %rax,PCB_ONFAULT(%rcx) POP_FRAME_POINTER ret END(subyte) /* * copyinstr(from, to, maxlen, int *lencopied) - MP SAFE * %rdi, %rsi, %rdx, %rcx * * copy a string from from to to, stop when a 0 character is reached. * return ENAMETOOLONG if string is longer than maxlen, and * EFAULT on protection violations. If lencopied is non-zero, * return the actual length in *lencopied. */ ENTRY(copyinstr) PUSH_FRAME_POINTER movq %rdx,%r8 /* %r8 = maxlen */ movq %rcx,%r9 /* %r9 = *len */ xchgq %rdi,%rsi /* %rdi = from, %rsi = to */ movq PCPU(CURPCB),%rcx movq $cpystrflt,PCB_ONFAULT(%rcx) movq $VM_MAXUSER_ADDRESS,%rax /* make sure 'from' is within bounds */ subq %rsi,%rax jbe cpystrflt /* restrict maxlen to <= VM_MAXUSER_ADDRESS-from */ cmpq %rdx,%rax jae 1f movq %rax,%rdx movq %rax,%r8 1: incq %rdx cld 2: decq %rdx jz 3f lodsb stosb orb %al,%al jnz 2b /* Success -- 0 byte reached */ decq %rdx xorl %eax,%eax jmp cpystrflt_x 3: /* rdx is zero - return ENAMETOOLONG or EFAULT */ movq $VM_MAXUSER_ADDRESS,%rax cmpq %rax,%rsi jae cpystrflt 4: movq $ENAMETOOLONG,%rax jmp cpystrflt_x cpystrflt: movq $EFAULT,%rax cpystrflt_x: /* set *lencopied and return %eax */ movq PCPU(CURPCB),%rcx movq $0,PCB_ONFAULT(%rcx) testq %r9,%r9 jz 1f subq %rdx,%r8 movq %r8,(%r9) 1: POP_FRAME_POINTER ret END(copyinstr) /* * copystr(from, to, maxlen, int *lencopied) - MP SAFE * %rdi, %rsi, %rdx, %rcx */ ENTRY(copystr) PUSH_FRAME_POINTER movq %rdx,%r8 /* %r8 = maxlen */ xchgq %rdi,%rsi incq %rdx cld 1: decq %rdx jz 4f lodsb stosb orb %al,%al jnz 1b /* Success -- 0 byte reached */ decq %rdx xorl %eax,%eax jmp 6f 4: /* rdx is zero -- return ENAMETOOLONG */ movq $ENAMETOOLONG,%rax 6: testq %rcx,%rcx jz 7f /* set *lencopied and return %rax */ subq %rdx,%r8 movq %r8,(%rcx) 7: POP_FRAME_POINTER ret END(copystr) /* * Handling of special amd64 registers and descriptor tables etc * %rdi */ /* void lgdt(struct region_descriptor *rdp); */ ENTRY(lgdt) /* reload the descriptor table */ lgdt (%rdi) /* flush the prefetch q */ jmp 1f nop 1: movl $KDSEL,%eax movl %eax,%ds movl %eax,%es movl %eax,%fs /* Beware, use wrmsr to set 64 bit base */ movl %eax,%gs movl %eax,%ss /* reload code selector by turning return into intersegmental return */ popq %rax pushq $KCSEL pushq %rax MEXITCOUNT lretq END(lgdt) 
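/*
 * A C rendering of the copystr() loop above: copy bytes until a NUL or
 * until maxlen is exhausted, report ENAMETOOLONG in the latter case, and
 * optionally return how many bytes were copied (including the NUL).
 */
#include <errno.h>
#include <stddef.h>

static int
copystr_sketch(const char *from, char *to, size_t maxlen, size_t *lencopied)
{
	size_t n;
	int error;

	error = ENAMETOOLONG;
	for (n = 0; n < maxlen; n++) {
		if ((to[n] = from[n]) == '\0') {
			n++;		/* count the terminating NUL */
			error = 0;
			break;
		}
	}
	if (lencopied != NULL)
		*lencopied = n;
	return (error);
}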
/*****************************************************************************/ /* setjmp, longjmp */ /*****************************************************************************/ ENTRY(setjmp) movq %rbx,0(%rdi) /* save rbx */ movq %rsp,8(%rdi) /* save rsp */ movq %rbp,16(%rdi) /* save rbp */ movq %r12,24(%rdi) /* save r12 */ movq %r13,32(%rdi) /* save r13 */ movq %r14,40(%rdi) /* save r14 */ movq %r15,48(%rdi) /* save r15 */ movq 0(%rsp),%rdx /* get rta */ movq %rdx,56(%rdi) /* save rip */ xorl %eax,%eax /* return(0); */ ret END(setjmp) ENTRY(longjmp) movq 0(%rdi),%rbx /* restore rbx */ movq 8(%rdi),%rsp /* restore rsp */ movq 16(%rdi),%rbp /* restore rbp */ movq 24(%rdi),%r12 /* restore r12 */ movq 32(%rdi),%r13 /* restore r13 */ movq 40(%rdi),%r14 /* restore r14 */ movq 48(%rdi),%r15 /* restore r15 */ movq 56(%rdi),%rdx /* get rta */ movq %rdx,0(%rsp) /* put in return frame */ xorl %eax,%eax /* return(1); */ incl %eax ret END(longjmp) /* * Support for reading MSRs in the safe manner. */ ENTRY(rdmsr_safe) /* int rdmsr_safe(u_int msr, uint64_t *data) */ PUSH_FRAME_POINTER movq PCPU(CURPCB),%r8 movq $msr_onfault,PCB_ONFAULT(%r8) movl %edi,%ecx rdmsr /* Read MSR pointed by %ecx. Returns high 32 bits in %edx, low 32 bits in %eax */ salq $32,%rdx /* shift %rdx left by 32 bits */ movl %eax,%eax /* zero-extend %eax -> %rax */ orq %rdx,%rax movq %rax,(%rsi) xorq %rax,%rax movq %rax,PCB_ONFAULT(%r8) POP_FRAME_POINTER ret /* * Support for writing MSRs in the safe manner. */ ENTRY(wrmsr_safe) /* int wrmsr_safe(u_int msr, uint64_t data) */ PUSH_FRAME_POINTER movq PCPU(CURPCB),%r8 movq $msr_onfault,PCB_ONFAULT(%r8) movl %edi,%ecx movl %esi,%eax sarq $32,%rsi movl %esi,%edx wrmsr /* Write MSR pointed by %ecx. Accepts high 32 bits in %edx, low 32 bits in %eax. */ xorq %rax,%rax movq %rax,PCB_ONFAULT(%r8) POP_FRAME_POINTER ret /* * MSR operations fault handler */ ALIGN_TEXT msr_onfault: movq $0,PCB_ONFAULT(%r8) movl $EFAULT,%eax POP_FRAME_POINTER ret Index: projects/clang390-import/sys/amd64/include/md_var.h =================================================================== --- projects/clang390-import/sys/amd64/include/md_var.h (revision 305016) +++ projects/clang390-import/sys/amd64/include/md_var.h (revision 305017) @@ -1,63 +1,64 @@ /*- * Copyright (c) 1995 Bruce D. Evans. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the author nor the names of contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED.
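/*
 * What rdmsr_safe() above assembles once RDMSR succeeds: the instruction
 * leaves the high 32 bits of the MSR in %edx and the low 32 bits in %eax,
 * which the code merges into a single 64-bit result.
 */
#include <stdint.h>

static uint64_t
msr_merge(uint32_t hi, uint32_t lo)
{
	return (((uint64_t)hi << 32) | lo);
}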
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _MACHINE_MD_VAR_H_ #define _MACHINE_MD_VAR_H_ #include extern uint64_t *vm_page_dump; struct savefpu; void amd64_db_resume_dbreg(void); void amd64_syscall(struct thread *td, int traced); void doreti_iret(void) __asm(__STRING(doreti_iret)); void doreti_iret_fault(void) __asm(__STRING(doreti_iret_fault)); void ld_ds(void) __asm(__STRING(ld_ds)); void ld_es(void) __asm(__STRING(ld_es)); void ld_fs(void) __asm(__STRING(ld_fs)); void ld_gs(void) __asm(__STRING(ld_gs)); void ld_fsbase(void) __asm(__STRING(ld_fsbase)); void ld_gsbase(void) __asm(__STRING(ld_gsbase)); void ds_load_fault(void) __asm(__STRING(ds_load_fault)); void es_load_fault(void) __asm(__STRING(es_load_fault)); void fs_load_fault(void) __asm(__STRING(fs_load_fault)); void gs_load_fault(void) __asm(__STRING(gs_load_fault)); void fsbase_load_fault(void) __asm(__STRING(fsbase_load_fault)); void gsbase_load_fault(void) __asm(__STRING(gsbase_load_fault)); void fpstate_drop(struct thread *td); void pagezero(void *addr); void setidt(int idx, alias_for_inthand_t *func, int typ, int dpl, int ist); +void sse2_pagezero(void *addr); struct savefpu *get_pcb_user_save_td(struct thread *td); struct savefpu *get_pcb_user_save_pcb(struct pcb *pcb); #endif /* !_MACHINE_MD_VAR_H_ */ Index: projects/clang390-import/sys/boot/i386/libi386/biosdisk.c =================================================================== --- projects/clang390-import/sys/boot/i386/libi386/biosdisk.c (revision 305016) +++ projects/clang390-import/sys/boot/i386/libi386/biosdisk.c (revision 305017) @@ -1,921 +1,922 @@ /*- * Copyright (c) 1998 Michael Smith * Copyright (c) 2012 Andrey V. Elsukov * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); /* * BIOS disk device handling. 
* * Ideas and algorithms from: * * - NetBSD libi386/biosdisk.c * - FreeBSD biosboot/disk.c * */ #include #include #include #include #include #include #include #include "disk.h" #include "libi386.h" #ifdef LOADER_GELI_SUPPORT #include "cons.h" #include "drv.h" #include "gpt.h" #include "part.h" #include struct pentry { struct ptable_entry part; uint64_t flags; union { uint8_t bsd; uint8_t mbr; uuid_t gpt; uint16_t vtoc8; } type; STAILQ_ENTRY(pentry) entry; }; struct ptable { enum ptable_type type; uint16_t sectorsize; uint64_t sectors; STAILQ_HEAD(, pentry) entries; }; #include "geliboot.c" #endif /* LOADER_GELI_SUPPORT */ CTASSERT(sizeof(struct i386_devdesc) >= sizeof(struct disk_devdesc)); #define BIOS_NUMDRIVES 0x475 #define BIOSDISK_SECSIZE 512 #define BUFSIZE (1 * BIOSDISK_SECSIZE) #define DT_ATAPI 0x10 /* disk type for ATAPI floppies */ #define WDMAJOR 0 /* major numbers for devices we frontend for */ #define WFDMAJOR 1 #define FDMAJOR 2 #define DAMAJOR 4 #ifdef DISK_DEBUG # define DEBUG(fmt, args...) printf("%s: " fmt "\n" , __func__ , ## args) #else # define DEBUG(fmt, args...) #endif /* * List of BIOS devices, translation from disk unit number to * BIOS unit number. */ static struct bdinfo { int bd_unit; /* BIOS unit number */ int bd_cyl; /* BIOS geometry */ int bd_hds; int bd_sec; int bd_flags; #define BD_MODEINT13 0x0000 #define BD_MODEEDD1 0x0001 #define BD_MODEEDD3 0x0002 #define BD_MODEMASK 0x0003 #define BD_FLOPPY 0x0004 int bd_type; /* BIOS 'drive type' (floppy only) */ uint16_t bd_sectorsize; /* Sector size */ uint64_t bd_sectors; /* Disk size */ int bd_open; /* reference counter */ void *bd_bcache; /* buffer cache data */ } bdinfo [MAXBDDEV]; static int nbdinfo = 0; #define BD(dev) (bdinfo[(dev)->d_unit]) static int bd_read(struct disk_devdesc *dev, daddr_t dblk, int blks, caddr_t dest); static int bd_write(struct disk_devdesc *dev, daddr_t dblk, int blks, caddr_t dest); static int bd_int13probe(struct bdinfo *bd); static int bd_init(void); static int bd_strategy(void *devdata, int flag, daddr_t dblk, size_t offset, size_t size, char *buf, size_t *rsize); static int bd_realstrategy(void *devdata, int flag, daddr_t dblk, size_t offset, size_t size, char *buf, size_t *rsize); static int bd_open(struct open_file *f, ...); static int bd_close(struct open_file *f); static int bd_ioctl(struct open_file *f, u_long cmd, void *data); static void bd_print(int verbose); static void bd_cleanup(void); #ifdef LOADER_GELI_SUPPORT static enum isgeli { ISGELI_UNKNOWN, ISGELI_NO, ISGELI_YES }; static enum isgeli geli_status[MAXBDDEV][MAXTBLENTS]; int bios_read(void *vdev __unused, struct dsk *priv, off_t off, char *buf, size_t bytes); #endif /* LOADER_GELI_SUPPORT */ struct devsw biosdisk = { "disk", DEVT_DISK, bd_init, bd_strategy, bd_open, bd_close, bd_ioctl, bd_print, bd_cleanup }; /* * Translate between BIOS device numbers and our private unit numbers. */ int bd_bios2unit(int biosdev) { int i; DEBUG("looking for bios device 0x%x", biosdev); for (i = 0; i < nbdinfo; i++) { DEBUG("bd unit %d is BIOS device 0x%x", i, bdinfo[i].bd_unit); if (bdinfo[i].bd_unit == biosdev) return (i); } return (-1); } int bd_unit2bios(int unit) { if ((unit >= 0) && (unit < nbdinfo)) return (bdinfo[unit].bd_unit); return (-1); } /* * Quiz the BIOS for disk devices, save a little info about them. 
*/ static int bd_init(void) { int base, unit, nfd = 0; #ifdef LOADER_GELI_SUPPORT geli_init(); #endif /* sequence 0, 0x80 */ for (base = 0; base <= 0x80; base += 0x80) { for (unit = base; (nbdinfo < MAXBDDEV); unit++) { #ifndef VIRTUALBOX /* * Check the BIOS equipment list for number * of fixed disks. */ if(base == 0x80 && (nfd >= *(unsigned char *)PTOV(BIOS_NUMDRIVES))) break; #endif bdinfo[nbdinfo].bd_open = 0; bdinfo[nbdinfo].bd_bcache = NULL; bdinfo[nbdinfo].bd_unit = unit; bdinfo[nbdinfo].bd_flags = unit < 0x80 ? BD_FLOPPY: 0; if (!bd_int13probe(&bdinfo[nbdinfo])) break; /* XXX we need "disk aliases" to make this simpler */ printf("BIOS drive %c: is disk%d\n", (unit < 0x80) ? ('A' + unit): ('C' + unit - 0x80), nbdinfo); nbdinfo++; if (base == 0x80) nfd++; } } bcache_add_dev(nbdinfo); return(0); } static void bd_cleanup(void) { disk_cleanup(&biosdisk); } /* * Try to detect a device supported by the legacy int13 BIOS */ static int bd_int13probe(struct bdinfo *bd) { struct edd_params params; v86.ctl = V86_FLAGS; v86.addr = 0x13; v86.eax = 0x800; v86.edx = bd->bd_unit; v86int(); if (V86_CY(v86.efl) || /* carry set */ (v86.ecx & 0x3f) == 0 || /* absurd sector number */ (v86.edx & 0xff) <= (unsigned)(bd->bd_unit & 0x7f)) /* unit # bad */ return (0); /* skip device */ /* Convert max cyl # -> # of cylinders */ bd->bd_cyl = ((v86.ecx & 0xc0) << 2) + ((v86.ecx & 0xff00) >> 8) + 1; /* Convert max head # -> # of heads */ bd->bd_hds = ((v86.edx & 0xff00) >> 8) + 1; bd->bd_sec = v86.ecx & 0x3f; bd->bd_type = v86.ebx & 0xff; bd->bd_flags |= BD_MODEINT13; /* Calculate sectors count from the geometry */ bd->bd_sectors = bd->bd_cyl * bd->bd_hds * bd->bd_sec; bd->bd_sectorsize = BIOSDISK_SECSIZE; DEBUG("unit 0x%x geometry %d/%d/%d", bd->bd_unit, bd->bd_cyl, bd->bd_hds, bd->bd_sec); /* Determine if we can use EDD with this device. */ v86.ctl = V86_FLAGS; v86.addr = 0x13; v86.eax = 0x4100; v86.edx = bd->bd_unit; v86.ebx = 0x55aa; v86int(); if (V86_CY(v86.efl) || /* carry set */ (v86.ebx & 0xffff) != 0xaa55 || /* signature */ (v86.ecx & EDD_INTERFACE_FIXED_DISK) == 0) return (1); /* EDD supported */ bd->bd_flags |= BD_MODEEDD1; if ((v86.eax & 0xff00) >= 0x3000) bd->bd_flags |= BD_MODEEDD3; /* Get disk params */ params.len = sizeof(struct edd_params); v86.ctl = V86_FLAGS; v86.addr = 0x13; v86.eax = 0x4800; v86.edx = bd->bd_unit; v86.ds = VTOPSEG(¶ms); v86.esi = VTOPOFF(¶ms); v86int(); if (!V86_CY(v86.efl)) { bd->bd_sectors = params.sectors; bd->bd_sectorsize = params.sector_size; } DEBUG("unit 0x%x flags %x, sectors %llu, sectorsize %u", bd->bd_unit, bd->bd_flags, bd->bd_sectors, bd->bd_sectorsize); return (1); } /* * Print information about disks */ static void bd_print(int verbose) { static char line[80]; struct disk_devdesc dev; int i; pager_open(); for (i = 0; i < nbdinfo; i++) { sprintf(line, " disk%d: BIOS drive %c (%ju X %u):\n", i, (bdinfo[i].bd_unit < 0x80) ? ('A' + bdinfo[i].bd_unit): ('C' + bdinfo[i].bd_unit - 0x80), (uintmax_t)bdinfo[i].bd_sectors, bdinfo[i].bd_sectorsize); if (pager_output(line)) break; dev.d_dev = &biosdisk; dev.d_unit = i; dev.d_slice = -1; dev.d_partition = -1; if (disk_open(&dev, bdinfo[i].bd_sectorsize * bdinfo[i].bd_sectors, bdinfo[i].bd_sectorsize, (bdinfo[i].bd_flags & BD_FLOPPY) ? DISK_F_NOCACHE: 0) == 0) { sprintf(line, " disk%d", i); disk_print(&dev, line, verbose); disk_close(&dev); } } pager_close(); } /* * Attempt to open the disk described by (dev) for use by (f). * * Note that the philosophy here is "give them exactly what * they ask for". 
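/*
 * Sketch of the INT 13h AH=08h decoding in bd_int13probe() above: the
 * maximum cylinder number is split across CX (high two bits in bits 7:6
 * of CL, low eight bits in CH), the sectors-per-track count occupies bits
 * 5:0 of CL, and the maximum head number is in DH.  Maxima become counts
 * by adding one; sector numbers are already 1-based.
 */
#include <stdint.h>

struct chs_geom {
	unsigned cyls, heads, secs;
};

static struct chs_geom
decode_chs(uint32_t ecx, uint32_t edx)
{
	struct chs_geom g;

	g.cyls = (((ecx & 0xc0) << 2) | ((ecx & 0xff00) >> 8)) + 1;
	g.heads = ((edx & 0xff00) >> 8) + 1;
	g.secs = ecx & 0x3f;
	return (g);
}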
This is necessary because being too "smart" * about what the user might want leads to complications. * (eg. given no slice or partition value, with a disk that is * sliced - are they after the first BSD slice, or the DOS * slice before it?) */ static int bd_open(struct open_file *f, ...) { struct disk_devdesc *dev, rdev; int err, g_err; va_list ap; va_start(ap, f); dev = va_arg(ap, struct disk_devdesc *); va_end(ap); if (dev->d_unit < 0 || dev->d_unit >= nbdinfo) return (EIO); BD(dev).bd_open++; if (BD(dev).bd_bcache == NULL) BD(dev).bd_bcache = bcache_allocate(); err = disk_open(dev, BD(dev).bd_sectors * BD(dev).bd_sectorsize, BD(dev).bd_sectorsize, (BD(dev).bd_flags & BD_FLOPPY) ? DISK_F_NOCACHE: 0); #ifdef LOADER_GELI_SUPPORT static char gelipw[GELI_PW_MAXLEN]; char *passphrase; if (err) return (err); /* if we already know there is no GELI, skip the rest */ if (geli_status[dev->d_unit][dev->d_slice] != ISGELI_UNKNOWN) return (err); struct dsk dskp; struct ptable *table = NULL; struct ptable_entry part; struct pentry *entry; int geli_part = 0; dskp.drive = bd_unit2bios(dev->d_unit); dskp.type = dev->d_type; dskp.unit = dev->d_unit; dskp.slice = dev->d_slice; dskp.part = dev->d_partition; dskp.start = dev->d_offset; memcpy(&rdev, dev, sizeof(rdev)); /* to read the GPT table, we need to read the first sector */ rdev.d_offset = 0; /* We need the LBA of the end of the partition */ table = ptable_open(&rdev, BD(dev).bd_sectors, BD(dev).bd_sectorsize, ptblread); if (table == NULL) { DEBUG("Can't read partition table"); /* soft failure, return the exit status of disk_open */ return (err); } if (table->type == PTABLE_GPT) dskp.part = 255; STAILQ_FOREACH(entry, &table->entries, entry) { dskp.slice = entry->part.index; dskp.start = entry->part.start; if (is_geli(&dskp) == 0) { geli_status[dev->d_unit][dskp.slice] = ISGELI_YES; return (0); } if (geli_taste(bios_read, &dskp, entry->part.end - entry->part.start) == 0) { if ((passphrase = getenv("kern.geom.eli.passphrase")) != NULL) { /* Use the cached passphrase */ bcopy(passphrase, &gelipw, GELI_PW_MAXLEN); } if (geli_passphrase(&gelipw, dskp.unit, 'p', (dskp.slice > 0 ? 
dskp.slice : dskp.part), &dskp) == 0) { setenv("kern.geom.eli.passphrase", &gelipw, 1); bzero(gelipw, sizeof(gelipw)); geli_status[dev->d_unit][dskp.slice] = ISGELI_YES; geli_part++; } } else geli_status[dev->d_unit][dskp.slice] = ISGELI_NO; } /* none of the partitions on this disk have GELI */ if (geli_part == 0) { /* found no GELI */ geli_status[dev->d_unit][dev->d_slice] = ISGELI_NO; } #endif /* LOADER_GELI_SUPPORT */ return (err); } static int bd_close(struct open_file *f) { struct disk_devdesc *dev; dev = (struct disk_devdesc *)f->f_devdata; BD(dev).bd_open--; if (BD(dev).bd_open == 0) { bcache_free(BD(dev).bd_bcache); BD(dev).bd_bcache = NULL; } return (disk_close(dev)); } static int bd_ioctl(struct open_file *f, u_long cmd, void *data) { struct disk_devdesc *dev; dev = (struct disk_devdesc *)f->f_devdata; switch (cmd) { case DIOCGSECTORSIZE: *(u_int *)data = BD(dev).bd_sectorsize; break; case DIOCGMEDIASIZE: *(off_t *)data = BD(dev).bd_sectors * BD(dev).bd_sectorsize; break; default: return (ENOTTY); } return (0); } static int bd_strategy(void *devdata, int rw, daddr_t dblk, size_t offset, size_t size, char *buf, size_t *rsize) { struct bcache_devdata bcd; struct disk_devdesc *dev; dev = (struct disk_devdesc *)devdata; bcd.dv_strategy = bd_realstrategy; bcd.dv_devdata = devdata; bcd.dv_cache = BD(dev).bd_bcache; return (bcache_strategy(&bcd, rw, dblk + dev->d_offset, offset, size, buf, rsize)); } static int bd_realstrategy(void *devdata, int rw, daddr_t dblk, size_t offset, size_t size, char *buf, size_t *rsize) { struct disk_devdesc *dev = (struct disk_devdesc *)devdata; - int blks; + int blks, remaining; #ifdef BD_SUPPORT_FRAGS /* XXX: sector size */ char fragbuf[BIOSDISK_SECSIZE]; size_t fragsize; fragsize = size % BIOSDISK_SECSIZE; #else if (size % BD(dev).bd_sectorsize) panic("bd_strategy: %d bytes I/O not multiple of block size", size); #endif DEBUG("open_disk %p", dev); blks = size / BD(dev).bd_sectorsize; if (rsize) *rsize = 0; - if (dblk >= BD(dev).bd_sectors) { - DEBUG("IO past disk end %llu", (unsigned long long)dblk); - return (EIO); - } - - if (dblk + blks > BD(dev).bd_sectors) { - /* perform partial read */ - blks = BD(dev).bd_sectors - dblk; + /* + * Perform partial read to prevent read-ahead crossing + * the end of disk - or any 32 bit aliases of the end. + * Signed arithmetic is used to handle wrap-around cases + * like we do for TCP sequence numbers. 
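/*
 * The truncated-signed comparison applied just below, as a standalone
 * sketch: (int)(sectors - dblk) goes negative or zero when the request
 * starts at or past the end of the disk, including when a bad 64-bit dblk
 * aliases the end modulo 2^32, so only a genuinely short in-range request
 * is trimmed.
 */
#include <stdint.h>

static int
clip_blks(uint64_t sectors, uint64_t dblk, int blks)
{
	int remaining = (int)(sectors - dblk);	/* deliberate truncation */

	if (remaining > 0 && remaining < blks)
		return (remaining);	/* partial read at end of disk */
	return (blks);			/* unchanged; BIOS may still fail */
}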
+ */ + remaining = (int)(BD(dev).bd_sectors - dblk); /* truncate */ + if (remaining > 0 && remaining < blks) { + blks = remaining; size = blks * BD(dev).bd_sectorsize; DEBUG("short read %d", blks); } switch(rw){ case F_READ: DEBUG("read %d from %lld to %p", blks, dblk, buf); if (blks && bd_read(dev, dblk, blks, buf)) { DEBUG("read error"); return (EIO); } #ifdef BD_SUPPORT_FRAGS /* XXX: sector size */ DEBUG("bd_strategy: frag read %d from %d+%d to %p", fragsize, dblk, blks, buf + (blks * BIOSDISK_SECSIZE)); if (fragsize && bd_read(od, dblk + blks, 1, fragsize)) { DEBUG("frag read error"); return(EIO); } bcopy(fragbuf, buf + (blks * BIOSDISK_SECSIZE), fragsize); #endif break; case F_WRITE : DEBUG("write %d from %d to %p", blks, dblk, buf); if (blks && bd_write(dev, dblk, blks, buf)) { DEBUG("write error"); return (EIO); } #ifdef BD_SUPPORT_FRAGS if(fragsize) { DEBUG("Attempted to write a frag"); return (EIO); } #endif break; default: /* DO NOTHING */ return (EROFS); } if (rsize) *rsize = size; return (0); } /* Max number of sectors to bounce-buffer if the request crosses a 64k boundary */ #define FLOPPY_BOUNCEBUF 18 static int bd_edd_io(struct disk_devdesc *dev, daddr_t dblk, int blks, caddr_t dest, int write) { static struct edd_packet packet; packet.len = sizeof(struct edd_packet); packet.count = blks; packet.off = VTOPOFF(dest); packet.seg = VTOPSEG(dest); packet.lba = dblk; v86.ctl = V86_FLAGS; v86.addr = 0x13; if (write) /* Should we Write with verify ?? 0x4302 ? */ v86.eax = 0x4300; else v86.eax = 0x4200; v86.edx = BD(dev).bd_unit; v86.ds = VTOPSEG(&packet); v86.esi = VTOPOFF(&packet); v86int(); return (V86_CY(v86.efl)); } static int bd_chs_io(struct disk_devdesc *dev, daddr_t dblk, int blks, caddr_t dest, int write) { u_int x, bpc, cyl, hd, sec; bpc = BD(dev).bd_sec * BD(dev).bd_hds; /* blocks per cylinder */ x = dblk; cyl = x / bpc; /* block # / blocks per cylinder */ x %= bpc; /* block offset into cylinder */ hd = x / BD(dev).bd_sec; /* offset / blocks per track */ sec = x % BD(dev).bd_sec; /* offset into track */ /* correct sector number for 1-based BIOS numbering */ sec++; if (cyl > 1023) /* CHS doesn't support cylinders > 1023. */ return (1); v86.ctl = V86_FLAGS; v86.addr = 0x13; if (write) v86.eax = 0x300 | blks; else v86.eax = 0x200 | blks; v86.ecx = ((cyl & 0xff) << 8) | ((cyl & 0x300) >> 2) | sec; v86.edx = (hd << 8) | BD(dev).bd_unit; v86.es = VTOPSEG(dest); v86.ebx = VTOPOFF(dest); v86int(); return (V86_CY(v86.efl)); } static int bd_io(struct disk_devdesc *dev, daddr_t dblk, int blks, caddr_t dest, int write) { u_int x, sec, result, resid, retry, maxfer; caddr_t p, xp, bbuf, breg; /* Just in case some idiot actually tries to read/write -1 blocks... */ if (blks < 0) return (-1); resid = blks; p = dest; /* Decide whether we have to bounce */ if (VTOP(dest) >> 20 != 0 || (BD(dev).bd_unit < 0x80 && (VTOP(dest) >> 16) != (VTOP(dest + blks * BD(dev).bd_sectorsize) >> 16))) { /* * There is a 64k physical boundary somewhere in the * destination buffer, or the destination buffer is above * first 1MB of physical memory so we have to arrange a * suitable bounce buffer. Allocate a buffer twice as large * as we need to. Use the bottom half unless there is a break * there, in which case we use the top half. 
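/*
 * The bounce-buffer decision in bd_io() above as C: legacy INT 13h
 * transfers cannot reach above the first 1MB of physical memory, and
 * floppy transfers additionally must not cross a 64KB physical boundary.
 * Physical addresses are passed in directly here in place of VTOP().
 */
#include <stdbool.h>
#include <stdint.h>

static bool
needs_bounce(uint32_t pa, uint32_t len, bool floppy)
{
	if ((pa >> 20) != 0)
		return (true);		/* buffer above first 1MB */
	if (floppy && (pa >> 16) != ((pa + len) >> 16))
		return (true);		/* crosses a 64KB boundary */
	return (false);
}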
*/ x = min(FLOPPY_BOUNCEBUF, (unsigned)blks); bbuf = alloca(x * 2 * BD(dev).bd_sectorsize); if (((u_int32_t)VTOP(bbuf) & 0xffff0000) == ((u_int32_t)VTOP(bbuf + x * BD(dev).bd_sectorsize) & 0xffff0000)) { breg = bbuf; } else { breg = bbuf + x * BD(dev).bd_sectorsize; } maxfer = x; /* limit transfers to bounce region size */ } else { breg = bbuf = NULL; maxfer = 0; } while (resid > 0) { /* * Play it safe and don't cross track boundaries. * (XXX this is probably unnecessary) */ sec = dblk % BD(dev).bd_sec; /* offset into track */ x = min(BD(dev).bd_sec - sec, resid); if (maxfer > 0) x = min(x, maxfer); /* fit bounce buffer */ /* where do we transfer to? */ xp = bbuf == NULL ? p : breg; /* * Put your Data In, Put your Data out, * Put your Data In, and shake it all about */ if (write && bbuf != NULL) bcopy(p, breg, x * BD(dev).bd_sectorsize); /* * Loop retrying the operation a couple of times. The BIOS * may also retry. */ for (retry = 0; retry < 3; retry++) { /* if retrying, reset the drive */ if (retry > 0) { v86.ctl = V86_FLAGS; v86.addr = 0x13; v86.eax = 0; v86.edx = BD(dev).bd_unit; v86int(); } if (BD(dev).bd_flags & BD_MODEEDD1) result = bd_edd_io(dev, dblk, x, xp, write); else result = bd_chs_io(dev, dblk, x, xp, write); if (result == 0) break; } if (write) DEBUG("Write %d sector(s) from %p (0x%x) to %lld %s", x, p, VTOP(p), dblk, result ? "failed" : "ok"); else DEBUG("Read %d sector(s) from %lld to %p (0x%x) %s", x, dblk, p, VTOP(p), result ? "failed" : "ok"); if (result) { return(-1); } if (!write && bbuf != NULL) bcopy(breg, p, x * BD(dev).bd_sectorsize); p += (x * BD(dev).bd_sectorsize); dblk += x; resid -= x; } /* hexdump(dest, (blks * BD(dev).bd_sectorsize)); */ return(0); } static int bd_read(struct disk_devdesc *dev, daddr_t dblk, int blks, caddr_t dest) { #ifdef LOADER_GELI_SUPPORT struct dsk dskp; off_t p_off, diff; daddr_t alignlba; int err, n, alignblks; char *tmpbuf; /* if we already know there is no GELI, skip the rest */ if (geli_status[dev->d_unit][dev->d_slice] != ISGELI_YES) return (bd_io(dev, dblk, blks, dest, 0)); if (geli_status[dev->d_unit][dev->d_slice] == ISGELI_YES) { /* * Align reads to DEV_GELIBOOT_BSIZE bytes because partial * sectors cannot be decrypted. Round the requested LBA down to * nearest multiple of DEV_GELIBOOT_BSIZE bytes. */ alignlba = rounddown2(dblk * BD(dev).bd_sectorsize, DEV_GELIBOOT_BSIZE) / BD(dev).bd_sectorsize; /* * Round number of blocks to read up to nearest multiple of * DEV_GELIBOOT_BSIZE */ diff = (dblk - alignlba) * BD(dev).bd_sectorsize; alignblks = roundup2(blks * BD(dev).bd_sectorsize + diff, DEV_GELIBOOT_BSIZE) / BD(dev).bd_sectorsize; /* * If the read is rounded up to a larger size, use a temporary * buffer here because the buffer provided by the caller may be * too small. 
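/*
 * The alignment arithmetic from bd_read()'s GELI path above, standalone:
 * round the starting LBA down and the length up so that the transfer
 * covers whole crypto blocks, since partial GELI sectors cannot be
 * decrypted.  4096 stands in for DEV_GELIBOOT_BSIZE.
 */
#include <stdint.h>

#define	EX_GELI_BSIZE	4096ULL

static void
geli_align(uint64_t dblk, uint64_t blks, uint64_t secsz,
    uint64_t *alignlba, uint64_t *alignblks, uint64_t *diff)
{
	*alignlba = ((dblk * secsz) & ~(EX_GELI_BSIZE - 1)) / secsz;
	*diff = (dblk - *alignlba) * secsz;
	*alignblks = ((blks * secsz + *diff + EX_GELI_BSIZE - 1) &
	    ~(EX_GELI_BSIZE - 1)) / secsz;
}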
*/ if (diff == 0) { tmpbuf = dest; } else { tmpbuf = malloc(alignblks * BD(dev).bd_sectorsize); if (tmpbuf == NULL) { return (-1); } } err = bd_io(dev, alignlba, alignblks, tmpbuf, 0); if (err) return (err); dskp.drive = bd_unit2bios(dev->d_unit); dskp.type = dev->d_type; dskp.unit = dev->d_unit; dskp.slice = dev->d_slice; dskp.part = dev->d_partition; dskp.start = dev->d_offset; /* GELI needs the offset relative to the partition start */ p_off = alignlba - dskp.start; err = geli_read(&dskp, p_off * BD(dev).bd_sectorsize, tmpbuf, alignblks * BD(dev).bd_sectorsize); if (err) return (err); if (tmpbuf != dest) { bcopy(tmpbuf + diff, dest, blks * BD(dev).bd_sectorsize); free(tmpbuf); } return (0); } #endif /* LOADER_GELI_SUPPORT */ return (bd_io(dev, dblk, blks, dest, 0)); } static int bd_write(struct disk_devdesc *dev, daddr_t dblk, int blks, caddr_t dest) { return (bd_io(dev, dblk, blks, dest, 1)); } /* * Return the BIOS geometry of a given "fixed drive" in a format * suitable for the legacy bootinfo structure. Since the kernel is * expecting raw int 0x13/0x8 values for N_BIOS_GEOM drives, we * prefer to get the information directly, rather than rely on being * able to put it together from information already maintained for * different purposes and for a probably different number of drives. * * For valid drives, the geometry is expected in the format (31..0) * "000000cc cccccccc hhhhhhhh 00ssssss"; and invalid drives are * indicated by returning the geometry of a "1.2M" PC-format floppy * disk. And, incidentally, what is returned is not the geometry as * such but the highest valid cylinder, head, and sector numbers. */ u_int32_t bd_getbigeom(int bunit) { v86.ctl = V86_FLAGS; v86.addr = 0x13; v86.eax = 0x800; v86.edx = 0x80 + bunit; v86int(); if (V86_CY(v86.efl)) return 0x4f010f; return ((v86.ecx & 0xc0) << 18) | ((v86.ecx & 0xff00) << 8) | (v86.edx & 0xff00) | (v86.ecx & 0x3f); } /* * Return a suitable dev_t value for (dev). * * In the case where it looks like (dev) is a SCSI disk, we allow the number of * IDE disks to be specified in $num_ide_disks. There should be a Better Way. */ int bd_getdev(struct i386_devdesc *d) { struct disk_devdesc *dev; int biosdev; int major; int rootdev; char *nip, *cp; int i, unit; dev = (struct disk_devdesc *)d; biosdev = bd_unit2bios(dev->d_unit); DEBUG("unit %d BIOS device %d", dev->d_unit, biosdev); if (biosdev == -1) /* not a BIOS device */ return(-1); if (disk_open(dev, BD(dev).bd_sectors * BD(dev).bd_sectorsize, BD(dev).bd_sectorsize,(BD(dev).bd_flags & BD_FLOPPY) ? 
DISK_F_NOCACHE: 0) != 0) /* oops, not a viable device */ return (-1); else disk_close(dev); if (biosdev < 0x80) { /* floppy (or emulated floppy) or ATAPI device */ if (bdinfo[dev->d_unit].bd_type == DT_ATAPI) { /* is an ATAPI disk */ major = WFDMAJOR; } else { /* is a floppy disk */ major = FDMAJOR; } } else { /* assume an IDE disk */ major = WDMAJOR; } /* default root disk unit number */ unit = biosdev & 0x7f; /* XXX a better kludge to set the root disk unit number */ if ((nip = getenv("root_disk_unit")) != NULL) { i = strtol(nip, &cp, 0); /* check for parse error */ if ((cp != nip) && (*cp == 0)) unit = i; } rootdev = MAKEBOOTDEV(major, dev->d_slice + 1, unit, dev->d_partition); DEBUG("dev is 0x%x\n", rootdev); return(rootdev); } #ifdef LOADER_GELI_SUPPORT int bios_read(void *vdev __unused, struct dsk *priv, off_t off, char *buf, size_t bytes) { struct disk_devdesc dev; dev.d_dev = &biosdisk; dev.d_type = priv->type; dev.d_unit = priv->unit; dev.d_slice = priv->slice; dev.d_partition = priv->part; dev.d_offset = priv->start; off = off / BD(&dev).bd_sectorsize; /* GELI gives us the offset relative to the partition start */ off += dev.d_offset; bytes = bytes / BD(&dev).bd_sectorsize; return (bd_io(&dev, off, bytes, buf, 0)); } #endif /* LOADER_GELI_SUPPORT */ Index: projects/clang390-import/sys/dev/bhnd/bhnd.h =================================================================== --- projects/clang390-import/sys/dev/bhnd/bhnd.h (revision 305016) +++ projects/clang390-import/sys/dev/bhnd/bhnd.h (revision 305017) @@ -1,1225 +1,1229 @@ /*- * Copyright (c) 2015 Landon Fuller * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer, * without modification. * 2. Redistributions in binary form must reproduce at minimum a disclaimer * similar to the "NO WARRANTY" disclaimer below ("Disclaimer") and any * redistribution must be conditioned upon including a substantially * similar Disclaimer requirement for further binary redistribution. * * NO WARRANTY * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF NONINFRINGEMENT, MERCHANTIBILITY * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL * THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGES. * * $FreeBSD$ */ #ifndef _BHND_BHND_H_ #define _BHND_BHND_H_ #include #include #include #include "bhnd_ids.h" #include "bhnd_types.h" #include "bhnd_debug.h" #include "bhnd_bus_if.h" #include "bhnd_match.h" #include "nvram/bhnd_nvram.h" extern devclass_t bhnd_devclass; extern devclass_t bhnd_hostb_devclass; extern devclass_t bhnd_nvram_devclass; #define BHND_CHIPID_MAX_NAMELEN 32 /**< maximum buffer required for a bhnd_format_chip_id() */ /** * bhnd child instance variables */ enum bhnd_device_vars { BHND_IVAR_VENDOR, /**< Designer's JEP-106 manufacturer ID. 
*/ BHND_IVAR_DEVICE, /**< Part number */ BHND_IVAR_HWREV, /**< Core revision */ BHND_IVAR_DEVICE_CLASS, /**< Core class (@sa bhnd_devclass_t) */ BHND_IVAR_VENDOR_NAME, /**< Core vendor name */ BHND_IVAR_DEVICE_NAME, /**< Core name */ BHND_IVAR_CORE_INDEX, /**< Bus-assigned core number */ BHND_IVAR_CORE_UNIT, /**< Bus-assigned core unit number, assigned sequentially (starting at 0) for each vendor/device pair. */ }; /** * bhnd device probe priority bands. */ enum { BHND_PROBE_ROOT = 0, /**< Nexus or host bridge */ BHND_PROBE_BUS = 1000, /**< Busses and bridges */ BHND_PROBE_CPU = 2000, /**< CPU devices */ BHND_PROBE_INTERRUPT = 3000, /**< Interrupt controllers. */ BHND_PROBE_TIMER = 4000, /**< Timers and clocks. */ BHND_PROBE_RESOURCE = 5000, /**< Resource discovery (including NVRAM/SPROM) */ BHND_PROBE_DEFAULT = 6000, /**< Default device priority */ }; /** * Constants defining fine grained ordering within a BHND_PROBE_* priority band. * * Example: * @code * BHND_PROBE_BUS + BHND_PROBE_ORDER_FIRST * @endcode */ enum { BHND_PROBE_ORDER_FIRST = 0, BHND_PROBE_ORDER_EARLY = 25, BHND_PROBE_ORDER_MIDDLE = 50, BHND_PROBE_ORDER_LATE = 75, BHND_PROBE_ORDER_LAST = 100 }; /* * Simplified accessors for bhnd device ivars */ #define BHND_ACCESSOR(var, ivar, type) \ __BUS_ACCESSOR(bhnd, var, BHND, ivar, type) BHND_ACCESSOR(vendor, VENDOR, uint16_t); BHND_ACCESSOR(device, DEVICE, uint16_t); BHND_ACCESSOR(hwrev, HWREV, uint8_t); BHND_ACCESSOR(class, DEVICE_CLASS, bhnd_devclass_t); BHND_ACCESSOR(vendor_name, VENDOR_NAME, const char *); BHND_ACCESSOR(device_name, DEVICE_NAME, const char *); BHND_ACCESSOR(core_index, CORE_INDEX, u_int); BHND_ACCESSOR(core_unit, CORE_UNIT, int); #undef BHND_ACCESSOR /** * A bhnd(4) board descriptor. */ struct bhnd_board_info { uint16_t board_vendor; /**< PCI-SIG vendor ID (even on non-PCI * devices). * * On PCI devices, this will generally * be the subsystem vendor ID, but the * value may be overridden in device * NVRAM. */ uint16_t board_type; /**< Board type (See BHND_BOARD_*) * * On PCI devices, this will generally * be the subsystem device ID, but the * value may be overridden in device * NVRAM. */ uint16_t board_rev; /**< Board revision. */ uint8_t board_srom_rev; /**< Board SROM format revision */ uint32_t board_flags; /**< Board flags (see BHND_BFL_*) */ uint32_t board_flags2; /**< Board flags 2 (see BHND_BFL2_*) */ uint32_t board_flags3; /**< Board flags 3 (see BHND_BFL3_*) */ }; /** * Chip Identification * * This is read from the ChipCommon ID register; on earlier bhnd(4) devices * where ChipCommon is unavailable, known values must be supplied. */ struct bhnd_chipid { uint16_t chip_id; /**< chip id (BHND_CHIPID_*) */ uint8_t chip_rev; /**< chip revision */ uint8_t chip_pkg; /**< chip package (BHND_PKGID_*) */ uint8_t chip_type; /**< chip type (BHND_CHIPTYPE_*) */ bhnd_addr_t enum_addr; /**< chip_type-specific enumeration * address; either the siba(4) base * core register block, or the bcma(4) * EROM core address. */ uint8_t ncores; /**< number of cores, if known. 0 if * not available. */ }; /** * A bhnd(4) core descriptor. */ struct bhnd_core_info { uint16_t vendor; /**< JEP-106 vendor (BHND_MFGID_*) */ uint16_t device; /**< device */ uint16_t hwrev; /**< hardware revision */ u_int core_idx; /**< bus-assigned core index */ int unit; /**< bus-assigned core unit */ }; /** * A bhnd(4) bus resource. * * This provides an abstract interface to per-core resources that may require * bus-level remapping of address windows prior to access. 
*/ struct bhnd_resource { struct resource *res; /**< the system resource. */ bool direct; /**< false if the resource requires * bus window remapping before it * is MMIO accessible. */ }; /** * Device quirk table descriptor. */ struct bhnd_device_quirk { struct bhnd_device_match desc; /**< device match descriptor */ uint32_t quirks; /**< quirk flags */ }; #define BHND_CORE_QUIRK(_rev, _flags) \ {{ BHND_MATCH_CORE_REV(_rev) }, (_flags) } #define BHND_CHIP_QUIRK(_chip, _rev, _flags) \ {{ BHND_CHIP_IR(BCM ## _chip, _rev) }, (_flags) } #define BHND_PKG_QUIRK(_chip, _pkg, _flags) \ {{ BHND_CHIP_IP(BCM ## _chip, BCM ## _chip ## _pkg) }, (_flags) } #define BHND_BOARD_QUIRK(_board, _flags) \ {{ BHND_MATCH_BOARD_TYPE(_board) }, \ (_flags) } #define BHND_DEVICE_QUIRK_END { { BHND_MATCH_ANY }, 0 } #define BHND_DEVICE_QUIRK_IS_END(_q) \ (((_q)->desc.m.match_flags == 0) && (_q)->quirks == 0) enum { BHND_DF_ANY = 0, BHND_DF_HOSTB = (1<<0), /**< core is serving as the bus' host * bridge. implies BHND_DF_ADAPTER */ BHND_DF_SOC = (1<<1), /**< core is attached to a native bus (BHND_ATTACH_NATIVE) */ BHND_DF_ADAPTER = (1<<2), /**< core is attached to a bridged * adapter (BHND_ATTACH_ADAPTER) */ }; /** Device probe table descriptor */ struct bhnd_device { const struct bhnd_device_match core; /**< core match descriptor */ const char *desc; /**< device description, or NULL. */ const struct bhnd_device_quirk *quirks_table; /**< quirks table for this device, or NULL */ uint32_t device_flags; /**< required BHND_DF_* flags */ }; #define _BHND_DEVICE(_vendor, _device, _desc, _quirks, \ _flags, ...) \ { { BHND_MATCH_CORE(BHND_MFGID_ ## _vendor, \ BHND_COREID_ ## _device) }, _desc, _quirks, \ _flags } #define BHND_DEVICE(_vendor, _device, _desc, _quirks, ...) \ _BHND_DEVICE(_vendor, _device, _desc, _quirks, \ ## __VA_ARGS__, 0) #define BHND_DEVICE_END { { BHND_MATCH_ANY }, NULL, NULL, 0 } #define BHND_DEVICE_IS_END(_d) \ (BHND_MATCH_IS_ANY(&(_d)->core) && (_d)->desc == NULL) const char *bhnd_vendor_name(uint16_t vendor); const char *bhnd_port_type_name(bhnd_port_type port_type); const char *bhnd_nvram_src_name(bhnd_nvram_src nvram_src); const char *bhnd_find_core_name(uint16_t vendor, uint16_t device); bhnd_devclass_t bhnd_find_core_class(uint16_t vendor, uint16_t device); const char *bhnd_core_name(const struct bhnd_core_info *ci); bhnd_devclass_t bhnd_core_class(const struct bhnd_core_info *ci); int bhnd_format_chip_id(char *buffer, size_t size, uint16_t chip_id); device_t bhnd_match_child(device_t dev, const struct bhnd_core_match *desc); device_t bhnd_find_child(device_t dev, bhnd_devclass_t class, int unit); device_t bhnd_find_bridge_root(device_t dev, devclass_t bus_class); const struct bhnd_core_info *bhnd_match_core( const struct bhnd_core_info *cores, u_int num_cores, const struct bhnd_core_match *desc); const struct bhnd_core_info *bhnd_find_core( const struct bhnd_core_info *cores, u_int num_cores, bhnd_devclass_t class); bool bhnd_core_matches( const struct bhnd_core_info *core, const struct bhnd_core_match *desc); bool bhnd_chip_matches( const struct bhnd_chipid *chipid, const struct bhnd_chip_match *desc); bool bhnd_board_matches( const struct bhnd_board_info *info, const struct bhnd_board_match *desc); bool bhnd_hwrev_matches(uint16_t hwrev, const struct bhnd_hwrev_match *desc); bool bhnd_device_matches(device_t dev, const struct bhnd_device_match *desc); const struct bhnd_device *bhnd_device_lookup(device_t dev, const struct bhnd_device *table, size_t entry_size); uint32_t bhnd_device_quirks(device_t 
dev, const struct bhnd_device *table, size_t entry_size); struct bhnd_core_info bhnd_get_core_info(device_t dev); int bhnd_alloc_resources(device_t dev, struct resource_spec *rs, struct bhnd_resource **res); void bhnd_release_resources(device_t dev, const struct resource_spec *rs, struct bhnd_resource **res); struct bhnd_chipid bhnd_parse_chipid(uint32_t idreg, bhnd_addr_t enum_addr); +int bhnd_chipid_fixed_ncores( + const struct bhnd_chipid *cid, + uint16_t chipc_hwrev, uint8_t *ncores); + int bhnd_read_chipid(device_t dev, struct resource_spec *rs, bus_size_t chipc_offset, struct bhnd_chipid *result); void bhnd_set_custom_core_desc(device_t dev, const char *name); void bhnd_set_default_core_desc(device_t dev); void bhnd_set_default_bus_desc(device_t dev, const struct bhnd_chipid *chip_id); int bhnd_nvram_getvar_str(device_t dev, const char *name, char *buf, size_t len, size_t *rlen); int bhnd_nvram_getvar_uint(device_t dev, const char *name, void *value, int width); int bhnd_nvram_getvar_uint8(device_t dev, const char *name, uint8_t *value); int bhnd_nvram_getvar_uint16(device_t dev, const char *name, uint16_t *value); int bhnd_nvram_getvar_uint32(device_t dev, const char *name, uint32_t *value); int bhnd_nvram_getvar_int(device_t dev, const char *name, void *value, int width); int bhnd_nvram_getvar_int8(device_t dev, const char *name, int8_t *value); int bhnd_nvram_getvar_int16(device_t dev, const char *name, int16_t *value); int bhnd_nvram_getvar_int32(device_t dev, const char *name, int32_t *value); int bhnd_nvram_getvar_array(device_t dev, const char *name, void *buf, size_t count, bhnd_nvram_type type); bool bhnd_bus_generic_is_hw_disabled(device_t dev, device_t child); bool bhnd_bus_generic_is_region_valid(device_t dev, device_t child, bhnd_port_type type, u_int port, u_int region); int bhnd_bus_generic_get_nvram_var(device_t dev, device_t child, const char *name, void *buf, size_t *size, bhnd_nvram_type type); const struct bhnd_chipid *bhnd_bus_generic_get_chipid(device_t dev, device_t child); int bhnd_bus_generic_read_board_info(device_t dev, device_t child, struct bhnd_board_info *info); struct bhnd_resource *bhnd_bus_generic_alloc_resource (device_t dev, device_t child, int type, int *rid, rman_res_t start, rman_res_t end, rman_res_t count, u_int flags); int bhnd_bus_generic_release_resource (device_t dev, device_t child, int type, int rid, struct bhnd_resource *r); int bhnd_bus_generic_activate_resource (device_t dev, device_t child, int type, int rid, struct bhnd_resource *r); int bhnd_bus_generic_deactivate_resource (device_t dev, device_t child, int type, int rid, struct bhnd_resource *r); bhnd_attach_type bhnd_bus_generic_get_attach_type(device_t dev, device_t child); /** * Return the active host bridge core for the bhnd bus, if any, or NULL if * not found. * * @param dev A bhnd bus device. */ static inline device_t bhnd_find_hostb_device(device_t dev) { return (BHND_BUS_FIND_HOSTB_DEVICE(dev)); } /** * Return true if the hardware components required by @p dev are known to be * unpopulated or otherwise unusable. * * In some cases, enumerated devices may have pins that are left floating, or * the hardware may otherwise be non-functional; this method allows a parent * device to explicitly specify if a successfully enumerated @p dev should * be disabled. * * @param dev A bhnd bus child device. 
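 * * A minimal sketch of the intended use (@c my_probe is a hypothetical * driver probe routine; the bus itself also normally performs this check * when enumerating children): * @code * static int * my_probe(device_t dev) * { * if (bhnd_is_hw_disabled(dev)) * return (ENXIO); * * return (BUS_PROBE_DEFAULT); * } * @endcode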
*/ static inline bool bhnd_is_hw_disabled(device_t dev) { return (BHND_BUS_IS_HW_DISABLED(device_get_parent(dev), dev)); } /** * Return the BHND chip identification info for the bhnd bus. * * @param dev A bhnd bus child device. */ static inline const struct bhnd_chipid * bhnd_get_chipid(device_t dev) { return (BHND_BUS_GET_CHIPID(device_get_parent(dev), dev)); }; /** * Get a list of all cores discoverable on the bhnd bus. * * Enumerates all cores discoverable on @p dev, returning the list in * @p cores and the count in @p num_cores. * * The memory allocated for the list should be freed using * `free(*cores, M_BHND)`. @p cores and @p num_cores are not changed * when an error is returned. * * @param dev A bhnd bus child device. * @param[out] cores The table of core descriptors. * @param[out] num_cores The number of core descriptors in @p cores. * * @retval 0 success * @retval non-zero if an error occurs enumerating @p dev, a regular UNIX * error code should be returned. */ static inline int bhnd_get_core_table(device_t dev, struct bhnd_core_info **cores, u_int *num_cores) { return (BHND_BUS_GET_CORE_TABLE(device_get_parent(dev), dev, cores, num_cores)); } /** * If supported by the chipset, return the clock source for the given clock. * * This function is only supported on early PWRCTL-equipped chipsets * that expose clock management via their host bridge interface. Currently, * this includes PCI (not PCIe) devices, with ChipCommon core revisions 0-9. * * @param dev A bhnd bus child device. * @param clock The clock for which a clock source will be returned. * * @retval bhnd_clksrc The clock source for @p clock. * @retval BHND_CLKSRC_UNKNOWN If @p clock is unsupported, or its * clock source is not known to the bus. */ static inline bhnd_clksrc bhnd_pwrctl_get_clksrc(device_t dev, bhnd_clock clock) { return (BHND_BUS_PWRCTL_GET_CLKSRC(device_get_parent(dev), dev, clock)); } /** * If supported by the chipset, gate @p clock * * This function is only supported on early PWRCTL-equipped chipsets * that expose clock management via their host bridge interface. Currently, * this includes PCI (not PCIe) devices, with ChipCommon core revisions 0-9. * * @param dev A bhnd bus child device. * @param clock The clock to be disabled. * * @retval 0 success * @retval ENODEV If bus-level clock source management is not supported. * @retval ENXIO If bus-level management of @p clock is not supported. */ static inline int bhnd_pwrctl_gate_clock(device_t dev, bhnd_clock clock) { return (BHND_BUS_PWRCTL_GATE_CLOCK(device_get_parent(dev), dev, clock)); } /** * If supported by the chipset, ungate @p clock * * This function is only supported on early PWRCTL-equipped chipsets * that expose clock management via their host bridge interface. Currently, * this includes PCI (not PCIe) devices, with ChipCommon core revisions 0-9. * * @param dev A bhnd bus child device. * @param clock The clock to be enabled. * * @retval 0 success * @retval ENODEV If bus-level clock source management is not supported. * @retval ENXIO If bus-level management of @p clock is not supported. */ static inline int bhnd_pwrctl_ungate_clock(device_t dev, bhnd_clock clock) { return (BHND_BUS_PWRCTL_UNGATE_CLOCK(device_get_parent(dev), dev, clock)); } /** * Return the BHND attachment type of the parent bhnd bus. * * @param dev A bhnd bus child device. * * @retval BHND_ATTACH_ADAPTER if the bus is resident on a bridged adapter, * such as a WiFi chipset. 
* @retval BHND_ATTACH_NATIVE if the bus provides hardware services (clock, * CPU, etc) to a directly attached native host. */ static inline bhnd_attach_type bhnd_get_attach_type (device_t dev) { return (BHND_BUS_GET_ATTACH_TYPE(device_get_parent(dev), dev)); } /** * Attempt to read the BHND board identification from the bhnd bus. * * This relies on NVRAM access, and will fail if a valid NVRAM device cannot * be found, or is not yet attached. * * @param dev The bhnd device requesting board info. * @param[out] info On success, will be populated with the bhnd(4) device's * board information. * * @retval 0 success * @retval ENODEV No valid NVRAM source could be found. * @retval non-zero If reading the board information otherwise fails, a * regular unix error code will be returned. */ static inline int bhnd_read_board_info(device_t dev, struct bhnd_board_info *info) { return (BHND_BUS_READ_BOARD_INFO(device_get_parent(dev), dev, info)); } /** * Allocate and enable per-core PMU request handling for @p dev. * * The region containing the core's PMU register block (if any) must be * allocated via bus_alloc_resource(9) (or bhnd_alloc_resource) before * calling bhnd_alloc_pmu(), and must not be released until after * calling bhnd_release_pmu(). * * @param dev The requesting bhnd device. * * @retval 0 success * @retval non-zero If allocating PMU request state otherwise fails, a * regular unix error code will be returned. */ static inline int bhnd_alloc_pmu(device_t dev) { return (BHND_BUS_ALLOC_PMU(device_get_parent(dev), dev)); } /** * Release any per-core PMU resources allocated for @p dev. Any outstanding * PMU requests are discarded. * * @param dev The requesting bhnd device. * * @retval 0 success * @retval non-zero If releasing PMU request state otherwise fails, a * regular unix error code will be returned, and * the core state will be left unmodified. */ static inline int bhnd_release_pmu(device_t dev) { return (BHND_BUS_RELEASE_PMU(device_get_parent(dev), dev)); } /** * Request that @p clock (or faster) be routed to @p dev. * * A driver must ask the bhnd bus to allocate clock request state * via bhnd_alloc_pmu() before it can request clock resources. * * Request multiplexing is managed by the bus. * * @param dev The bhnd(4) device to which @p clock should be routed. * @param clock The requested clock source. * * @retval 0 success * @retval ENODEV If an unsupported clock was requested. * @retval ENXIO If the PMU has not been initialized or is otherwise unavailable. */ static inline int bhnd_request_clock(device_t dev, bhnd_clock clock) { return (BHND_BUS_REQUEST_CLOCK(device_get_parent(dev), dev, clock)); } /** * Request that @p clocks be powered on behalf of @p dev. * * This will power any clock sources (e.g. XTAL, PLL, etc) required for * @p clocks and wait until they are ready, discarding any previous * requests by @p dev. * * Request multiplexing is managed by the bus. * * A driver must ask the bhnd bus to allocate clock request state * via bhnd_alloc_pmu() before it can request clock resources. * * @param dev The requesting bhnd(4) device. * @param clocks The clock(s) to be enabled. * * @retval 0 success * @retval ENODEV If an unsupported clock was requested. * @retval ENXIO If the PMU has not been initialized or is otherwise unavailable.
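 * * A minimal request sequence (sketch; assumes the underlying chipset * supports the HT clock and that @p dev has not yet allocated PMU state): * @code * int error; * * if ((error = bhnd_alloc_pmu(dev))) * return (error); * * if ((error = bhnd_enable_clocks(dev, BHND_CLOCK_HT))) * return (error); * @endcode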
*/ static inline int bhnd_enable_clocks(device_t dev, uint32_t clocks) { return (BHND_BUS_ENABLE_CLOCKS(device_get_parent(dev), dev, clocks)); } /** * Power up an external PMU-managed resource assigned to @p dev. * * A driver must ask the bhnd bus to allocate PMU request state * via bhnd_alloc_pmu() before it can request PMU resources. * * @param dev The requesting bhnd(4) device. * @param rsrc The core-specific external resource identifier. * * @retval 0 success * @retval ENODEV If the PMU does not support @p rsrc. * @retval ENXIO If the PMU has not been initialized or is otherwise unavailable. */ static inline int bhnd_request_ext_rsrc(device_t dev, u_int rsrc) { return (BHND_BUS_REQUEST_EXT_RSRC(device_get_parent(dev), dev, rsrc)); } /** * Power down an external PMU-managed resource assigned to @p dev. * * A driver must ask the bhnd bus to allocate PMU request state * via bhnd_alloc_pmu() before it can request PMU resources. * * @param dev The requesting bhnd(4) device. * @param rsrc The core-specific external resource identifier. * * @retval 0 success * @retval ENODEV If the PMU does not support @p rsrc. * @retval ENXIO If the PMU has not been initialized or is otherwise unavailable. */ static inline int bhnd_release_ext_rsrc(device_t dev, u_int rsrc) { return (BHND_BUS_RELEASE_EXT_RSRC(device_get_parent(dev), dev, rsrc)); } /** * Read @p width bytes at @p offset from the bus-specific agent/config * space of @p dev. * * @param dev The bhnd device for which @p offset should be read. * @param offset The offset to be read. * @param width The size of the access. Must be 1, 2 or 4 bytes. * * The exact behavior of this method is bus-specific. In the case of * bcma(4), this method provides access to the first agent port of @p dev. * * @note Device drivers should only use this API for functionality * that is not available via another bhnd(4) function. */ static inline uint32_t bhnd_read_config(device_t dev, bus_size_t offset, u_int width) { return (BHND_BUS_READ_CONFIG(device_get_parent(dev), dev, offset, width)); } /** * Write @p width bytes at @p offset to the bus-specific agent/config * space of @p dev. * * @param dev The bhnd device for which @p offset should be written. * @param offset The offset to be written. * @param width The size of the access. Must be 1, 2 or 4 bytes. * * The exact behavior of this method is bus-specific. In the case of * bcma(4), this method provides access to the first agent port of @p dev. * * @note Device drivers should only use this API for functionality * that is not available via another bhnd(4) function. */ static inline void bhnd_write_config(device_t dev, bus_size_t offset, uint32_t val, u_int width) { BHND_BUS_WRITE_CONFIG(device_get_parent(dev), dev, offset, val, width); } /** * Read an NVRAM variable, coerced to the requested @p type. * * @param dev A bhnd bus child device. * @param name The NVRAM variable name. * @param[out] buf A buffer large enough to hold @p len bytes. On * success, the requested value will be written to * this buffer. This argument may be NULL if * the value is not desired. * @param[in,out] len The maximum capacity of @p buf. On success, * will be set to the actual size of the requested * value. * @param type The desired data representation to be written * to @p buf. * * @retval 0 success * @retval ENOENT The requested variable was not found. * @retval ENODEV No valid NVRAM source could be found. * @retval ENOMEM If a buffer of @p len is too small to hold the * requested value.
* @retval EOPNOTSUPP If the value cannot be coerced to @p type. * @retval ERANGE If value coercion would overflow @p type. * @retval non-zero If reading @p name otherwise fails, a regular unix * error code will be returned. */ static inline int bhnd_nvram_getvar(device_t dev, const char *name, void *buf, size_t *len, bhnd_nvram_type type) { return (BHND_BUS_GET_NVRAM_VAR(device_get_parent(dev), dev, name, buf, len, type)); } /** * Allocate a resource from a device's parent bhnd(4) bus. * * @param dev The device requesting resource ownership. * @param type The type of resource to allocate. This may be any type supported * by the standard bus APIs. * @param rid The bus-specific handle identifying the resource being allocated. * @param start The start address of the resource. * @param end The end address of the resource. * @param count The size of the resource. * @param flags The flags for the resource to be allocated. These may be any * values supported by the standard bus APIs. * * To request the resource's default addresses, pass @p start and * @p end values of @c 0 and @c ~0, respectively, and * a @p count of @c 1. * * @retval NULL The resource could not be allocated. * @retval resource The allocated resource. */ static inline struct bhnd_resource * bhnd_alloc_resource(device_t dev, int type, int *rid, rman_res_t start, rman_res_t end, rman_res_t count, u_int flags) { return BHND_BUS_ALLOC_RESOURCE(device_get_parent(dev), dev, type, rid, start, end, count, flags); } /** * Allocate a resource from a device's parent bhnd(4) bus, using the * resource's default start, end, and count values. * * @param dev The device requesting resource ownership. * @param type The type of resource to allocate. This may be any type supported * by the standard bus APIs. * @param rid The bus-specific handle identifying the resource being allocated. * @param flags The flags for the resource to be allocated. These may be any * values supported by the standard bus APIs. * * @retval NULL The resource could not be allocated. * @retval resource The allocated resource. */ static inline struct bhnd_resource * bhnd_alloc_resource_any(device_t dev, int type, int *rid, u_int flags) { return bhnd_alloc_resource(dev, type, rid, 0, ~0, 1, flags); } /** * Activate a previously allocated bhnd resource. * * @param dev The device holding ownership of the allocated resource. * @param type The type of the resource. * @param rid The bus-specific handle identifying the resource. * @param r A pointer to the resource returned by bhnd_alloc_resource or * BHND_BUS_ALLOC_RESOURCE. * * @retval 0 success * @retval non-zero an error occurred while activating the resource. */ static inline int bhnd_activate_resource(device_t dev, int type, int rid, struct bhnd_resource *r) { return BHND_BUS_ACTIVATE_RESOURCE(device_get_parent(dev), dev, type, rid, r); } /** * Deactivate a previously activated bhnd resource. * * @param dev The device holding ownership of the activated resource. * @param type The type of the resource. * @param rid The bus-specific handle identifying the resource. * @param r A pointer to the resource returned by bhnd_alloc_resource or * BHND_BUS_ALLOC_RESOURCE. * * @retval 0 success * @retval non-zero an error occurred while deactivating the resource. */ static inline int bhnd_deactivate_resource(device_t dev, int type, int rid, struct bhnd_resource *r) { return BHND_BUS_DEACTIVATE_RESOURCE(device_get_parent(dev), dev, type, rid, r); } /** * Free a resource allocated by bhnd_alloc_resource().
* * @param dev The device holding ownership of the resource. * @param type The type of the resource. * @param rid The bus-specific handle identifying the resource. * @param r A pointer to the resource returned by bhnd_alloc_resource or * BHND_BUS_ALLOC_RESOURCE. * * @retval 0 success * @retval non-zero an error occurred while releasing the resource. */ static inline int bhnd_release_resource(device_t dev, int type, int rid, struct bhnd_resource *r) { return BHND_BUS_RELEASE_RESOURCE(device_get_parent(dev), dev, type, rid, r); } /** * Return true if @p region_num is a valid region on @p port_num of * @p type attached to @p dev. * * @param dev A bhnd bus child device. * @param type The port type being queried. * @param port_num The port number being queried. * @param region_num The region number being queried. */ static inline bool bhnd_is_region_valid(device_t dev, bhnd_port_type type, u_int port_num, u_int region_num) { return (BHND_BUS_IS_REGION_VALID(device_get_parent(dev), dev, type, port_num, region_num)); } /** * Return the number of ports of type @p type attached to @p dev. * * @param dev A bhnd bus child device. * @param type The port type being queried. */ static inline u_int bhnd_get_port_count(device_t dev, bhnd_port_type type) { return (BHND_BUS_GET_PORT_COUNT(device_get_parent(dev), dev, type)); } /** * Return the number of memory regions mapped to port @p port of * type @p type on @p dev. * * @param dev A bhnd bus child device. * @param port The port number being queried. * @param type The port type being queried. */ static inline u_int bhnd_get_region_count(device_t dev, bhnd_port_type type, u_int port) { return (BHND_BUS_GET_REGION_COUNT(device_get_parent(dev), dev, type, port)); } /** * Return the resource-ID for a memory region on the given device port. * * @param dev A bhnd bus child device. * @param type The port type. * @param port The port identifier. * @param region The identifier of the memory region on @p port. * * @retval int The RID for the given @p port and @p region on @p dev. * @retval -1 No such port/region found. */ static inline int bhnd_get_port_rid(device_t dev, bhnd_port_type type, u_int port, u_int region) { return BHND_BUS_GET_PORT_RID(device_get_parent(dev), dev, type, port, region); } /** * Decode a port / region pair on @p dev defined by @p rid. * * @param dev A bhnd bus child device. * @param type The resource type. * @param rid The resource identifier. * @param[out] port_type The decoded port type. * @param[out] port The decoded port identifier. * @param[out] region The decoded region identifier. * * @retval 0 success * @retval non-zero No matching port/region found. */ static inline int bhnd_decode_port_rid(device_t dev, int type, int rid, bhnd_port_type *port_type, u_int *port, u_int *region) { return BHND_BUS_DECODE_PORT_RID(device_get_parent(dev), dev, type, rid, port_type, port, region); } /** * Get the address and size of @p region on @p port. * * @param dev A bhnd bus child device. * @param port_type The port type. * @param port The port identifier. * @param region The identifier of the memory region on @p port. * @param[out] region_addr The region's base address. * @param[out] region_size The region's size. * * @retval 0 success * @retval non-zero No matching port/region found.
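 * * A minimal sketch querying the first region of the first device port: * @code * bhnd_addr_t addr; * bhnd_size_t size; * int error; * * error = bhnd_get_region_addr(dev, BHND_PORT_DEVICE, 0, 0, &addr, * &size); * if (error) * device_printf(dev, "no device0.0 register block\n"); * @endcode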
*/ static inline int bhnd_get_region_addr(device_t dev, bhnd_port_type port_type, u_int port, u_int region, bhnd_addr_t *region_addr, bhnd_size_t *region_size) { return BHND_BUS_GET_REGION_ADDR(device_get_parent(dev), dev, port_type, port, region, region_addr, region_size); } /* * bhnd bus-level equivalents of the bus_(read|write|set|barrier|...) * macros (compatible with bhnd_resource). * * Generated with bhnd/tools/bus_macro.sh */ #define bhnd_bus_barrier(r, o, l, f) \ ((r)->direct) ? \ bus_barrier((r)->res, (o), (l), (f)) : \ BHND_BUS_BARRIER( \ device_get_parent(rman_get_device((r)->res)), \ rman_get_device((r)->res), (r), (o), (l), (f)) #define bhnd_bus_read_1(r, o) \ ((r)->direct) ? \ bus_read_1((r)->res, (o)) : \ BHND_BUS_READ_1( \ device_get_parent(rman_get_device((r)->res)), \ rman_get_device((r)->res), (r), (o)) #define bhnd_bus_read_multi_1(r, o, d, c) \ ((r)->direct) ? \ bus_read_multi_1((r)->res, (o), (d), (c)) : \ BHND_BUS_READ_MULTI_1( \ device_get_parent(rman_get_device((r)->res)), \ rman_get_device((r)->res), (r), (o), (d), (c)) #define bhnd_bus_read_region_1(r, o, d, c) \ ((r)->direct) ? \ bus_read_region_1((r)->res, (o), (d), (c)) : \ BHND_BUS_READ_REGION_1( \ device_get_parent(rman_get_device((r)->res)), \ rman_get_device((r)->res), (r), (o), (d), (c)) #define bhnd_bus_write_1(r, o, v) \ ((r)->direct) ? \ bus_write_1((r)->res, (o), (v)) : \ BHND_BUS_WRITE_1( \ device_get_parent(rman_get_device((r)->res)), \ rman_get_device((r)->res), (r), (o), (v)) #define bhnd_bus_write_multi_1(r, o, d, c) \ ((r)->direct) ? \ bus_write_multi_1((r)->res, (o), (d), (c)) : \ BHND_BUS_WRITE_MULTI_1( \ device_get_parent(rman_get_device((r)->res)), \ rman_get_device((r)->res), (r), (o), (d), (c)) #define bhnd_bus_write_region_1(r, o, d, c) \ ((r)->direct) ? \ bus_write_region_1((r)->res, (o), (d), (c)) : \ BHND_BUS_WRITE_REGION_1( \ device_get_parent(rman_get_device((r)->res)), \ rman_get_device((r)->res), (r), (o), (d), (c)) #define bhnd_bus_read_stream_1(r, o) \ ((r)->direct) ? \ bus_read_stream_1((r)->res, (o)) : \ BHND_BUS_READ_STREAM_1( \ device_get_parent(rman_get_device((r)->res)), \ rman_get_device((r)->res), (r), (o)) #define bhnd_bus_read_multi_stream_1(r, o, d, c) \ ((r)->direct) ? \ bus_read_multi_stream_1((r)->res, (o), (d), (c)) : \ BHND_BUS_READ_MULTI_STREAM_1( \ device_get_parent(rman_get_device((r)->res)), \ rman_get_device((r)->res), (r), (o), (d), (c)) #define bhnd_bus_read_region_stream_1(r, o, d, c) \ ((r)->direct) ? \ bus_read_region_stream_1((r)->res, (o), (d), (c)) : \ BHND_BUS_READ_REGION_STREAM_1( \ device_get_parent(rman_get_device((r)->res)), \ rman_get_device((r)->res), (r), (o), (d), (c)) #define bhnd_bus_write_stream_1(r, o, v) \ ((r)->direct) ? \ bus_write_stream_1((r)->res, (o), (v)) : \ BHND_BUS_WRITE_STREAM_1( \ device_get_parent(rman_get_device((r)->res)), \ rman_get_device((r)->res), (r), (o), (v)) #define bhnd_bus_write_multi_stream_1(r, o, d, c) \ ((r)->direct) ? \ bus_write_multi_stream_1((r)->res, (o), (d), (c)) : \ BHND_BUS_WRITE_MULTI_STREAM_1( \ device_get_parent(rman_get_device((r)->res)), \ rman_get_device((r)->res), (r), (o), (d), (c)) #define bhnd_bus_write_region_stream_1(r, o, d, c) \ ((r)->direct) ? \ bus_write_region_stream_1((r)->res, (o), (d), (c)) : \ BHND_BUS_WRITE_REGION_STREAM_1( \ device_get_parent(rman_get_device((r)->res)), \ rman_get_device((r)->res), (r), (o), (d), (c)) #define bhnd_bus_set_multi_1(r, o, v, c) \ ((r)->direct) ? 
\ bus_set_multi_1((r)->res, (o), (v), (c)) : \ BHND_BUS_SET_MULTI_1( \ device_get_parent(rman_get_device((r)->res)), \ rman_get_device((r)->res), (r), (o), (v), (c)) #define bhnd_bus_set_region_1(r, o, v, c) \ ((r)->direct) ? \ bus_set_region_1((r)->res, (o), (v), (c)) : \ BHND_BUS_SET_REGION_1( \ device_get_parent(rman_get_device((r)->res)), \ rman_get_device((r)->res), (r), (o), (v), (c)) #define bhnd_bus_read_2(r, o) \ ((r)->direct) ? \ bus_read_2((r)->res, (o)) : \ BHND_BUS_READ_2( \ device_get_parent(rman_get_device((r)->res)), \ rman_get_device((r)->res), (r), (o)) #define bhnd_bus_read_multi_2(r, o, d, c) \ ((r)->direct) ? \ bus_read_multi_2((r)->res, (o), (d), (c)) : \ BHND_BUS_READ_MULTI_2( \ device_get_parent(rman_get_device((r)->res)), \ rman_get_device((r)->res), (r), (o), (d), (c)) #define bhnd_bus_read_region_2(r, o, d, c) \ ((r)->direct) ? \ bus_read_region_2((r)->res, (o), (d), (c)) : \ BHND_BUS_READ_REGION_2( \ device_get_parent(rman_get_device((r)->res)), \ rman_get_device((r)->res), (r), (o), (d), (c)) #define bhnd_bus_write_2(r, o, v) \ ((r)->direct) ? \ bus_write_2((r)->res, (o), (v)) : \ BHND_BUS_WRITE_2( \ device_get_parent(rman_get_device((r)->res)), \ rman_get_device((r)->res), (r), (o), (v)) #define bhnd_bus_write_multi_2(r, o, d, c) \ ((r)->direct) ? \ bus_write_multi_2((r)->res, (o), (d), (c)) : \ BHND_BUS_WRITE_MULTI_2( \ device_get_parent(rman_get_device((r)->res)), \ rman_get_device((r)->res), (r), (o), (d), (c)) #define bhnd_bus_write_region_2(r, o, d, c) \ ((r)->direct) ? \ bus_write_region_2((r)->res, (o), (d), (c)) : \ BHND_BUS_WRITE_REGION_2( \ device_get_parent(rman_get_device((r)->res)), \ rman_get_device((r)->res), (r), (o), (d), (c)) #define bhnd_bus_read_stream_2(r, o) \ ((r)->direct) ? \ bus_read_stream_2((r)->res, (o)) : \ BHND_BUS_READ_STREAM_2( \ device_get_parent(rman_get_device((r)->res)), \ rman_get_device((r)->res), (r), (o)) #define bhnd_bus_read_multi_stream_2(r, o, d, c) \ ((r)->direct) ? \ bus_read_multi_stream_2((r)->res, (o), (d), (c)) : \ BHND_BUS_READ_MULTI_STREAM_2( \ device_get_parent(rman_get_device((r)->res)), \ rman_get_device((r)->res), (r), (o), (d), (c)) #define bhnd_bus_read_region_stream_2(r, o, d, c) \ ((r)->direct) ? \ bus_read_region_stream_2((r)->res, (o), (d), (c)) : \ BHND_BUS_READ_REGION_STREAM_2( \ device_get_parent(rman_get_device((r)->res)), \ rman_get_device((r)->res), (r), (o), (d), (c)) #define bhnd_bus_write_stream_2(r, o, v) \ ((r)->direct) ? \ bus_write_stream_2((r)->res, (o), (v)) : \ BHND_BUS_WRITE_STREAM_2( \ device_get_parent(rman_get_device((r)->res)), \ rman_get_device((r)->res), (r), (o), (v)) #define bhnd_bus_write_multi_stream_2(r, o, d, c) \ ((r)->direct) ? \ bus_write_multi_stream_2((r)->res, (o), (d), (c)) : \ BHND_BUS_WRITE_MULTI_STREAM_2( \ device_get_parent(rman_get_device((r)->res)), \ rman_get_device((r)->res), (r), (o), (d), (c)) #define bhnd_bus_write_region_stream_2(r, o, d, c) \ ((r)->direct) ? \ bus_write_region_stream_2((r)->res, (o), (d), (c)) : \ BHND_BUS_WRITE_REGION_STREAM_2( \ device_get_parent(rman_get_device((r)->res)), \ rman_get_device((r)->res), (r), (o), (d), (c)) #define bhnd_bus_set_multi_2(r, o, v, c) \ ((r)->direct) ? \ bus_set_multi_2((r)->res, (o), (v), (c)) : \ BHND_BUS_SET_MULTI_2( \ device_get_parent(rman_get_device((r)->res)), \ rman_get_device((r)->res), (r), (o), (v), (c)) #define bhnd_bus_set_region_2(r, o, v, c) \ ((r)->direct) ? 
\ bus_set_region_2((r)->res, (o), (v), (c)) : \ BHND_BUS_SET_REGION_2( \ device_get_parent(rman_get_device((r)->res)), \ rman_get_device((r)->res), (r), (o), (v), (c)) #define bhnd_bus_read_4(r, o) \ ((r)->direct) ? \ bus_read_4((r)->res, (o)) : \ BHND_BUS_READ_4( \ device_get_parent(rman_get_device((r)->res)), \ rman_get_device((r)->res), (r), (o)) #define bhnd_bus_read_multi_4(r, o, d, c) \ ((r)->direct) ? \ bus_read_multi_4((r)->res, (o), (d), (c)) : \ BHND_BUS_READ_MULTI_4( \ device_get_parent(rman_get_device((r)->res)), \ rman_get_device((r)->res), (r), (o), (d), (c)) #define bhnd_bus_read_region_4(r, o, d, c) \ ((r)->direct) ? \ bus_read_region_4((r)->res, (o), (d), (c)) : \ BHND_BUS_READ_REGION_4( \ device_get_parent(rman_get_device((r)->res)), \ rman_get_device((r)->res), (r), (o), (d), (c)) #define bhnd_bus_write_4(r, o, v) \ ((r)->direct) ? \ bus_write_4((r)->res, (o), (v)) : \ BHND_BUS_WRITE_4( \ device_get_parent(rman_get_device((r)->res)), \ rman_get_device((r)->res), (r), (o), (v)) #define bhnd_bus_write_multi_4(r, o, d, c) \ ((r)->direct) ? \ bus_write_multi_4((r)->res, (o), (d), (c)) : \ BHND_BUS_WRITE_MULTI_4( \ device_get_parent(rman_get_device((r)->res)), \ rman_get_device((r)->res), (r), (o), (d), (c)) #define bhnd_bus_write_region_4(r, o, d, c) \ ((r)->direct) ? \ bus_write_region_4((r)->res, (o), (d), (c)) : \ BHND_BUS_WRITE_REGION_4( \ device_get_parent(rman_get_device((r)->res)), \ rman_get_device((r)->res), (r), (o), (d), (c)) #define bhnd_bus_read_stream_4(r, o) \ ((r)->direct) ? \ bus_read_stream_4((r)->res, (o)) : \ BHND_BUS_READ_STREAM_4( \ device_get_parent(rman_get_device((r)->res)), \ rman_get_device((r)->res), (r), (o)) #define bhnd_bus_read_multi_stream_4(r, o, d, c) \ ((r)->direct) ? \ bus_read_multi_stream_4((r)->res, (o), (d), (c)) : \ BHND_BUS_READ_MULTI_STREAM_4( \ device_get_parent(rman_get_device((r)->res)), \ rman_get_device((r)->res), (r), (o), (d), (c)) #define bhnd_bus_read_region_stream_4(r, o, d, c) \ ((r)->direct) ? \ bus_read_region_stream_4((r)->res, (o), (d), (c)) : \ BHND_BUS_READ_REGION_STREAM_4( \ device_get_parent(rman_get_device((r)->res)), \ rman_get_device((r)->res), (r), (o), (d), (c)) #define bhnd_bus_write_stream_4(r, o, v) \ ((r)->direct) ? \ bus_write_stream_4((r)->res, (o), (v)) : \ BHND_BUS_WRITE_STREAM_4( \ device_get_parent(rman_get_device((r)->res)), \ rman_get_device((r)->res), (r), (o), (v)) #define bhnd_bus_write_multi_stream_4(r, o, d, c) \ ((r)->direct) ? \ bus_write_multi_stream_4((r)->res, (o), (d), (c)) : \ BHND_BUS_WRITE_MULTI_STREAM_4( \ device_get_parent(rman_get_device((r)->res)), \ rman_get_device((r)->res), (r), (o), (d), (c)) #define bhnd_bus_write_region_stream_4(r, o, d, c) \ ((r)->direct) ? \ bus_write_region_stream_4((r)->res, (o), (d), (c)) : \ BHND_BUS_WRITE_REGION_STREAM_4( \ device_get_parent(rman_get_device((r)->res)), \ rman_get_device((r)->res), (r), (o), (d), (c)) #define bhnd_bus_set_multi_4(r, o, v, c) \ ((r)->direct) ? \ bus_set_multi_4((r)->res, (o), (v), (c)) : \ BHND_BUS_SET_MULTI_4( \ device_get_parent(rman_get_device((r)->res)), \ rman_get_device((r)->res), (r), (o), (v), (c)) #define bhnd_bus_set_region_4(r, o, v, c) \ ((r)->direct) ? 
\ bus_set_region_4((r)->res, (o), (v), (c)) : \ BHND_BUS_SET_REGION_4( \ device_get_parent(rman_get_device((r)->res)), \ rman_get_device((r)->res), (r), (o), (v), (c)) #endif /* _BHND_BHND_H_ */ Index: projects/clang390-import/sys/dev/bhnd/bhnd_subr.c =================================================================== --- projects/clang390-import/sys/dev/bhnd/bhnd_subr.c (revision 305016) +++ projects/clang390-import/sys/dev/bhnd/bhnd_subr.c (revision 305017) @@ -1,1532 +1,1612 @@ /*- * Copyright (c) 2015 Landon Fuller * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer, * without modification. * 2. Redistributions in binary form must reproduce at minimum a disclaimer * similar to the "NO WARRANTY" disclaimer below ("Disclaimer") and any * redistribution must be conditioned upon including a substantially * similar Disclaimer requirement for further binary redistribution. * * NO WARRANTY * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF NONINFRINGEMENT, MERCHANTIBILITY * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL * THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGES. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include +#include + #include #include "nvram/bhnd_nvram.h" #include "bhnd_chipc_if.h" #include "bhnd_nvram_if.h" #include "bhnd_nvram_map.h" #include "bhndreg.h" #include "bhndvar.h" /* BHND core device description table. 
*/ static const struct bhnd_core_desc { uint16_t vendor; uint16_t device; bhnd_devclass_t class; const char *desc; } bhnd_core_descs[] = { #define BHND_CDESC(_mfg, _cid, _cls, _desc) \ { BHND_MFGID_ ## _mfg, BHND_COREID_ ## _cid, \ BHND_DEVCLASS_ ## _cls, _desc } BHND_CDESC(BCM, CC, CC, "ChipCommon I/O Controller"), BHND_CDESC(BCM, ILINE20, OTHER, "iLine20 HPNA"), BHND_CDESC(BCM, SRAM, RAM, "SRAM"), BHND_CDESC(BCM, SDRAM, RAM, "SDRAM"), BHND_CDESC(BCM, PCI, PCI, "PCI Bridge"), BHND_CDESC(BCM, MIPS, CPU, "MIPS Core"), BHND_CDESC(BCM, ENET, ENET_MAC, "Fast Ethernet MAC"), BHND_CDESC(BCM, CODEC, OTHER, "V.90 Modem Codec"), BHND_CDESC(BCM, USB, OTHER, "USB 1.1 Device/Host Controller"), BHND_CDESC(BCM, ADSL, OTHER, "ADSL Core"), BHND_CDESC(BCM, ILINE100, OTHER, "iLine100 HPNA"), BHND_CDESC(BCM, IPSEC, OTHER, "IPsec Accelerator"), BHND_CDESC(BCM, UTOPIA, OTHER, "UTOPIA ATM Core"), BHND_CDESC(BCM, PCMCIA, PCCARD, "PCMCIA Bridge"), BHND_CDESC(BCM, SOCRAM, RAM, "Internal Memory"), BHND_CDESC(BCM, MEMC, MEMC, "MEMC SDRAM Controller"), BHND_CDESC(BCM, OFDM, OTHER, "OFDM PHY"), BHND_CDESC(BCM, EXTIF, OTHER, "External Interface"), BHND_CDESC(BCM, D11, WLAN, "802.11 MAC/PHY/Radio"), BHND_CDESC(BCM, APHY, WLAN_PHY, "802.11a PHY"), BHND_CDESC(BCM, BPHY, WLAN_PHY, "802.11b PHY"), BHND_CDESC(BCM, GPHY, WLAN_PHY, "802.11g PHY"), BHND_CDESC(BCM, MIPS33, CPU, "MIPS3302 Core"), BHND_CDESC(BCM, USB11H, OTHER, "USB 1.1 Host Controller"), BHND_CDESC(BCM, USB11D, OTHER, "USB 1.1 Device Core"), BHND_CDESC(BCM, USB20H, OTHER, "USB 2.0 Host Controller"), BHND_CDESC(BCM, USB20D, OTHER, "USB 2.0 Device Core"), BHND_CDESC(BCM, SDIOH, OTHER, "SDIO Host Controller"), BHND_CDESC(BCM, ROBO, OTHER, "RoboSwitch"), BHND_CDESC(BCM, ATA100, OTHER, "Parallel ATA Controller"), BHND_CDESC(BCM, SATAXOR, OTHER, "SATA DMA/XOR Controller"), BHND_CDESC(BCM, GIGETH, ENET_MAC, "Gigabit Ethernet MAC"), BHND_CDESC(BCM, PCIE, PCIE, "PCIe Bridge"), BHND_CDESC(BCM, NPHY, WLAN_PHY, "802.11n 2x2 PHY"), BHND_CDESC(BCM, SRAMC, MEMC, "SRAM Controller"), BHND_CDESC(BCM, MINIMAC, OTHER, "MINI MAC/PHY"), BHND_CDESC(BCM, ARM11, CPU, "ARM1176 CPU"), BHND_CDESC(BCM, ARM7S, CPU, "ARM7TDMI-S CPU"), BHND_CDESC(BCM, LPPHY, WLAN_PHY, "802.11a/b/g PHY"), BHND_CDESC(BCM, PMU, PMU, "PMU"), BHND_CDESC(BCM, SSNPHY, WLAN_PHY, "802.11n Single-Stream PHY"), BHND_CDESC(BCM, SDIOD, OTHER, "SDIO Device Core"), BHND_CDESC(BCM, ARMCM3, CPU, "ARM Cortex-M3 CPU"), BHND_CDESC(BCM, HTPHY, WLAN_PHY, "802.11n 4x4 PHY"), BHND_CDESC(MIPS,MIPS74K, CPU, "MIPS74k CPU"), BHND_CDESC(BCM, GMAC, ENET_MAC, "Gigabit MAC core"), BHND_CDESC(BCM, DMEMC, MEMC, "DDR1/DDR2 Memory Controller"), BHND_CDESC(BCM, PCIERC, OTHER, "PCIe Root Complex"), BHND_CDESC(BCM, OCP, SOC_BRIDGE, "OCP to OCP Bridge"), BHND_CDESC(BCM, SC, OTHER, "Shared Common Core"), BHND_CDESC(BCM, AHB, SOC_BRIDGE, "OCP to AHB Bridge"), BHND_CDESC(BCM, SPIH, OTHER, "SPI Host Controller"), BHND_CDESC(BCM, I2S, OTHER, "I2S Digital Audio Interface"), BHND_CDESC(BCM, DMEMS, MEMC, "SDR/DDR1 Memory Controller"), BHND_CDESC(BCM, UBUS_SHIM, OTHER, "BCM6362/UBUS WLAN SHIM"), BHND_CDESC(BCM, PCIE2, PCIE, "PCIe Bridge (Gen2)"), BHND_CDESC(ARM, APB_BRIDGE, SOC_BRIDGE, "BP135 AMBA3 AXI to APB Bridge"), BHND_CDESC(ARM, PL301, SOC_ROUTER, "PL301 AMBA3 Interconnect"), BHND_CDESC(ARM, EROM, EROM, "PL366 Device Enumeration ROM"), BHND_CDESC(ARM, OOB_ROUTER, OTHER, "PL367 OOB Interrupt Router"), BHND_CDESC(ARM, AXI_UNMAPPED, OTHER, "Unmapped Address Ranges"), BHND_CDESC(BCM, 4706_CC, CC, "ChipCommon I/O Controller"), BHND_CDESC(BCM, NS_PCIE2, PCIE, 
"PCIe Bridge (Gen2)"), BHND_CDESC(BCM, NS_DMA, OTHER, "DMA engine"), BHND_CDESC(BCM, NS_SDIO, OTHER, "SDIO 3.0 Host Controller"), BHND_CDESC(BCM, NS_USB20H, OTHER, "USB 2.0 Host Controller"), BHND_CDESC(BCM, NS_USB30H, OTHER, "USB 3.0 Host Controller"), BHND_CDESC(BCM, NS_A9JTAG, OTHER, "ARM Cortex A9 JTAG Interface"), BHND_CDESC(BCM, NS_DDR23_MEMC, MEMC, "Denali DDR2/DD3 Memory Controller"), BHND_CDESC(BCM, NS_ROM, NVRAM, "System ROM"), BHND_CDESC(BCM, NS_NAND, NVRAM, "NAND Flash Controller"), BHND_CDESC(BCM, NS_QSPI, NVRAM, "QSPI Flash Controller"), BHND_CDESC(BCM, NS_CC_B, CC_B, "ChipCommon B Auxiliary I/O Controller"), BHND_CDESC(BCM, 4706_SOCRAM, RAM, "Internal Memory"), BHND_CDESC(BCM, IHOST_ARMCA9, CPU, "ARM Cortex A9 CPU"), BHND_CDESC(BCM, 4706_GMAC_CMN, ENET, "Gigabit MAC (Common)"), BHND_CDESC(BCM, 4706_GMAC, ENET_MAC, "Gigabit MAC"), BHND_CDESC(BCM, AMEMC, MEMC, "Denali DDR1/DDR2 Memory Controller"), #undef BHND_CDESC /* Derived from inspection of the BCM4331 cores that provide PrimeCell * IDs. Due to lack of documentation, the surmised device name/purpose * provided here may be incorrect. */ { BHND_MFGID_ARM, BHND_PRIMEID_EROM, BHND_DEVCLASS_OTHER, "PL364 Device Enumeration ROM" }, { BHND_MFGID_ARM, BHND_PRIMEID_SWRAP, BHND_DEVCLASS_OTHER, "PL368 Device Management Interface" }, { BHND_MFGID_ARM, BHND_PRIMEID_MWRAP, BHND_DEVCLASS_OTHER, "PL369 Device Management Interface" }, { 0, 0, 0, NULL } }; /** * Return the name for a given JEP106 manufacturer ID. * * @param vendor A JEP106 Manufacturer ID, including the non-standard ARM 4-bit * JEP106 continuation code. */ const char * bhnd_vendor_name(uint16_t vendor) { switch (vendor) { case BHND_MFGID_ARM: return "ARM"; case BHND_MFGID_BCM: return "Broadcom"; case BHND_MFGID_MIPS: return "MIPS"; default: return "unknown"; } } /** * Return the name of a port type. */ const char * bhnd_port_type_name(bhnd_port_type port_type) { switch (port_type) { case BHND_PORT_DEVICE: return ("device"); case BHND_PORT_BRIDGE: return ("bridge"); case BHND_PORT_AGENT: return ("agent"); default: return "unknown"; } } /** * Return the name of an NVRAM source. */ const char * bhnd_nvram_src_name(bhnd_nvram_src nvram_src) { switch (nvram_src) { case BHND_NVRAM_SRC_FLASH: return ("flash"); case BHND_NVRAM_SRC_OTP: return ("OTP"); case BHND_NVRAM_SRC_SPROM: return ("SPROM"); case BHND_NVRAM_SRC_UNKNOWN: return ("none"); default: return ("unknown"); } } static const struct bhnd_core_desc * bhnd_find_core_desc(uint16_t vendor, uint16_t device) { for (u_int i = 0; bhnd_core_descs[i].desc != NULL; i++) { if (bhnd_core_descs[i].vendor != vendor) continue; if (bhnd_core_descs[i].device != device) continue; return (&bhnd_core_descs[i]); } return (NULL); } /** * Return a human-readable name for a BHND core. * * @param vendor The core designer's JEDEC-106 Manufacturer ID * @param device The core identifier. */ const char * bhnd_find_core_name(uint16_t vendor, uint16_t device) { const struct bhnd_core_desc *desc; if ((desc = bhnd_find_core_desc(vendor, device)) == NULL) return ("unknown"); return desc->desc; } /** * Return the device class for a BHND core. * * @param vendor The core designer's JEDEC-106 Manufacturer ID * @param device The core identifier. */ bhnd_devclass_t bhnd_find_core_class(uint16_t vendor, uint16_t device) { const struct bhnd_core_desc *desc; if ((desc = bhnd_find_core_desc(vendor, device)) == NULL) return (BHND_DEVCLASS_OTHER); return desc->class; } /** * Return a human-readable name for a BHND core. * * @param ci The core's info record. 
*/ const char * bhnd_core_name(const struct bhnd_core_info *ci) { return bhnd_find_core_name(ci->vendor, ci->device); } /** * Return the device class for a BHND core. * * @param ci The core's info record. */ bhnd_devclass_t bhnd_core_class(const struct bhnd_core_info *ci) { return bhnd_find_core_class(ci->vendor, ci->device); } /** * Write a human-readable name representation of the given * BHND_CHIPID_* constant to @p buffer. * * @param buffer Output buffer, or NULL to compute the required size. * @param size Capacity of @p buffer, in bytes. * @param chip_id Chip ID to be formatted. * * @return Returns the required number of bytes on success, or a negative * integer on failure. No more than @p size-1 characters will be written, with * the @p size'th set to '\0'. * * @sa BHND_CHIPID_MAX_NAMELEN */ int bhnd_format_chip_id(char *buffer, size_t size, uint16_t chip_id) { /* All hex formatted IDs are within the range of 0x4000-0x9C3F (40000-1) */ if (chip_id >= 0x4000 && chip_id <= 0x9C3F) return (snprintf(buffer, size, "BCM%hX", chip_id)); else return (snprintf(buffer, size, "BCM%hu", chip_id)); } /** * Initialize a core info record with data from a bhnd-attached @p dev. * * @param dev A bhnd device. * * @return The initialized core info record. */ struct bhnd_core_info bhnd_get_core_info(device_t dev) { return (struct bhnd_core_info) { .vendor = bhnd_get_vendor(dev), .device = bhnd_get_device(dev), .hwrev = bhnd_get_hwrev(dev), .core_idx = bhnd_get_core_index(dev), .unit = bhnd_get_core_unit(dev) }; } /** * Find a @p class child device with @p unit on @p dev. * * @param dev The bhnd-compatible bus to be searched. * @param class The device class to match on. * @param unit The core unit number; specify -1 to return the first match * regardless of unit number. * * @retval device_t if a matching child device is found. * @retval NULL if no matching child device is found. */ device_t bhnd_find_child(device_t dev, bhnd_devclass_t class, int unit) { struct bhnd_core_match md = { BHND_MATCH_CORE_CLASS(class), BHND_MATCH_CORE_UNIT(unit) }; if (unit == -1) md.m.match.core_unit = 0; return bhnd_match_child(dev, &md); } /** * Find the first child device on @p dev that matches @p desc. * * @param dev The bhnd-compatible bus to be searched. * @param desc A match descriptor. * * @retval device_t if a matching child device is found. * @retval NULL if no matching child device is found. */ device_t bhnd_match_child(device_t dev, const struct bhnd_core_match *desc) { device_t *devlistp; device_t match; int devcnt; int error; error = device_get_children(dev, &devlistp, &devcnt); if (error != 0) return (NULL); match = NULL; for (int i = 0; i < devcnt; i++) { struct bhnd_core_info ci = bhnd_get_core_info(devlistp[i]); if (bhnd_core_matches(&ci, desc)) { match = devlistp[i]; goto done; } } done: free(devlistp, M_TEMP); return match; } /** * Walk up the bhnd device hierarchy to locate the root device * to which the bhndb bridge is attached. * * This can be used from within bhnd host bridge drivers to locate the * actual upstream host device. * * @param dev A bhnd device. * @param bus_class The expected bus (e.g. "pci") to which the bridge root * should be attached. * * @retval device_t if a matching parent device is found. * @retval NULL @p dev is not attached via a bhndb bus * @retval NULL no parent device is attached via @p bus_class.
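 * * A minimal sketch locating the PCI device hosting a bridged bhnd(4) bus: * @code * device_t root; * * root = bhnd_find_bridge_root(dev, devclass_find("pci")); * if (root == NULL) * device_printf(dev, "not bridged via pci\n"); * @endcode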
*/ device_t bhnd_find_bridge_root(device_t dev, devclass_t bus_class) { devclass_t bhndb_class; device_t parent; KASSERT(device_get_devclass(device_get_parent(dev)) == bhnd_devclass, ("%s not a bhnd device", device_get_nameunit(dev))); bhndb_class = devclass_find("bhndb"); /* Walk the device tree until we hit a bridge */ parent = dev; while ((parent = device_get_parent(parent)) != NULL) { if (device_get_devclass(parent) == bhndb_class) break; } /* No bridge? */ if (parent == NULL) return (NULL); /* Search for a parent attached to the expected bus class */ while ((parent = device_get_parent(parent)) != NULL) { device_t bus; bus = device_get_parent(parent); if (bus != NULL && device_get_devclass(bus) == bus_class) return (parent); } /* Not found */ return (NULL); } /** * Find the first core in @p cores that matches @p desc. * * @param cores The table to search. * @param num_cores The length of @p cores. * @param desc A match descriptor. * * @retval bhnd_core_info if a matching core is found. * @retval NULL if no matching core is found. */ const struct bhnd_core_info * bhnd_match_core(const struct bhnd_core_info *cores, u_int num_cores, const struct bhnd_core_match *desc) { for (u_int i = 0; i < num_cores; i++) { if (bhnd_core_matches(&cores[i], desc)) return &cores[i]; } return (NULL); } /** * Find the first core in @p cores with the given @p class. * * @param cores The table to search. * @param num_cores The length of @p cores. * @param desc A match descriptor. * * @retval bhnd_core_info if a matching core is found. * @retval NULL if no matching core is found. */ const struct bhnd_core_info * bhnd_find_core(const struct bhnd_core_info *cores, u_int num_cores, bhnd_devclass_t class) { struct bhnd_core_match md = { BHND_MATCH_CORE_CLASS(class) }; return bhnd_match_core(cores, num_cores, &md); } /** * Return true if the @p core matches @p desc. * * @param core A bhnd core descriptor. * @param desc A match descriptor to compare against @p core. * * @retval true if @p core matches @p match * @retval false if @p core does not match @p match. */ bool bhnd_core_matches(const struct bhnd_core_info *core, const struct bhnd_core_match *desc) { if (desc->m.match.core_vendor && desc->core_vendor != core->vendor) return (false); if (desc->m.match.core_id && desc->core_id != core->device) return (false); if (desc->m.match.core_unit && desc->core_unit != core->unit) return (false); if (desc->m.match.core_rev && !bhnd_hwrev_matches(core->hwrev, &desc->core_rev)) return (false); if (desc->m.match.core_class && desc->core_class != bhnd_core_class(core)) return (false); return true; } /** * Return true if the @p chip matches @p desc. * * @param chip A bhnd chip identifier. * @param desc A match descriptor to compare against @p chip. * * @retval true if @p chip matches @p match * @retval false if @p chip does not match @p match. */ bool bhnd_chip_matches(const struct bhnd_chipid *chip, const struct bhnd_chip_match *desc) { if (desc->m.match.chip_id && chip->chip_id != desc->chip_id) return (false); if (desc->m.match.chip_pkg && chip->chip_pkg != desc->chip_pkg) return (false); if (desc->m.match.chip_rev && !bhnd_hwrev_matches(chip->chip_rev, &desc->chip_rev)) return (false); return (true); } /** * Return true if the @p board matches @p desc. * * @param board The bhnd board info. * @param desc A match descriptor to compare against @p board. * * @retval true if @p chip matches @p match * @retval false if @p chip does not match @p match. 
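 * * A minimal sketch (MYBOARD and MY_QUIRK are hypothetical constants; the * exact argument naming follows the match macros in bhnd_match.h): * @code * struct bhnd_board_match md = { * BHND_MATCH_BOARD_TYPE(MYBOARD) * }; * * if (bhnd_board_matches(&board, &md)) * quirks |= MY_QUIRK; * @endcode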
*/ bool bhnd_board_matches(const struct bhnd_board_info *board, const struct bhnd_board_match *desc) { if (desc->m.match.board_srom_rev && !bhnd_hwrev_matches(board->board_srom_rev, &desc->board_srom_rev)) return (false); if (desc->m.match.board_vendor && board->board_vendor != desc->board_vendor) return (false); if (desc->m.match.board_type && board->board_type != desc->board_type) return (false); if (desc->m.match.board_rev && !bhnd_hwrev_matches(board->board_rev, &desc->board_rev)) return (false); return (true); } /** * Return true if the @p hwrev matches @p desc. * * @param hwrev A bhnd hardware revision. * @param desc A match descriptor to compare against @p core. * * @retval true if @p hwrev matches @p match * @retval false if @p hwrev does not match @p match. */ bool bhnd_hwrev_matches(uint16_t hwrev, const struct bhnd_hwrev_match *desc) { if (desc->start != BHND_HWREV_INVALID && desc->start > hwrev) return false; if (desc->end != BHND_HWREV_INVALID && desc->end < hwrev) return false; return true; } /** * Return true if the @p dev matches @p desc. * * @param dev A bhnd device. * @param desc A match descriptor to compare against @p dev. * * @retval true if @p dev matches @p match * @retval false if @p dev does not match @p match. */ bool bhnd_device_matches(device_t dev, const struct bhnd_device_match *desc) { struct bhnd_core_info core; const struct bhnd_chipid *chip; struct bhnd_board_info board; device_t parent; int error; /* Construct individual match descriptors */ struct bhnd_core_match m_core = { _BHND_CORE_MATCH_COPY(desc) }; struct bhnd_chip_match m_chip = { _BHND_CHIP_MATCH_COPY(desc) }; struct bhnd_board_match m_board = { _BHND_BOARD_MATCH_COPY(desc) }; /* Fetch and match core info */ if (m_core.m.match_flags) { /* Only applicable to bhnd-attached cores */ parent = device_get_parent(dev); if (device_get_devclass(parent) != bhnd_devclass) { device_printf(dev, "attempting to match core " "attributes against non-core device\n"); return (false); } core = bhnd_get_core_info(dev); if (!bhnd_core_matches(&core, &m_core)) return (false); } /* Fetch and match chip info */ if (m_chip.m.match_flags) { chip = bhnd_get_chipid(dev); if (!bhnd_chip_matches(chip, &m_chip)) return (false); } /* Fetch and match board info. * * This is not available until after NVRAM is up; earlier device * matches should not include board requirements */ if (m_board.m.match_flags) { if ((error = bhnd_read_board_info(dev, &board))) { device_printf(dev, "failed to read required board info " "during device matching: %d\n", error); return (false); } if (!bhnd_board_matches(&board, &m_board)) return (false); } /* All matched */ return (true); } /** * Search @p table for an entry matching @p dev. * * @param dev A bhnd device to match against @p table. * @param table The device table to search. * @param entry_size The @p table entry size, in bytes. * * @retval bhnd_device the first matching device, if any. * @retval NULL if no matching device is found in @p table. 
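 * * A typical use is a driver-private device table (sketch; descriptions and * quirk tables are omitted): * @code * static const struct bhnd_device my_devs[] = { * BHND_DEVICE(BCM, PCI, NULL, NULL), * BHND_DEVICE(BCM, PCIE, NULL, NULL), * BHND_DEVICE_END * }; * * const struct bhnd_device *ent; * * ent = bhnd_device_lookup(dev, my_devs, sizeof(my_devs[0])); * @endcode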
*/ const struct bhnd_device * bhnd_device_lookup(device_t dev, const struct bhnd_device *table, size_t entry_size) { const struct bhnd_device *entry; device_t hostb, parent; bhnd_attach_type attach_type; uint32_t dflags; parent = device_get_parent(dev); hostb = bhnd_find_hostb_device(parent); attach_type = bhnd_get_attach_type(dev); for (entry = table; !BHND_DEVICE_IS_END(entry); entry = (const struct bhnd_device *) ((const char *) entry + entry_size)) { /* match core info */ if (!bhnd_device_matches(dev, &entry->core)) continue; /* match device flags */ dflags = entry->device_flags; /* hostb implies BHND_ATTACH_ADAPTER requirement */ if (dflags & BHND_DF_HOSTB) dflags |= BHND_DF_ADAPTER; if (dflags & BHND_DF_ADAPTER) if (attach_type != BHND_ATTACH_ADAPTER) continue; if (dflags & BHND_DF_HOSTB) if (dev != hostb) continue; if (dflags & BHND_DF_SOC) if (attach_type != BHND_ATTACH_NATIVE) continue; /* device found */ return (entry); } /* not found */ return (NULL); } /** * Scan the device @p table for all quirk flags applicable to @p dev. * * @param dev A bhnd device to match against @p table. * @param table The device table to search. * * @return returns all matching quirk flags. */ uint32_t bhnd_device_quirks(device_t dev, const struct bhnd_device *table, size_t entry_size) { const struct bhnd_device *dent; const struct bhnd_device_quirk *qent, *qtable; uint32_t quirks; /* Locate the device entry */ if ((dent = bhnd_device_lookup(dev, table, entry_size)) == NULL) return (0); /* Quirks table is optional */ qtable = dent->quirks_table; if (qtable == NULL) return (0); /* Collect matching device quirk entries */ quirks = 0; for (qent = qtable; !BHND_DEVICE_QUIRK_IS_END(qent); qent++) { if (bhnd_device_matches(dev, &qent->desc)) quirks |= qent->quirks; } return (quirks); } /** * Allocate bhnd(4) resources defined in @p rs from a parent bus. * * @param dev The device requesting ownership of the resources. * @param rs A standard bus resource specification. This will be updated * with the allocated resource's RIDs. * @param res On success, the allocated bhnd resources. * * @retval 0 success * @retval non-zero if allocation of any non-RF_OPTIONAL resource fails, * all allocated resources will be released and a regular * unix error code will be returned. */ int bhnd_alloc_resources(device_t dev, struct resource_spec *rs, struct bhnd_resource **res) { /* Initialize output array */ for (u_int i = 0; rs[i].type != -1; i++) res[i] = NULL; for (u_int i = 0; rs[i].type != -1; i++) { res[i] = bhnd_alloc_resource_any(dev, rs[i].type, &rs[i].rid, rs[i].flags); /* Clean up all allocations on failure */ if (res[i] == NULL && !(rs[i].flags & RF_OPTIONAL)) { bhnd_release_resources(dev, rs, res); return (ENXIO); } } return (0); }; /** * Release bhnd(4) resources defined in @p rs from a parent bus. * * @param dev The device that owns the resources. * @param rs A standard bus resource specification previously initialized * by @p bhnd_alloc_resources. * @param res The bhnd resources to be released. */ void bhnd_release_resources(device_t dev, const struct resource_spec *rs, struct bhnd_resource **res) { for (u_int i = 0; rs[i].type != -1; i++) { if (res[i] == NULL) continue; bhnd_release_resource(dev, rs[i].type, rs[i].rid, res[i]); res[i] = NULL; } } /** * Parse the CHIPC_ID_* fields from the ChipCommon CHIPC_ID * register, returning its bhnd_chipid representation. * * @param idreg The CHIPC_ID register value. * @param enum_addr The enumeration address to include in the result. 
* * @warning * On early siba(4) devices, the ChipCommon core does not provide * a valid CHIPC_ID_NUMCORE field. On these ChipCommon revisions * (see CHIPC_NCORES_MIN_HWREV()), this function will parse and return * an invalid `ncores` value. */ struct bhnd_chipid bhnd_parse_chipid(uint32_t idreg, bhnd_addr_t enum_addr) { struct bhnd_chipid result; /* Fetch the basic chip info */ result.chip_id = CHIPC_GET_BITS(idreg, CHIPC_ID_CHIP); result.chip_pkg = CHIPC_GET_BITS(idreg, CHIPC_ID_PKG); result.chip_rev = CHIPC_GET_BITS(idreg, CHIPC_ID_REV); result.chip_type = CHIPC_GET_BITS(idreg, CHIPC_ID_BUS); result.ncores = CHIPC_GET_BITS(idreg, CHIPC_ID_NUMCORE); result.enum_addr = enum_addr; return (result); } + /** + * Determine the correct core count for a chip identification value that + * may contain an invalid core count. + * + * On some early siba(4) devices (see CHIPC_NCORES_MIN_HWREV()), the ChipCommon + * core does not provide a valid CHIPC_ID_NUMCORE field. + * + * @param cid The chip identification to be queried. + * @param chipc_hwrev The hardware revision of the ChipCommon core from which + * @p cid was parsed. + * @param[out] ncores On success, will be set to the correct core count. + * + * @retval 0 If the core count is already correct, or was mapped to a + * correct value. + * @retval EINVAL If the core count is incorrect, but the chip was not + * recognized. + */ +int +bhnd_chipid_fixed_ncores(const struct bhnd_chipid *cid, uint16_t chipc_hwrev, + uint8_t *ncores) +{ + /* bcma(4), and most siba(4) devices */ + if (CHIPC_NCORES_MIN_HWREV(chipc_hwrev)) { + *ncores = cid->ncores; + return (0); + } + + /* broken siba(4) chipsets */ + switch (cid->chip_id) { + case BHND_CHIPID_BCM4306: + *ncores = 6; + break; + case BHND_CHIPID_BCM4704: + *ncores = 9; + break; + case BHND_CHIPID_BCM5365: + /* + * BCM5365 does support ID_NUMCORE in at least + * some of its revisions, but for unknown + * reasons, Broadcom's drivers always exclude + * the ChipCommon revision (0x5) used by BCM5365 + * from the set of revisions supporting + * ID_NUMCORE, and instead supply a fixed value. + * + * Presumably, at least some of these devices + * shipped with a broken ID_NUMCORE value. + */ + *ncores = 7; + break; + default: + return (EINVAL); + } + + return (0); +} + +/** * Allocate the resource defined by @p rs via @p dev, use it * to read the ChipCommon ID register relative to @p chipc_offset, * then release the resource. * * @param dev The device owning @p rs. * @param rs A resource spec that encompasses the ChipCommon register block. * @param chipc_offset The offset of the ChipCommon registers within @p rs. * @param[out] result the chip identification data. * * @retval 0 success * @retval non-zero if the ChipCommon identification data could not be read.
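 * * A minimal sketch (the resource spec and ChipCommon offset are * bridge-specific; RID 0 and a zero offset are assumed here): * @code * struct resource_spec rs = { SYS_RES_MEMORY, 0, RF_ACTIVE }; * struct bhnd_chipid cid; * int error; * * error = bhnd_read_chipid(dev, &rs, 0, &cid); * @endcode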
*/ int bhnd_read_chipid(device_t dev, struct resource_spec *rs, bus_size_t chipc_offset, struct bhnd_chipid *result) { struct resource *res; bhnd_addr_t enum_addr; uint32_t reg; uint8_t chip_type; int error, rid, rtype; rid = rs->rid; rtype = rs->type; error = 0; /* Allocate the ChipCommon window resource and fetch the chipid data */ res = bus_alloc_resource_any(dev, rtype, &rid, RF_ACTIVE); if (res == NULL) { device_printf(dev, "failed to allocate bhnd chipc resource\n"); return (ENXIO); } /* Fetch the basic chip info */ reg = bus_read_4(res, chipc_offset + CHIPC_ID); chip_type = CHIPC_GET_BITS(reg, CHIPC_ID_BUS); /* Fetch the EROMPTR */ if (BHND_CHIPTYPE_HAS_EROM(chip_type)) { enum_addr = bus_read_4(res, chipc_offset + CHIPC_EROMPTR); } else if (chip_type == BHND_CHIPTYPE_SIBA) { /* siba(4) uses the ChipCommon base address as the enumeration * address */ enum_addr = rman_get_start(res) + chipc_offset; } else { device_printf(dev, "unknown chip type %hhu\n", chip_type); error = ENODEV; goto cleanup; } *result = bhnd_parse_chipid(reg, enum_addr); + + /* Fix the core count on early siba(4) devices */ + if (chip_type == BHND_CHIPTYPE_SIBA) { + uint32_t idh; + uint16_t chipc_hwrev; + + /* + * We need the ChipCommon revision to determine whether + * the ncore field is valid. + * + * We can safely assume the siba IDHIGH register is mapped + * within the chipc register block. + */ + idh = bus_read_4(res, SB0_REG_ABS(SIBA_CFG0_IDHIGH)); + chipc_hwrev = SIBA_IDH_CORE_REV(idh); + + error = bhnd_chipid_fixed_ncores(result, chipc_hwrev, + &result->ncores); + if (error) + goto cleanup; + } cleanup: /* Clean up */ bus_release_resource(dev, rtype, rid, res); return (error); } /** * Read an NVRAM variable's NUL-terminated string value. * * @param dev A bhnd bus child device. * @param name The NVRAM variable name. * @param[out] buf A buffer large enough to hold @p len bytes. On * success, the NUL-terminated string value will be * written to this buffer. This argument may be NULL if * the value is not desired. * @param len The maximum capacity of @p buf. * @param[out] rlen On success, will be set to the actual size of * the requested value (including NUL termination). This * argument may be NULL if the size is not desired. * * @retval 0 success * @retval ENOENT The requested variable was not found. * @retval ENODEV No valid NVRAM source could be found. * @retval ENOMEM If @p buf is non-NULL and a buffer of @p len is too * small to hold the requested value. * @retval EFTYPE If the variable data cannot be coerced to a valid * string representation. * @retval ERANGE If value coercion would overflow @p type. * @retval non-zero If reading @p name otherwise fails, a regular unix * error code will be returned. */ int bhnd_nvram_getvar_str(device_t dev, const char *name, char *buf, size_t len, size_t *rlen) { size_t larg; int error; larg = len; error = bhnd_nvram_getvar(dev, name, buf, &larg, BHND_NVRAM_TYPE_CSTR); if (rlen != NULL) *rlen = larg; return (error); } /** * Read an NVRAM variable's unsigned integer value. * * @param dev A bhnd bus child device. * @param name The NVRAM variable name. * @param[out] value On success, the requested value will be written * to this pointer. * @param width The output integer type width (1, 2, or * 4 bytes). * * @retval 0 success * @retval ENOENT The requested variable was not found. * @retval ENODEV No valid NVRAM source could be found. * @retval EFTYPE If the variable data cannot be coerced to * a valid unsigned integer representation.
* @retval ERANGE If value coercion would overflow (or underflow) an * unsigned representation of the given @p width. * @retval non-zero If reading @p name otherwise fails, a regular unix * error code will be returned. */ int bhnd_nvram_getvar_uint(device_t dev, const char *name, void *value, int width) { bhnd_nvram_type type; size_t len; switch (width) { case 1: type = BHND_NVRAM_TYPE_UINT8; break; case 2: type = BHND_NVRAM_TYPE_UINT16; break; case 4: type = BHND_NVRAM_TYPE_UINT32; break; default: device_printf(dev, "unsupported NVRAM integer width: %d\n", width); return (EINVAL); } len = width; return (bhnd_nvram_getvar(dev, name, value, &len, type)); } /** * Read an NVRAM variable's unsigned 8-bit integer value. * * @param dev A bhnd bus child device. * @param name The NVRAM variable name. * @param[out] value On success, the requested value will be written * to this pointer. * * @retval 0 success * @retval ENOENT The requested variable was not found. * @retval ENODEV No valid NVRAM source could be found. * @retval EFTYPE If the variable data cannot be coerced to * a valid unsigned integer representation. * @retval ERANGE If value coercion would overflow (or underflow) uint8_t. * @retval non-zero If reading @p name otherwise fails, a regular unix * error code will be returned. */ int bhnd_nvram_getvar_uint8(device_t dev, const char *name, uint8_t *value) { return (bhnd_nvram_getvar_uint(dev, name, value, sizeof(*value))); } /** * Read an NVRAM variable's unsigned 16-bit integer value. * * @param dev A bhnd bus child device. * @param name The NVRAM variable name. * @param[out] value On success, the requested value will be written * to this pointer. * * @retval 0 success * @retval ENOENT The requested variable was not found. * @retval ENODEV No valid NVRAM source could be found. * @retval EFTYPE If the variable data cannot be coerced to * a valid unsigned integer representation. * @retval ERANGE If value coercion would overflow (or underflow) * uint16_t. * @retval non-zero If reading @p name otherwise fails, a regular unix * error code will be returned. */ int bhnd_nvram_getvar_uint16(device_t dev, const char *name, uint16_t *value) { return (bhnd_nvram_getvar_uint(dev, name, value, sizeof(*value))); } /** * Read an NVRAM variable's unsigned 32-bit integer value. * * @param dev A bhnd bus child device. * @param name The NVRAM variable name. * @param[out] value On success, the requested value will be written * to this pointer. * * @retval 0 success * @retval ENOENT The requested variable was not found. * @retval ENODEV No valid NVRAM source could be found. * @retval EFTYPE If the variable data cannot be coerced to * a valid unsigned integer representation. * @retval ERANGE If value coercion would overflow (or underflow) * uint32_t. * @retval non-zero If reading @p name otherwise fails, a regular unix * error code will be returned. */ int bhnd_nvram_getvar_uint32(device_t dev, const char *name, uint32_t *value) { return (bhnd_nvram_getvar_uint(dev, name, value, sizeof(*value))); } /** * Read an NVRAM variable's signed integer value. * * @param dev A bhnd bus child device. * @param name The NVRAM variable name. * @param[out] value On success, the requested value will be written * to this pointer. * @param width The output integer type width (1, 2, or * 4 bytes). * * @retval 0 success * @retval ENOENT The requested variable was not found. * @retval ENODEV No valid NVRAM source could be found. * @retval EFTYPE If the variable data cannot be coerced to * a valid integer representation.
* @retval ERANGE If value coercion would overflow (or underflow) a * signed representation of the given @p width. * @retval non-zero If reading @p name otherwise fails, a regular unix * error code will be returned. */ int bhnd_nvram_getvar_int(device_t dev, const char *name, void *value, int width) { bhnd_nvram_type type; size_t len; switch (width) { case 1: type = BHND_NVRAM_TYPE_INT8; break; case 2: type = BHND_NVRAM_TYPE_INT16; break; case 4: type = BHND_NVRAM_TYPE_INT32; break; default: device_printf(dev, "unsupported NVRAM integer width: %d\n", width); return (EINVAL); } len = width; return (bhnd_nvram_getvar(dev, name, value, &len, type)); } /** * Read an NVRAM variable's signed 8-bit integer value. * * @param dev A bhnd bus child device. * @param name The NVRAM variable name. * @param[out] value On success, the requested value will be written * to this pointer. * * @retval 0 success * @retval ENOENT The requested variable was not found. * @retval ENODEV No valid NVRAM source could be found. * @retval EFTYPE If the variable data cannot be coerced to * a valid integer representation. * @retval ERANGE If value coercion would overflow (or underflow) int8_t. * @retval non-zero If reading @p name otherwise fails, a regular unix * error code will be returned. */ int bhnd_nvram_getvar_int8(device_t dev, const char *name, int8_t *value) { return (bhnd_nvram_getvar_int(dev, name, value, sizeof(*value))); } /** * Read an NVRAM variable's signed 16-bit integer value. * * @param dev A bhnd bus child device. * @param name The NVRAM variable name. * @param[out] value On success, the requested value will be written * to this pointer. * * @retval 0 success * @retval ENOENT The requested variable was not found. * @retval ENODEV No valid NVRAM source could be found. * @retval EFTYPE If the variable data cannot be coerced to * a valid integer representation. * @retval ERANGE If value coercion would overflow (or underflow) * int16_t. * @retval non-zero If reading @p name otherwise fails, a regular unix * error code will be returned. */ int bhnd_nvram_getvar_int16(device_t dev, const char *name, int16_t *value) { return (bhnd_nvram_getvar_int(dev, name, value, sizeof(*value))); } /** * Read an NVRAM variable's signed 32-bit integer value. * * @param dev A bhnd bus child device. * @param name The NVRAM variable name. * @param[out] value On success, the requested value will be written * to this pointer. * * @retval 0 success * @retval ENOENT The requested variable was not found. * @retval ENODEV No valid NVRAM source could be found. * @retval EFTYPE If the variable data cannot be coerced to * a valid integer representation. * @retval ERANGE If value coercion would overflow (or underflow) * int32_t. * @retval non-zero If reading @p name otherwise fails, a regular unix * error code will be returned. */ int bhnd_nvram_getvar_int32(device_t dev, const char *name, int32_t *value) { return (bhnd_nvram_getvar_int(dev, name, value, sizeof(*value))); } /** * Read an NVRAM variable's array value. * * @param dev A bhnd bus child device. * @param name The NVRAM variable name. * @param[out] buf A buffer large enough to hold @p size bytes. * On success, the requested value will be written * to this buffer. * @param[in,out] size The required number of bytes to write to * @p buf. * @param type The desired array element data representation. * * @retval 0 success * @retval ENOENT The requested variable was not found. * @retval ENODEV No valid NVRAM source could be found.
* @retval ENXIO If less than @p size bytes are available. * @retval ENOMEM If a buffer of @p size is too small to hold the * requested value. * @retval EFTYPE If the variable data cannot be coerced to * a valid instance of @p type. * @retval ERANGE If value coercion would overflow (or underflow) a * representation of @p type. * @retval non-zero If reading @p name otherwise fails, a regular unix * error code will be returned. */ int bhnd_nvram_getvar_array(device_t dev, const char *name, void *buf, size_t size, bhnd_nvram_type type) { size_t nbytes; int error; /* Attempt read */ nbytes = size; if ((error = bhnd_nvram_getvar(dev, name, buf, &nbytes, type))) return (error); /* Verify that the expected number of bytes were fetched */ if (nbytes < size) return (ENXIO); return (0); } /** * Using the bhnd(4) bus-level core information and a custom core name, * populate @p dev's device description. * * @param dev A bhnd-bus attached device. * @param dev_name The core's name (e.g. "SDIO Device Core") */ void bhnd_set_custom_core_desc(device_t dev, const char *dev_name) { const char *vendor_name; char *desc; vendor_name = bhnd_get_vendor_name(dev); asprintf(&desc, M_BHND, "%s %s, rev %hhu", vendor_name, dev_name, bhnd_get_hwrev(dev)); if (desc != NULL) { device_set_desc_copy(dev, desc); free(desc, M_BHND); } else { device_set_desc(dev, dev_name); } } /** * Using the bhnd(4) bus-level core information, populate @p dev's device * description. * * @param dev A bhnd-bus attached device. */ void bhnd_set_default_core_desc(device_t dev) { bhnd_set_custom_core_desc(dev, bhnd_get_device_name(dev)); } /** * Using the bhnd @p chip_id, populate the bhnd(4) bus @p dev's device * description. * * @param dev A bhnd-bus attached device. * @param chip_id The chip identification used to derive the description. */ void bhnd_set_default_bus_desc(device_t dev, const struct bhnd_chipid *chip_id) { const char *bus_name; char *desc; char chip_name[BHND_CHIPID_MAX_NAMELEN]; /* Determine chip type's bus name */ switch (chip_id->chip_type) { case BHND_CHIPTYPE_SIBA: bus_name = "SIBA bus"; break; case BHND_CHIPTYPE_BCMA: case BHND_CHIPTYPE_BCMA_ALT: bus_name = "BCMA bus"; break; case BHND_CHIPTYPE_UBUS: bus_name = "UBUS bus"; break; default: bus_name = "Unknown Type"; break; } /* Format chip name */ bhnd_format_chip_id(chip_name, sizeof(chip_name), chip_id->chip_id); /* Format and set device description */ asprintf(&desc, M_BHND, "%s %s", chip_name, bus_name); if (desc != NULL) { device_set_desc_copy(dev, desc); free(desc, M_BHND); } else { device_set_desc(dev, bus_name); } } /** * Helper function for implementing BHND_BUS_IS_HW_DISABLED(). * * If a parent device is available, this implementation delegates the * request to the BHND_BUS_IS_HW_DISABLED() method on the parent of @p dev. * * If no parent device is available (i.e. on the bus root), the hardware * is assumed to be usable and false is returned. */ bool bhnd_bus_generic_is_hw_disabled(device_t dev, device_t child) { if (device_get_parent(dev) != NULL) return (BHND_BUS_IS_HW_DISABLED(device_get_parent(dev), child)); return (false); } /** * Helper function for implementing BHND_BUS_GET_CHIPID(). * * This implementation delegates the request to the BHND_BUS_GET_CHIPID() * method on the parent of @p dev. If no parent exists, the implementation * will panic.
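 *
 * Bus drivers would typically install this helper in their method table;
 * a sketch only:
 *
 * @code
 * DEVMETHOD(bhnd_bus_get_chipid, bhnd_bus_generic_get_chipid),
 * @endcode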
*/ const struct bhnd_chipid * bhnd_bus_generic_get_chipid(device_t dev, device_t child) { if (device_get_parent(dev) != NULL) return (BHND_BUS_GET_CHIPID(device_get_parent(dev), child)); panic("missing BHND_BUS_GET_CHIPID()"); } /* nvram board_info population macros for bhnd_bus_generic_read_board_info() */ #define BHND_GV(_dest, _name) \ bhnd_nvram_getvar_uint(child, BHND_NVAR_ ## _name, &_dest, \ sizeof(_dest)) #define REQ_BHND_GV(_dest, _name) do { \ if ((error = BHND_GV(_dest, _name))) { \ device_printf(dev, \ "error reading " __STRING(_name) ": %d\n", error); \ return (error); \ } \ } while(0) #define OPT_BHND_GV(_dest, _name, _default) do { \ if ((error = BHND_GV(_dest, _name))) { \ if (error != ENOENT) { \ device_printf(dev, \ "error reading " \ __STRING(_name) ": %d\n", error); \ return (error); \ } \ _dest = _default; \ } \ } while(0) /** * Helper function for implementing BHND_BUS_READ_BOARDINFO(). * * This implementation populates @p info with information from NVRAM, * defaulting board_vendor and board_type fields to 0 if the * requested variables cannot be found. * * This behavior is correct for most SoCs, but must be overridden on * bridged (PCI, PCMCIA, etc) devices to produce a complete bhnd_board_info * result. */ int bhnd_bus_generic_read_board_info(device_t dev, device_t child, struct bhnd_board_info *info) { int error; OPT_BHND_GV(info->board_vendor, BOARDVENDOR, 0); OPT_BHND_GV(info->board_type, BOARDTYPE, 0); /* srom >= 2 */ REQ_BHND_GV(info->board_rev, BOARDREV); OPT_BHND_GV(info->board_srom_rev, SROMREV, 0); /* missing in some SoC NVRAM */ REQ_BHND_GV(info->board_flags, BOARDFLAGS); OPT_BHND_GV(info->board_flags2, BOARDFLAGS2, 0); /* srom >= 4 */ OPT_BHND_GV(info->board_flags3, BOARDFLAGS3, 0); /* srom >= 11 */ return (0); } #undef BHND_GV #undef REQ_BHND_GV #undef OPT_BHND_GV /** * Helper function for implementing BHND_BUS_GET_NVRAM_VAR(). * * This implementation searches @p dev for a usable NVRAM child device. * * If no usable child device is found on @p dev, the request is delegated to * the BHND_BUS_GET_NVRAM_VAR() method on the parent of @p dev. */ int bhnd_bus_generic_get_nvram_var(device_t dev, device_t child, const char *name, void *buf, size_t *size, bhnd_nvram_type type) { device_t nvram; device_t parent; /* Make sure we're holding Giant for newbus */ GIANT_REQUIRED; /* Look for a directly-attached NVRAM child */ if ((nvram = device_find_child(dev, "bhnd_nvram", -1)) != NULL) return (BHND_NVRAM_GETVAR(nvram, name, buf, size, type)); /* Try to delegate to parent */ if ((parent = device_get_parent(dev)) == NULL) return (ENODEV); return (BHND_BUS_GET_NVRAM_VAR(parent, child, name, buf, size, type)); } /** * Helper function for implementing BHND_BUS_ALLOC_RESOURCE(). * * This implementation of BHND_BUS_ALLOC_RESOURCE() delegates allocation * of the underlying resource to BUS_ALLOC_RESOURCE(), and activation * to @p dev's BHND_BUS_ACTIVATE_RESOURCE(). */ struct bhnd_resource * bhnd_bus_generic_alloc_resource(device_t dev, device_t child, int type, int *rid, rman_res_t start, rman_res_t end, rman_res_t count, u_int flags) { struct bhnd_resource *br; struct resource *res; int error; br = NULL; res = NULL; /* Allocate the real bus resource (without activating it) */ res = BUS_ALLOC_RESOURCE(dev, child, type, rid, start, end, count, (flags & ~RF_ACTIVE)); if (res == NULL) return (NULL); /* Allocate our bhnd resource wrapper.
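 *
 * (Descriptive aside: the wrapper starts out marked indirect, i.e.
 * direct = false; if RF_ACTIVE was requested, the activation call below
 * may promote it to direct access, depending on the bus implementation.)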
*/ br = malloc(sizeof(struct bhnd_resource), M_BHND, M_NOWAIT); if (br == NULL) goto failed; br->direct = false; br->res = res; /* Attempt activation */ if (flags & RF_ACTIVE) { error = BHND_BUS_ACTIVATE_RESOURCE(dev, child, type, *rid, br); if (error) goto failed; } return (br); failed: if (res != NULL) BUS_RELEASE_RESOURCE(dev, child, type, *rid, res); free(br, M_BHND); return (NULL); } /** * Helper function for implementing BHND_BUS_RELEASE_RESOURCE(). * * This implementation of BHND_BUS_RELEASE_RESOURCE() delegates release of * the backing resource to BUS_RELEASE_RESOURCE(). */ int bhnd_bus_generic_release_resource(device_t dev, device_t child, int type, int rid, struct bhnd_resource *r) { int error; if ((error = BUS_RELEASE_RESOURCE(dev, child, type, rid, r->res))) return (error); free(r, M_BHND); return (0); } /** * Helper function for implementing BHND_BUS_ACTIVATE_RESOURCE(). * * This implementation of BHND_BUS_ACTIVATE_RESOURCE() simply calls the * BHND_BUS_ACTIVATE_RESOURCE() method of the parent of @p dev. */ int bhnd_bus_generic_activate_resource(device_t dev, device_t child, int type, int rid, struct bhnd_resource *r) { /* Try to delegate to the parent */ if (device_get_parent(dev) != NULL) return (BHND_BUS_ACTIVATE_RESOURCE(device_get_parent(dev), child, type, rid, r)); return (EINVAL); } /** * Helper function for implementing BHND_BUS_DEACTIVATE_RESOURCE(). * * This implementation of BHND_BUS_DEACTIVATE_RESOURCE() simply calls the * BHND_BUS_DEACTIVATE_RESOURCE() method of the parent of @p dev. */ int bhnd_bus_generic_deactivate_resource(device_t dev, device_t child, int type, int rid, struct bhnd_resource *r) { if (device_get_parent(dev) != NULL) return (BHND_BUS_DEACTIVATE_RESOURCE(device_get_parent(dev), child, type, rid, r)); return (EINVAL); } Index: projects/clang390-import/sys/dev/bhnd/siba/siba.c =================================================================== --- projects/clang390-import/sys/dev/bhnd/siba/siba.c (revision 305016) +++ projects/clang390-import/sys/dev/bhnd/siba/siba.c (revision 305017) @@ -1,788 +1,765 @@ /*- * Copyright (c) 2015 Landon Fuller * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer, * without modification. * 2. Redistributions in binary form must reproduce at minimum a disclaimer * similar to the "NO WARRANTY" disclaimer below ("Disclaimer") and any * redistribution must be conditioned upon including a substantially * similar Disclaimer requirement for further binary redistribution. * * NO WARRANTY * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF NONINFRINGEMENT, MERCHANTIBILITY * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL * THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGES.
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include "sibareg.h" #include "sibavar.h" int siba_probe(device_t dev) { device_set_desc(dev, "SIBA BHND bus"); return (BUS_PROBE_DEFAULT); } int siba_attach(device_t dev) { struct siba_devinfo *dinfo; struct siba_softc *sc; device_t *devs; int ndevs; int error; sc = device_get_softc(dev); sc->dev = dev; /* Fetch references to the siba SIBA_CFG* blocks for all * registered devices */ if ((error = device_get_children(dev, &devs, &ndevs))) return (error); for (int i = 0; i < ndevs; i++) { struct siba_addrspace *addrspace; dinfo = device_get_ivars(devs[i]); KASSERT(!device_is_suspended(devs[i]), ("siba(4) stateful suspend handling requires that devices " "not be suspended before siba_attach()")); /* Fetch the core register address space */ addrspace = siba_find_addrspace(dinfo, BHND_PORT_DEVICE, 0, 0); if (addrspace == NULL) { device_printf(dev, "missing device registers for core %d\n", i); error = ENXIO; goto cleanup; } /* * Map the per-core configuration blocks */ KASSERT(dinfo->core_id.num_cfg_blocks <= SIBA_MAX_CFG, ("config block count %u out of range", dinfo->core_id.num_cfg_blocks)); for (u_int cfgidx = 0; cfgidx < dinfo->core_id.num_cfg_blocks; cfgidx++) { rman_res_t r_start, r_count, r_end; /* Determine the config block's address range; configuration * blocks are allocated starting at SIBA_CFG0_OFFSET, * growing downwards. */ r_start = addrspace->sa_base + SIBA_CFG0_OFFSET; r_start -= cfgidx * SIBA_CFG_SIZE; r_count = SIBA_CFG_SIZE; r_end = r_start + r_count - 1; /* Allocate the config resource */ dinfo->cfg_rid[cfgidx] = 0; dinfo->cfg[cfgidx] = BHND_BUS_ALLOC_RESOURCE(dev, dev, SYS_RES_MEMORY, &dinfo->cfg_rid[cfgidx], r_start, r_end, r_count, RF_ACTIVE); if (dinfo->cfg[cfgidx] == NULL) { device_printf(dev, "failed allocating CFG_%u for " "core %d\n", cfgidx, i); error = ENXIO; goto cleanup; } } } cleanup: free(devs, M_BHND); if (error) return (error); /* Delegate remainder to standard bhnd method implementation */ return (bhnd_generic_attach(dev)); } int siba_detach(device_t dev) { return (bhnd_generic_detach(dev)); } int siba_resume(device_t dev) { return (bhnd_generic_resume(dev)); } int siba_suspend(device_t dev) { return (bhnd_generic_suspend(dev)); } static int siba_read_ivar(device_t dev, device_t child, int index, uintptr_t *result) { const struct siba_devinfo *dinfo; const struct bhnd_core_info *cfg; dinfo = device_get_ivars(child); cfg = &dinfo->core_id.core_info; switch (index) { case BHND_IVAR_VENDOR: *result = cfg->vendor; return (0); case BHND_IVAR_DEVICE: *result = cfg->device; return (0); case BHND_IVAR_HWREV: *result = cfg->hwrev; return (0); case BHND_IVAR_DEVICE_CLASS: *result = bhnd_core_class(cfg); return (0); case BHND_IVAR_VENDOR_NAME: *result = (uintptr_t) bhnd_vendor_name(cfg->vendor); return (0); case BHND_IVAR_DEVICE_NAME: *result = (uintptr_t) bhnd_core_name(cfg); return (0); case BHND_IVAR_CORE_INDEX: *result = cfg->core_idx; return (0); case BHND_IVAR_CORE_UNIT: *result = cfg->unit; return (0); default: return (ENOENT); } } static int siba_write_ivar(device_t dev, device_t child, int index, uintptr_t value) { switch (index) { case BHND_IVAR_VENDOR: case BHND_IVAR_DEVICE: case BHND_IVAR_HWREV: case BHND_IVAR_DEVICE_CLASS: case BHND_IVAR_VENDOR_NAME: case BHND_IVAR_DEVICE_NAME: case BHND_IVAR_CORE_INDEX: case BHND_IVAR_CORE_UNIT: return (EINVAL); default: return (ENOENT); } } static struct resource_list * siba_get_resource_list(device_t dev, device_t 
child) { struct siba_devinfo *dinfo = device_get_ivars(child); return (&dinfo->resources); } static device_t siba_find_hostb_device(device_t dev) { struct siba_softc *sc = device_get_softc(dev); /* This is set (or not) by the concrete siba driver subclass. */ return (sc->hostb_dev); } static int siba_reset_core(device_t dev, device_t child, uint16_t flags) { struct siba_devinfo *dinfo; /* Delegate non-bus-attached devices to our parent */ if (device_get_parent(child) != dev) return (BHND_BUS_RESET_CORE(device_get_parent(dev), child, flags)); dinfo = device_get_ivars(child); /* Can't reset the core without access to the CFG0 registers */ if (dinfo->cfg[0] == NULL) return (ENODEV); // TODO - perform reset return (ENXIO); } static int siba_suspend_core(device_t dev, device_t child) { struct siba_devinfo *dinfo; /* Delegate non-bus-attached devices to our parent */ if (device_get_parent(child) != dev) return (BHND_BUS_SUSPEND_CORE(device_get_parent(dev), child)); dinfo = device_get_ivars(child); /* Can't suspend the core without access to the CFG0 registers */ if (dinfo->cfg[0] == NULL) return (ENODEV); // TODO - perform suspend return (ENXIO); } static uint32_t siba_read_config(device_t dev, device_t child, bus_size_t offset, u_int width) { struct siba_devinfo *dinfo; rman_res_t r_size; /* Must be directly attached */ if (device_get_parent(child) != dev) return (UINT32_MAX); /* CFG0 registers must be available */ dinfo = device_get_ivars(child); if (dinfo->cfg[0] == NULL) return (UINT32_MAX); /* Offset must fall within CFG0 */ r_size = rman_get_size(dinfo->cfg[0]->res); if (r_size < offset || r_size - offset < width) return (UINT32_MAX); switch (width) { case 1: return (bhnd_bus_read_1(dinfo->cfg[0], offset)); case 2: return (bhnd_bus_read_2(dinfo->cfg[0], offset)); case 4: return (bhnd_bus_read_4(dinfo->cfg[0], offset)); } /* Unsupported */ return (UINT32_MAX); } static void siba_write_config(device_t dev, device_t child, bus_size_t offset, uint32_t val, u_int width) { struct siba_devinfo *dinfo; rman_res_t r_size; /* Must be directly attached */ if (device_get_parent(child) != dev) return; /* CFG0 registers must be available */ dinfo = device_get_ivars(child); if (dinfo->cfg[0] == NULL) return; /* Offset must fall within CFG0 */ r_size = rman_get_size(dinfo->cfg[0]->res); if (r_size < offset || r_size - offset < width) return; switch (width) { case 1: bhnd_bus_write_1(dinfo->cfg[0], offset, val); break; case 2: bhnd_bus_write_2(dinfo->cfg[0], offset, val); break; case 4: bhnd_bus_write_4(dinfo->cfg[0], offset, val); break; } } static u_int siba_get_port_count(device_t dev, device_t child, bhnd_port_type type) { struct siba_devinfo *dinfo; /* delegate non-bus-attached devices to our parent */ if (device_get_parent(child) != dev) return (BHND_BUS_GET_PORT_COUNT(device_get_parent(dev), child, type)); dinfo = device_get_ivars(child); return (siba_addrspace_port_count(dinfo)); } static u_int siba_get_region_count(device_t dev, device_t child, bhnd_port_type type, u_int port) { struct siba_devinfo *dinfo; /* delegate non-bus-attached devices to our parent */ if (device_get_parent(child) != dev) return (BHND_BUS_GET_REGION_COUNT(device_get_parent(dev), child, type, port)); dinfo = device_get_ivars(child); if (!siba_is_port_valid(dinfo, type, port)) return (0); return (siba_addrspace_region_count(dinfo, port)); } static int siba_get_port_rid(device_t dev, device_t child, bhnd_port_type port_type, u_int port_num, u_int region_num) { struct siba_devinfo *dinfo; struct siba_addrspace *addrspace; /* delegate non-bus-attached devices to our parent */ if (device_get_parent(child) != dev) return (BHND_BUS_GET_PORT_RID(device_get_parent(dev), child,
port_type, port_num, region_num)); dinfo = device_get_ivars(child); addrspace = siba_find_addrspace(dinfo, port_type, port_num, region_num); if (addrspace == NULL) return (-1); return (addrspace->sa_rid); } static int siba_decode_port_rid(device_t dev, device_t child, int type, int rid, bhnd_port_type *port_type, u_int *port_num, u_int *region_num) { struct siba_devinfo *dinfo; /* delegate non-bus-attached devices to our parent */ if (device_get_parent(child) != dev) return (BHND_BUS_DECODE_PORT_RID(device_get_parent(dev), child, type, rid, port_type, port_num, region_num)); dinfo = device_get_ivars(child); /* Ports are always memory mapped */ if (type != SYS_RES_MEMORY) return (EINVAL); for (int i = 0; i < dinfo->core_id.num_addrspace; i++) { if (dinfo->addrspace[i].sa_rid != rid) continue; *port_type = BHND_PORT_DEVICE; *port_num = siba_addrspace_port(i); *region_num = siba_addrspace_region(i); return (0); } /* Not found */ return (ENOENT); } static int siba_get_region_addr(device_t dev, device_t child, bhnd_port_type port_type, u_int port_num, u_int region_num, bhnd_addr_t *addr, bhnd_size_t *size) { struct siba_devinfo *dinfo; struct siba_addrspace *addrspace; /* delegate non-bus-attached devices to our parent */ if (device_get_parent(child) != dev) { return (BHND_BUS_GET_REGION_ADDR(device_get_parent(dev), child, port_type, port_num, region_num, addr, size)); } dinfo = device_get_ivars(child); addrspace = siba_find_addrspace(dinfo, port_type, port_num, region_num); if (addrspace == NULL) return (ENOENT); *addr = addrspace->sa_base; *size = addrspace->sa_size - addrspace->sa_bus_reserved; return (0); } /** * Register all address space mappings for @p di. * * @param dev The siba bus device. * @param di The device info instance on which to register all address * space entries. * @param r A resource mapping the enumeration table block for @p di. */ static int siba_register_addrspaces(device_t dev, struct siba_devinfo *di, struct resource *r) { struct siba_core_id *cid; uint32_t addr; uint32_t size; int error; cid = &di->core_id; /* Register the device address space entries */ for (uint8_t i = 0; i < di->core_id.num_addrspace; i++) { uint32_t adm; u_int adm_offset; uint32_t bus_reserved; /* Determine the register offset */ adm_offset = siba_admatch_offset(i); if (adm_offset == 0) { device_printf(dev, "addrspace %hhu is unsupported\n", i); return (ENODEV); } /* Fetch the address match register value */ adm = bus_read_4(r, adm_offset); /* Parse the value */ if ((error = siba_parse_admatch(adm, &addr, &size))) { device_printf(dev, "failed to decode address " "match register value 0x%x\n", adm); return (error); } /* If this is the device's core/enumeration address space, * reserve the Sonics configuration register blocks for the * use of our bus.
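 *
 * (That is, num_cfg_blocks * SIBA_CFG_SIZE bytes at the top of the
 * region are withheld via bus_reserved below, so that children cannot
 * map the bus-owned CFG register blocks.)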
*/ bus_reserved = 0; if (i == SIBA_CORE_ADDRSPACE) bus_reserved = cid->num_cfg_blocks * SIBA_CFG_SIZE; /* Append the region info */ error = siba_append_dinfo_region(di, i, addr, size, bus_reserved); if (error) return (error); } return (0); } static struct bhnd_devinfo * siba_alloc_bhnd_dinfo(device_t dev) { struct siba_devinfo *dinfo = siba_alloc_dinfo(dev); return ((struct bhnd_devinfo *)dinfo); } static void siba_free_bhnd_dinfo(device_t dev, struct bhnd_devinfo *dinfo) { siba_free_dinfo(dev, (struct siba_devinfo *)dinfo); } static int siba_get_core_table(device_t dev, device_t child, struct bhnd_core_info **cores, u_int *num_cores) { const struct bhnd_chipid *chipid; struct bhnd_core_info *table; struct bhnd_resource *r; int error; int rid; /* Fetch the core count from our chip identification */ chipid = BHND_BUS_GET_CHIPID(dev, dev); /* Allocate our local core table */ table = malloc(sizeof(*table) * chipid->ncores, M_BHND, M_NOWAIT); if (table == NULL) return (ENOMEM); /* Enumerate all cores. */ for (u_int i = 0; i < chipid->ncores; i++) { struct siba_core_id cid; uint32_t idhigh, idlow; /* Map the core's register block */ rid = 0; r = bhnd_alloc_resource(dev, SYS_RES_MEMORY, &rid, SIBA_CORE_ADDR(i), SIBA_CORE_ADDR(i) + SIBA_CORE_SIZE - 1, SIBA_CORE_SIZE, RF_ACTIVE); if (r == NULL) { error = ENXIO; goto failed; } /* Read the core info */ idhigh = bhnd_bus_read_4(r, SB0_REG_ABS(SIBA_CFG0_IDHIGH)); idlow = bhnd_bus_read_4(r, SB0_REG_ABS(SIBA_CFG0_IDLOW)); cid = siba_parse_core_id(idhigh, idlow, i, 0); table[i] = cid.core_info; /* Determine unit number */ for (u_int j = 0; j < i; j++) { if (table[j].vendor == table[i].vendor && table[j].device == table[i].device) table[i].unit++; } /* Release our resource */ bhnd_release_resource(dev, SYS_RES_MEMORY, rid, r); r = NULL; } /* Provide the result values (performed last to avoid modifying * cores/num_cores if enumeration failed). */ *cores = table; *num_cores = chipid->ncores; return (0); failed: if (table != NULL) free(table, M_BHND); if (r != NULL) bhnd_release_resource(dev, SYS_RES_MEMORY, rid, r); return (error); } /** * Scan the core table and add all valid discovered cores to * the bus. * * @param dev The siba bus device. * @param chipid The chip identifier, if the device does not provide a * ChipCommon core. Should be NULL otherwise. */ int siba_add_children(device_t dev, const struct bhnd_chipid *chipid) { struct bhnd_chipid ccid; struct bhnd_core_info *cores; struct siba_devinfo *dinfo; struct resource *r; int rid; int error; dinfo = NULL; cores = NULL; r = NULL; /* * Try to determine the number of device cores via the ChipCommon * identification registers. * * A small number of very early devices do not include a ChipCommon * core, in which case our caller must supply the chip identification * information via a non-NULL chipid parameter. */ if (chipid == NULL) { uint32_t idhigh, ccreg; uint16_t vendor, device; uint8_t ccrev; /* Map the first core's register block. If the ChipCommon core * exists, it will always be the first core.
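 *
 * (Core register blocks are laid out contiguously; SIBA_CORE_ADDR(i)
 * yields the base of the i'th SIBA_CORE_SIZE-sized block, as used
 * below.)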
*/ rid = 0; r = bus_alloc_resource(dev, SYS_RES_MEMORY, &rid, SIBA_CORE_ADDR(0), SIBA_CORE_ADDR(0) + SIBA_CORE_SIZE - 1, SIBA_CORE_SIZE, RF_ACTIVE); if (r == NULL) { /* Bail out if the register block could not be mapped */ error = ENXIO; goto cleanup; } /* Identify the core */ idhigh = bus_read_4(r, SB0_REG_ABS(SIBA_CFG0_IDHIGH)); vendor = SIBA_REG_GET(idhigh, IDH_VENDOR); device = SIBA_REG_GET(idhigh, IDH_DEVICE); ccrev = SIBA_IDH_CORE_REV(idhigh); if (vendor != OCP_VENDOR_BCM || device != BHND_COREID_CC) { device_printf(dev, "cannot identify device: no chipcommon core " "found\n"); error = ENXIO; goto cleanup; } /* Identify the chipset */ ccreg = bus_read_4(r, CHIPC_ID); ccid = bhnd_parse_chipid(ccreg, SIBA_ENUM_ADDR); - if (!CHIPC_NCORES_MIN_HWREV(ccrev)) { - switch (ccid.chip_id) { - case BHND_CHIPID_BCM4306: - ccid.ncores = 6; - break; - case BHND_CHIPID_BCM4704: - ccid.ncores = 9; - break; - case BHND_CHIPID_BCM5365: - /* - * BCM5365 does support ID_NUMCORE in at least - * some of its revisions, but for unknown - * reasons, Broadcom's drivers always exclude - * the ChipCommon revision (0x5) used by BCM5365 - * from the set of revisions supporting - * ID_NUMCORE, and instead supply a fixed value. - * - * Presumably, at least some of these devices - * shipped with a broken ID_NUMCORE value. - */ - ccid.ncores = 7; - break; - default: - device_printf(dev, "unable to determine core " - "count for unrecognized chipset 0x%hx\n", - ccid.chip_id); - error = ENXIO; - goto cleanup; - } + /* Fix up the core count */ + error = bhnd_chipid_fixed_ncores(&ccid, ccrev, &ccid.ncores); + if (error) { + device_printf(dev, "unable to determine core count for " + "chipset 0x%hx\n", ccid.chip_id); + goto cleanup; } chipid = &ccid; bus_release_resource(dev, SYS_RES_MEMORY, rid, r); } /* Allocate our temporary core table and enumerate all cores */ cores = malloc(sizeof(*cores) * chipid->ncores, M_BHND, M_NOWAIT); if (cores == NULL) return (ENOMEM); /* Add all cores. */ for (u_int i = 0; i < chipid->ncores; i++) { struct siba_core_id cid; device_t child; uint32_t idhigh, idlow; rman_res_t r_count, r_end, r_start; /* Map the core's register block */ rid = 0; r_start = SIBA_CORE_ADDR(i); r_count = SIBA_CORE_SIZE; r_end = r_start + SIBA_CORE_SIZE - 1; r = bus_alloc_resource(dev, SYS_RES_MEMORY, &rid, r_start, r_end, r_count, RF_ACTIVE); if (r == NULL) { error = ENXIO; goto cleanup; } /* Add the child device */ child = BUS_ADD_CHILD(dev, 0, NULL, -1); if (child == NULL) { error = ENXIO; goto cleanup; } /* Read the core info */ idhigh = bus_read_4(r, SB0_REG_ABS(SIBA_CFG0_IDHIGH)); idlow = bus_read_4(r, SB0_REG_ABS(SIBA_CFG0_IDLOW)); cid = siba_parse_core_id(idhigh, idlow, i, 0); cores[i] = cid.core_info; /* Determine unit number */ for (u_int j = 0; j < i; j++) { if (cores[j].vendor == cores[i].vendor && cores[j].device == cores[i].device) cores[i].unit++; } /* Initialize per-device bus info */ if ((dinfo = device_get_ivars(child)) == NULL) { error = ENXIO; goto cleanup; } if ((error = siba_init_dinfo(dev, dinfo, &cid))) goto cleanup; /* Register the core's address space(s). */ if ((error = siba_register_addrspaces(dev, dinfo, r))) goto cleanup; /* If pins are floating or the hardware is otherwise * unpopulated, the device shouldn't be used. */ if (bhnd_is_hw_disabled(child)) device_disable(child); /* Release our resource */ bus_release_resource(dev, SYS_RES_MEMORY, rid, r); r = NULL; /* Issue bus callback for fully initialized child.
*/ BHND_BUS_CHILD_ADDED(dev, child); } cleanup: if (cores != NULL) free(cores, M_BHND); if (r != NULL) bus_release_resource(dev, SYS_RES_MEMORY, rid, r); return (error); } static device_method_t siba_methods[] = { /* Device interface */ DEVMETHOD(device_probe, siba_probe), DEVMETHOD(device_attach, siba_attach), DEVMETHOD(device_detach, siba_detach), DEVMETHOD(device_resume, siba_resume), DEVMETHOD(device_suspend, siba_suspend), /* Bus interface */ DEVMETHOD(bus_read_ivar, siba_read_ivar), DEVMETHOD(bus_write_ivar, siba_write_ivar), DEVMETHOD(bus_get_resource_list, siba_get_resource_list), /* BHND interface */ DEVMETHOD(bhnd_bus_find_hostb_device, siba_find_hostb_device), DEVMETHOD(bhnd_bus_get_core_table, siba_get_core_table), DEVMETHOD(bhnd_bus_alloc_devinfo, siba_alloc_bhnd_dinfo), DEVMETHOD(bhnd_bus_free_devinfo, siba_free_bhnd_dinfo), DEVMETHOD(bhnd_bus_reset_core, siba_reset_core), DEVMETHOD(bhnd_bus_suspend_core, siba_suspend_core), DEVMETHOD(bhnd_bus_read_config, siba_read_config), DEVMETHOD(bhnd_bus_write_config, siba_write_config), DEVMETHOD(bhnd_bus_get_port_count, siba_get_port_count), DEVMETHOD(bhnd_bus_get_region_count, siba_get_region_count), DEVMETHOD(bhnd_bus_get_port_rid, siba_get_port_rid), DEVMETHOD(bhnd_bus_decode_port_rid, siba_decode_port_rid), DEVMETHOD(bhnd_bus_get_region_addr, siba_get_region_addr), DEVMETHOD_END }; DEFINE_CLASS_1(bhnd, siba_driver, siba_methods, sizeof(struct siba_softc), bhnd_driver); MODULE_VERSION(siba, 1); MODULE_DEPEND(siba, bhnd, 1, 1, 1); Index: projects/clang390-import/sys/dev/hyperv/netvsc/hv_net_vsc.h =================================================================== --- projects/clang390-import/sys/dev/hyperv/netvsc/hv_net_vsc.h (revision 305016) +++ projects/clang390-import/sys/dev/hyperv/netvsc/hv_net_vsc.h (revision 305017) @@ -1,407 +1,411 @@ /*- * Copyright (c) 2009-2012,2016 Microsoft Corp. * Copyright (c) 2010-2012 Citrix Inc. * Copyright (c) 2012 NetApp Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* * $FreeBSD$ */ /* * Hyper-V vmbus (virtual machine bus) network VSC (virtual services client) * header file * * (Updated from unencumbered NvspProtocol.h) */ #ifndef __HV_NET_VSC_H__ #define __HV_NET_VSC_H__ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include +#include + #define HN_USE_TXDESC_BUFRING MALLOC_DECLARE(M_NETVSC); #define NVSP_INVALID_PROTOCOL_VERSION (0xFFFFFFFF) #define NVSP_PROTOCOL_VERSION_1 2 #define NVSP_PROTOCOL_VERSION_2 0x30002 #define NVSP_PROTOCOL_VERSION_4 0x40000 #define NVSP_PROTOCOL_VERSION_5 0x50000 #define NVSP_MIN_PROTOCOL_VERSION (NVSP_PROTOCOL_VERSION_1) #define NVSP_MAX_PROTOCOL_VERSION (NVSP_PROTOCOL_VERSION_2) #define NVSP_PROTOCOL_VERSION_CURRENT NVSP_PROTOCOL_VERSION_2 #define VERSION_4_OFFLOAD_SIZE 22 #define NVSP_OPERATIONAL_STATUS_OK (0x00000000) #define NVSP_OPERATIONAL_STATUS_DEGRADED (0x00000001) #define NVSP_OPERATIONAL_STATUS_NONRECOVERABLE (0x00000002) #define NVSP_OPERATIONAL_STATUS_NO_CONTACT (0x00000003) #define NVSP_OPERATIONAL_STATUS_LOST_COMMUNICATION (0x00000004) /* * Maximum number of transfer pages (packets) the VSP will use on a receive */ #define NVSP_MAX_PACKETS_PER_RECEIVE 375 /* vRSS stuff */ #define RNDIS_OBJECT_TYPE_RSS_CAPABILITIES 0x88 #define RNDIS_OBJECT_TYPE_RSS_PARAMETERS 0x89 #define RNDIS_RECEIVE_SCALE_CAPABILITIES_REVISION_2 2 #define RNDIS_RECEIVE_SCALE_PARAMETERS_REVISION_2 2 struct rndis_obj_header { uint8_t type; uint8_t rev; uint16_t size; } __packed; /* rndis_recv_scale_cap/cap_flag */ #define RNDIS_RSS_CAPS_MESSAGE_SIGNALED_INTERRUPTS 0x01000000 #define RNDIS_RSS_CAPS_CLASSIFICATION_AT_ISR 0x02000000 #define RNDIS_RSS_CAPS_CLASSIFICATION_AT_DPC 0x04000000 #define RNDIS_RSS_CAPS_USING_MSI_X 0x08000000 #define RNDIS_RSS_CAPS_RSS_AVAILABLE_ON_PORTS 0x10000000 #define RNDIS_RSS_CAPS_SUPPORTS_MSI_X 0x20000000 #define RNDIS_RSS_CAPS_HASH_TYPE_TCP_IPV4 0x00000100 #define RNDIS_RSS_CAPS_HASH_TYPE_TCP_IPV6 0x00000200 #define RNDIS_RSS_CAPS_HASH_TYPE_TCP_IPV6_EX 0x00000400 /* RNDIS_RECEIVE_SCALE_CAPABILITIES */ struct rndis_recv_scale_cap { struct rndis_obj_header hdr; uint32_t cap_flag; uint32_t num_int_msg; uint32_t num_recv_que; uint16_t num_indirect_tabent; } __packed; /* rndis_recv_scale_param flags */ #define RNDIS_RSS_PARAM_FLAG_BASE_CPU_UNCHANGED 0x0001 #define RNDIS_RSS_PARAM_FLAG_HASH_INFO_UNCHANGED 0x0002 #define RNDIS_RSS_PARAM_FLAG_ITABLE_UNCHANGED 0x0004 #define RNDIS_RSS_PARAM_FLAG_HASH_KEY_UNCHANGED 0x0008 #define RNDIS_RSS_PARAM_FLAG_DISABLE_RSS 0x0010 /* Hash info bits */ #define RNDIS_HASH_FUNC_TOEPLITZ 0x00000001 #define RNDIS_HASH_IPV4 0x00000100 #define RNDIS_HASH_TCP_IPV4 0x00000200 #define RNDIS_HASH_IPV6 0x00000400 #define RNDIS_HASH_IPV6_EX 0x00000800 #define RNDIS_HASH_TCP_IPV6 0x00001000 #define RNDIS_HASH_TCP_IPV6_EX 0x00002000 #define RNDIS_RSS_INDIRECTION_TABLE_MAX_SIZE_REVISION_2 (128 * 4) #define RNDIS_RSS_HASH_SECRET_KEY_MAX_SIZE_REVISION_2 40 #define ITAB_NUM 128 #define HASH_KEYLEN RNDIS_RSS_HASH_SECRET_KEY_MAX_SIZE_REVISION_2 /* RNDIS_RECEIVE_SCALE_PARAMETERS */ typedef struct rndis_recv_scale_param_ { struct rndis_obj_header hdr; /* Qualifies the rest of the information */ uint16_t flag; /* The base CPU number to do receive processing.
not used */ uint16_t base_cpu_number; /* This describes the hash function and type being enabled */ uint32_t hashinfo; /* The size of indirection table array */ uint16_t indirect_tabsize; /* The offset of the indirection table from the beginning of this * structure */ uint32_t indirect_taboffset; /* The size of the hash secret key */ uint16_t hashkey_size; /* The offset of the secret key from the beginning of this structure */ uint32_t hashkey_offset; uint32_t processor_masks_offset; uint32_t num_processor_masks; uint32_t processor_masks_entry_size; } rndis_recv_scale_param; /* * The following arguably belongs in a separate header file */ /* * Defines */ #define NETVSC_SEND_BUFFER_SIZE (1024*1024*15) /* 15M */ #define NETVSC_SEND_BUFFER_ID 0xface #define NETVSC_RECEIVE_BUFFER_SIZE_LEGACY (1024*1024*15) /* 15MB */ #define NETVSC_RECEIVE_BUFFER_SIZE (1024*1024*16) /* 16MB */ #define NETVSC_RECEIVE_BUFFER_ID 0xcafe #define NETVSC_RECEIVE_SG_COUNT 1 /* Preallocated receive packets */ #define NETVSC_RECEIVE_PACKETLIST_COUNT 256 /* * Maximum MTU we permit to be configured for a netvsc interface. * When the code was developed, a max MTU of 12232 was tested and * proven to work. 9K is a reasonable maximum for an Ethernet. */ #define NETVSC_MAX_CONFIGURABLE_MTU (9 * 1024) #define NETVSC_PACKET_SIZE PAGE_SIZE #define VRSS_SEND_TABLE_SIZE 16 /* * Data types */ struct vmbus_channel; typedef void (*pfn_on_send_rx_completion)(struct vmbus_channel *, void *); #define NETVSC_DEVICE_RING_BUFFER_SIZE (128 * PAGE_SIZE) #define NETVSC_PACKET_MAXPAGE 32 #define NETVSC_VLAN_PRIO_MASK 0xe000 #define NETVSC_VLAN_PRIO_SHIFT 13 #define NETVSC_VLAN_VID_MASK 0x0fff #define TYPE_IPV4 2 #define TYPE_IPV6 4 #define TYPE_TCP 2 #define TYPE_UDP 4 #define TRANSPORT_TYPE_NOT_IP 0 #define TRANSPORT_TYPE_IPV4_TCP ((TYPE_IPV4 << 16) | TYPE_TCP) #define TRANSPORT_TYPE_IPV4_UDP ((TYPE_IPV4 << 16) | TYPE_UDP) #define TRANSPORT_TYPE_IPV6_TCP ((TYPE_IPV6 << 16) | TYPE_TCP) #define TRANSPORT_TYPE_IPV6_UDP ((TYPE_IPV6 << 16) | TYPE_UDP) typedef struct { uint8_t mac_addr[6]; /* Assumption unsigned long */ uint8_t link_state; } netvsc_device_info; #define HN_XACT_REQ_PGCNT 2 #define HN_XACT_RESP_PGCNT 2 #define HN_XACT_REQ_SIZE (HN_XACT_REQ_PGCNT * PAGE_SIZE) #define HN_XACT_RESP_SIZE (HN_XACT_RESP_PGCNT * PAGE_SIZE) #ifndef HN_USE_TXDESC_BUFRING struct hn_txdesc; SLIST_HEAD(hn_txdesc_list, hn_txdesc); #else struct buf_ring; #endif struct hn_tx_ring; struct hn_rx_ring { struct ifnet *hn_ifp; struct hn_tx_ring *hn_txr; void *hn_rdbuf; uint8_t *hn_rxbuf; /* shadow sc->hn_rxbuf */ int hn_rx_idx; /* Trust csum verification on host side */ int hn_trust_hcsum; /* HN_TRUST_HCSUM_ */ struct lro_ctrl hn_lro; u_long hn_csum_ip; u_long hn_csum_tcp; u_long hn_csum_udp; u_long hn_csum_trusted; u_long hn_lro_tried; u_long hn_small_pkts; u_long hn_pkts; u_long hn_rss_pkts; /* Rarely used stuffs */ struct sysctl_oid *hn_rx_sysctl_tree; int hn_rx_flags; } __aligned(CACHE_LINE_SIZE); #define HN_TRUST_HCSUM_IP 0x0001 #define HN_TRUST_HCSUM_TCP 0x0002 #define HN_TRUST_HCSUM_UDP 0x0004 #define HN_RX_FLAG_ATTACHED 0x1 struct hn_tx_ring { #ifndef HN_USE_TXDESC_BUFRING struct mtx hn_txlist_spin; struct hn_txdesc_list hn_txlist; #else struct buf_ring *hn_txdesc_br; #endif int hn_txdesc_cnt; int hn_txdesc_avail; u_short hn_has_txeof; u_short hn_txdone_cnt; int hn_sched_tx; void (*hn_txeof)(struct hn_tx_ring *); struct taskqueue *hn_tx_taskq; struct task hn_tx_task; struct task hn_txeof_task; struct buf_ring *hn_mbuf_br; int hn_oactive; int hn_tx_idx; struct mtx 
hn_tx_lock; struct hn_softc *hn_sc; struct vmbus_channel *hn_chan; int hn_direct_tx_size; int hn_chim_size; bus_dma_tag_t hn_tx_data_dtag; uint64_t hn_csum_assist; int hn_gpa_cnt; struct vmbus_gpa hn_gpa[NETVSC_PACKET_MAXPAGE]; u_long hn_no_txdescs; u_long hn_send_failed; u_long hn_txdma_failed; u_long hn_tx_collapsed; u_long hn_tx_chimney_tried; u_long hn_tx_chimney; u_long hn_pkts; /* Rarely used stuffs */ struct hn_txdesc *hn_txdesc; bus_dma_tag_t hn_tx_rndis_dtag; struct sysctl_oid *hn_tx_sysctl_tree; int hn_tx_flags; } __aligned(CACHE_LINE_SIZE); #define HN_TX_FLAG_ATTACHED 0x1 /* * Device-specific softc structure */ typedef struct hn_softc { struct ifnet *hn_ifp; struct ifmedia hn_media; device_t hn_dev; uint8_t hn_unit; int hn_carrier; int hn_if_flags; struct mtx hn_lock; int hn_initdone; /* See hv_netvsc_drv_freebsd.c for rules on how to use */ int temp_unusable; struct rndis_device_ *rndis_dev; struct vmbus_channel *hn_prichan; int hn_rx_ring_cnt; int hn_rx_ring_inuse; struct hn_rx_ring *hn_rx_ring; int hn_tx_ring_cnt; int hn_tx_ring_inuse; struct hn_tx_ring *hn_tx_ring; uint8_t *hn_chim; u_long *hn_chim_bmap; int hn_chim_bmap_cnt; int hn_chim_cnt; int hn_chim_szmax; int hn_cpu; struct taskqueue *hn_tx_taskq; struct sysctl_oid *hn_tx_sysctl_tree; struct sysctl_oid *hn_rx_sysctl_tree; struct vmbus_xact_ctx *hn_xact; uint32_t hn_nvs_ver; uint32_t hn_flags; void *hn_rxbuf; uint32_t hn_rxbuf_gpadl; struct hyperv_dma hn_rxbuf_dma; uint32_t hn_chim_gpadl; struct hyperv_dma hn_chim_dma; uint32_t hn_rndis_rid; uint32_t hn_ndis_ver; + + struct ndis_rssprm_toeplitz hn_rss; } hn_softc_t; #define HN_FLAG_RXBUF_CONNECTED 0x0001 #define HN_FLAG_CHIM_CONNECTED 0x0002 /* * Externs */ extern int hv_promisc_mode; struct hn_send_ctx; void netvsc_linkstatus_callback(struct hn_softc *sc, uint32_t status); int hv_nv_on_device_add(struct hn_softc *sc, struct hn_rx_ring *rxr); int hv_nv_on_device_remove(struct hn_softc *sc, boolean_t destroy_channel); int hv_nv_on_send(struct vmbus_channel *chan, uint32_t rndis_mtype, struct hn_send_ctx *sndc, struct vmbus_gpa *gpa, int gpa_cnt); void hv_nv_subchan_attach(struct vmbus_channel *chan, struct hn_rx_ring *rxr); #endif /* __HV_NET_VSC_H__ */ Index: projects/clang390-import/sys/dev/hyperv/netvsc/hv_netvsc_drv_freebsd.c =================================================================== --- projects/clang390-import/sys/dev/hyperv/netvsc/hv_netvsc_drv_freebsd.c (revision 305016) +++ projects/clang390-import/sys/dev/hyperv/netvsc/hv_netvsc_drv_freebsd.c (revision 305017) @@ -1,3096 +1,3098 @@ /*- * Copyright (c) 2010-2012 Citrix Inc. * Copyright (c) 2009-2012,2016 Microsoft Corp. * Copyright (c) 2012 NetApp Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /*- * Copyright (c) 2004-2006 Kip Macy * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_inet6.h" #include "opt_inet.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include -#include "hv_net_vsc.h" -#include "hv_rndis.h" -#include "hv_rndis_filter.h" +#include +#include +#include +#include + #include "vmbus_if.h" /* Short for Hyper-V network interface */ #define NETVSC_DEVNAME "hn" /* * It looks like offset 0 of buf is reserved to hold the softc pointer. * The sc pointer evidently not needed, and is not presently populated. * The packet offset is where the netvsc_packet starts in the buffer. 
*/ #define HV_NV_SC_PTR_OFFSET_IN_BUF 0 #define HV_NV_PACKET_OFFSET_IN_BUF 16 /* YYY should get it from the underlying channel */ #define HN_TX_DESC_CNT 512 #define HN_LROENT_CNT_DEF 128 #define HN_RING_CNT_DEF_MAX 8 #define HN_RNDIS_MSG_LEN \ (sizeof(rndis_msg) + \ RNDIS_HASHVAL_PPI_SIZE + \ RNDIS_VLAN_PPI_SIZE + \ RNDIS_TSO_PPI_SIZE + \ RNDIS_CSUM_PPI_SIZE) #define HN_RNDIS_MSG_BOUNDARY PAGE_SIZE #define HN_RNDIS_MSG_ALIGN CACHE_LINE_SIZE #define HN_TX_DATA_BOUNDARY PAGE_SIZE #define HN_TX_DATA_MAXSIZE IP_MAXPACKET #define HN_TX_DATA_SEGSIZE PAGE_SIZE #define HN_TX_DATA_SEGCNT_MAX \ (NETVSC_PACKET_MAXPAGE - HV_RF_NUM_TX_RESERVED_PAGE_BUFS) #define HN_DIRECT_TX_SIZE_DEF 128 #define HN_EARLY_TXEOF_THRESH 8 struct hn_txdesc { #ifndef HN_USE_TXDESC_BUFRING SLIST_ENTRY(hn_txdesc) link; #endif struct mbuf *m; struct hn_tx_ring *txr; int refs; uint32_t flags; /* HN_TXD_FLAG_ */ struct hn_send_ctx send_ctx; bus_dmamap_t data_dmap; bus_addr_t rndis_msg_paddr; rndis_msg *rndis_msg; bus_dmamap_t rndis_msg_dmap; }; #define HN_TXD_FLAG_ONLIST 0x1 #define HN_TXD_FLAG_DMAMAP 0x2 /* * Only enable UDP checksum offloading when it is on 2012R2 or * later. UDP checksum offloading doesn't work on earlier * Windows releases. */ #define HN_CSUM_ASSIST_WIN8 (CSUM_IP | CSUM_TCP) #define HN_CSUM_ASSIST (CSUM_IP | CSUM_UDP | CSUM_TCP) #define HN_LRO_LENLIM_MULTIRX_DEF (12 * ETHERMTU) #define HN_LRO_LENLIM_DEF (25 * ETHERMTU) /* YYY 2*MTU is a bit rough, but should be good enough. */ #define HN_LRO_LENLIM_MIN(ifp) (2 * (ifp)->if_mtu) #define HN_LRO_ACKCNT_DEF 1 /* * Be aware that this sleepable mutex will exhibit WITNESS errors when * certain TCP and ARP code paths are taken. This appears to be a * well-known condition, as all other drivers checked use a sleeping * mutex to protect their transmit paths. * Also be aware that mutexes do not play well with semaphores, and there * is a conflicting semaphore in a certain channel code path. */ #define NV_LOCK_INIT(_sc, _name) \ mtx_init(&(_sc)->hn_lock, _name, MTX_NETWORK_LOCK, MTX_DEF) #define NV_LOCK(_sc) mtx_lock(&(_sc)->hn_lock) #define NV_LOCK_ASSERT(_sc) mtx_assert(&(_sc)->hn_lock, MA_OWNED) #define NV_UNLOCK(_sc) mtx_unlock(&(_sc)->hn_lock) #define NV_LOCK_DESTROY(_sc) mtx_destroy(&(_sc)->hn_lock) /* * Globals */ int hv_promisc_mode = 0; /* normal mode by default */ SYSCTL_NODE(_hw, OID_AUTO, hn, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Hyper-V network interface"); /* Trust tcp segment verification on host side. */ static int hn_trust_hosttcp = 1; SYSCTL_INT(_hw_hn, OID_AUTO, trust_hosttcp, CTLFLAG_RDTUN, &hn_trust_hosttcp, 0, "Trust tcp segment verification on host side, " "when csum info is missing (global setting)"); /* Trust udp datagram verification on host side. */ static int hn_trust_hostudp = 1; SYSCTL_INT(_hw_hn, OID_AUTO, trust_hostudp, CTLFLAG_RDTUN, &hn_trust_hostudp, 0, "Trust udp datagram verification on host side, " "when csum info is missing (global setting)"); /* Trust ip packet verification on host side.
*/ static int hn_trust_hostip = 1; SYSCTL_INT(_hw_hn, OID_AUTO, trust_hostip, CTLFLAG_RDTUN, &hn_trust_hostip, 0, "Trust ip packet verification on host side, " "when csum info is missing (global setting)"); #if __FreeBSD_version >= 1100045 /* Limit TSO burst size */ static int hn_tso_maxlen = 0; SYSCTL_INT(_hw_hn, OID_AUTO, tso_maxlen, CTLFLAG_RDTUN, &hn_tso_maxlen, 0, "TSO burst limit"); #endif /* Limit chimney send size */ static int hn_tx_chimney_size = 0; SYSCTL_INT(_hw_hn, OID_AUTO, tx_chimney_size, CTLFLAG_RDTUN, &hn_tx_chimney_size, 0, "Chimney send packet size limit"); /* Limit the size of packet for direct transmission */ static int hn_direct_tx_size = HN_DIRECT_TX_SIZE_DEF; SYSCTL_INT(_hw_hn, OID_AUTO, direct_tx_size, CTLFLAG_RDTUN, &hn_direct_tx_size, 0, "Size of the packet for direct transmission"); #if defined(INET) || defined(INET6) #if __FreeBSD_version >= 1100095 static int hn_lro_entry_count = HN_LROENT_CNT_DEF; SYSCTL_INT(_hw_hn, OID_AUTO, lro_entry_count, CTLFLAG_RDTUN, &hn_lro_entry_count, 0, "LRO entry count"); #endif #endif static int hn_share_tx_taskq = 0; SYSCTL_INT(_hw_hn, OID_AUTO, share_tx_taskq, CTLFLAG_RDTUN, &hn_share_tx_taskq, 0, "Enable shared TX taskqueue"); static struct taskqueue *hn_tx_taskq; #ifndef HN_USE_TXDESC_BUFRING static int hn_use_txdesc_bufring = 0; #else static int hn_use_txdesc_bufring = 1; #endif SYSCTL_INT(_hw_hn, OID_AUTO, use_txdesc_bufring, CTLFLAG_RD, &hn_use_txdesc_bufring, 0, "Use buf_ring for TX descriptors"); static int hn_bind_tx_taskq = -1; SYSCTL_INT(_hw_hn, OID_AUTO, bind_tx_taskq, CTLFLAG_RDTUN, &hn_bind_tx_taskq, 0, "Bind TX taskqueue to the specified cpu"); static int hn_use_if_start = 0; SYSCTL_INT(_hw_hn, OID_AUTO, use_if_start, CTLFLAG_RDTUN, &hn_use_if_start, 0, "Use if_start TX method"); static int hn_chan_cnt = 0; SYSCTL_INT(_hw_hn, OID_AUTO, chan_cnt, CTLFLAG_RDTUN, &hn_chan_cnt, 0, "# of channels to use; each channel has one RX ring and one TX ring"); static int hn_tx_ring_cnt = 0; SYSCTL_INT(_hw_hn, OID_AUTO, tx_ring_cnt, CTLFLAG_RDTUN, &hn_tx_ring_cnt, 0, "# of TX rings to use"); static int hn_tx_swq_depth = 0; SYSCTL_INT(_hw_hn, OID_AUTO, tx_swq_depth, CTLFLAG_RDTUN, &hn_tx_swq_depth, 0, "Depth of IFQ or BUFRING"); #if __FreeBSD_version >= 1100095 static u_int hn_lro_mbufq_depth = 0; SYSCTL_UINT(_hw_hn, OID_AUTO, lro_mbufq_depth, CTLFLAG_RDTUN, &hn_lro_mbufq_depth, 0, "Depth of LRO mbuf queue"); #endif static u_int hn_cpu_index; /* * Forward declarations */ static void hn_stop(hn_softc_t *sc); static void hn_ifinit_locked(hn_softc_t *sc); static void hn_ifinit(void *xsc); static int hn_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data); static int hn_start_locked(struct hn_tx_ring *txr, int len); static void hn_start(struct ifnet *ifp); static void hn_start_txeof(struct hn_tx_ring *); static int hn_ifmedia_upd(struct ifnet *ifp); static void hn_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr); #if __FreeBSD_version >= 1100099 static int hn_lro_lenlim_sysctl(SYSCTL_HANDLER_ARGS); static int hn_lro_ackcnt_sysctl(SYSCTL_HANDLER_ARGS); #endif static int hn_trust_hcsum_sysctl(SYSCTL_HANDLER_ARGS); static int hn_chim_size_sysctl(SYSCTL_HANDLER_ARGS); static int hn_rx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS); static int hn_rx_stat_u64_sysctl(SYSCTL_HANDLER_ARGS); static int hn_tx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS); static int hn_tx_conf_int_sysctl(SYSCTL_HANDLER_ARGS); static int hn_ndis_version_sysctl(SYSCTL_HANDLER_ARGS); static int hn_check_iplen(const struct mbuf *, int); static int hn_create_tx_ring(struct 
hn_softc *, int); static void hn_destroy_tx_ring(struct hn_tx_ring *); static int hn_create_tx_data(struct hn_softc *, int); static void hn_destroy_tx_data(struct hn_softc *); static void hn_start_taskfunc(void *, int); static void hn_start_txeof_taskfunc(void *, int); static void hn_stop_tx_tasks(struct hn_softc *); static int hn_encap(struct hn_tx_ring *, struct hn_txdesc *, struct mbuf **); static int hn_create_rx_data(struct hn_softc *sc, int); static void hn_destroy_rx_data(struct hn_softc *sc); static void hn_set_chim_size(struct hn_softc *, int); static void hn_channel_attach(struct hn_softc *, struct vmbus_channel *); static void hn_subchan_attach(struct hn_softc *, struct vmbus_channel *); static void hn_subchan_setup(struct hn_softc *); static int hn_transmit(struct ifnet *, struct mbuf *); static void hn_xmit_qflush(struct ifnet *); static int hn_xmit(struct hn_tx_ring *, int); static void hn_xmit_txeof(struct hn_tx_ring *); static void hn_xmit_taskfunc(void *, int); static void hn_xmit_txeof_taskfunc(void *, int); #if __FreeBSD_version >= 1100099 static void hn_set_lro_lenlim(struct hn_softc *sc, int lenlim) { int i; for (i = 0; i < sc->hn_rx_ring_inuse; ++i) sc->hn_rx_ring[i].hn_lro.lro_length_lim = lenlim; } #endif static int hn_get_txswq_depth(const struct hn_tx_ring *txr) { KASSERT(txr->hn_txdesc_cnt > 0, ("tx ring is not setup yet")); if (hn_tx_swq_depth < txr->hn_txdesc_cnt) return txr->hn_txdesc_cnt; return hn_tx_swq_depth; } static int hn_ifmedia_upd(struct ifnet *ifp __unused) { return EOPNOTSUPP; } static void hn_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr) { struct hn_softc *sc = ifp->if_softc; ifmr->ifm_status = IFM_AVALID; ifmr->ifm_active = IFM_ETHER; if (!sc->hn_carrier) { ifmr->ifm_active |= IFM_NONE; return; } ifmr->ifm_status |= IFM_ACTIVE; ifmr->ifm_active |= IFM_10G_T | IFM_FDX; } /* {F8615163-DF3E-46c5-913F-F2D2F965ED0E} */ static const struct hyperv_guid g_net_vsc_device_type = { .hv_guid = {0x63, 0x51, 0x61, 0xF8, 0x3E, 0xDF, 0xc5, 0x46, 0x91, 0x3F, 0xF2, 0xD2, 0xF9, 0x65, 0xED, 0x0E} }; /* * Standard probe entry point. * */ static int netvsc_probe(device_t dev) { if (VMBUS_PROBE_GUID(device_get_parent(dev), dev, &g_net_vsc_device_type) == 0) { device_set_desc(dev, "Hyper-V Network Interface"); return BUS_PROBE_DEFAULT; } return ENXIO; } /* * Standard attach entry point. * * Called when the driver is loaded. It allocates needed resources, * and initializes the "hardware" and software. 
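 */

/*
 * The VMBUS device type above is stored in Hyper-V's mixed-endian wire
 * layout: the first three fields of {F8615163-DF3E-46c5-913F-F2D2F965ED0E}
 * are little-endian, the last two stay in byte order.  A self-contained
 * sketch (guid_to_text() is hypothetical) that formats such a 16-byte
 * array back into canonical text; illustrative only, not part of the
 * driver:
 */
#if 0
#include <stdio.h>
#include <stdint.h>

static void
guid_to_text(const uint8_t g[16], char *buf, size_t len)
{
	snprintf(buf, len,
	    "%02X%02X%02X%02X-%02X%02X-%02X%02X-%02X%02X-"
	    "%02X%02X%02X%02X%02X%02X",
	    g[3], g[2], g[1], g[0],	/* 32-bit field, little-endian */
	    g[5], g[4],			/* 16-bit field, little-endian */
	    g[7], g[6],			/* 16-bit field, little-endian */
	    g[8], g[9],			/* clock sequence, byte order */
	    g[10], g[11], g[12], g[13], g[14], g[15]);	/* node, byte order */
}
#endif

/* netvsc_attach() follows.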
*/ static int netvsc_attach(device_t dev) { struct sysctl_oid_list *child; struct sysctl_ctx_list *ctx; netvsc_device_info device_info; hn_softc_t *sc; int unit = device_get_unit(dev); struct ifnet *ifp = NULL; int error, ring_cnt, tx_ring_cnt; #if __FreeBSD_version >= 1100045 int tso_maxlen; #endif sc = device_get_softc(dev); sc->hn_unit = unit; sc->hn_dev = dev; sc->hn_prichan = vmbus_get_channel(dev); if (hn_tx_taskq == NULL) { sc->hn_tx_taskq = taskqueue_create("hn_tx", M_WAITOK, taskqueue_thread_enqueue, &sc->hn_tx_taskq); if (hn_bind_tx_taskq >= 0) { int cpu = hn_bind_tx_taskq; cpuset_t cpu_set; if (cpu > mp_ncpus - 1) cpu = mp_ncpus - 1; CPU_SETOF(cpu, &cpu_set); taskqueue_start_threads_cpuset(&sc->hn_tx_taskq, 1, PI_NET, &cpu_set, "%s tx", device_get_nameunit(dev)); } else { taskqueue_start_threads(&sc->hn_tx_taskq, 1, PI_NET, "%s tx", device_get_nameunit(dev)); } } else { sc->hn_tx_taskq = hn_tx_taskq; } NV_LOCK_INIT(sc, "NetVSCLock"); ifp = sc->hn_ifp = if_alloc(IFT_ETHER); ifp->if_softc = sc; if_initname(ifp, device_get_name(dev), device_get_unit(dev)); /* * Figure out the # of RX rings (ring_cnt) and the # of TX rings * to use (tx_ring_cnt). * * NOTE: * The # of RX rings to use is same as the # of channels to use. */ ring_cnt = hn_chan_cnt; if (ring_cnt <= 0) { /* Default */ ring_cnt = mp_ncpus; if (ring_cnt > HN_RING_CNT_DEF_MAX) ring_cnt = HN_RING_CNT_DEF_MAX; } else if (ring_cnt > mp_ncpus) { ring_cnt = mp_ncpus; } tx_ring_cnt = hn_tx_ring_cnt; if (tx_ring_cnt <= 0 || tx_ring_cnt > ring_cnt) tx_ring_cnt = ring_cnt; if (hn_use_if_start) { /* ifnet.if_start only needs one TX ring. */ tx_ring_cnt = 1; } /* * Set the leader CPU for channels. */ sc->hn_cpu = atomic_fetchadd_int(&hn_cpu_index, ring_cnt) % mp_ncpus; error = hn_create_tx_data(sc, tx_ring_cnt); if (error) goto failed; error = hn_create_rx_data(sc, ring_cnt); if (error) goto failed; /* * Associate the first TX/RX ring w/ the primary channel. */ hn_channel_attach(sc, sc->hn_prichan); ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; ifp->if_ioctl = hn_ioctl; ifp->if_init = hn_ifinit; /* needed by hv_rf_on_device_add() code */ ifp->if_mtu = ETHERMTU; if (hn_use_if_start) { int qdepth = hn_get_txswq_depth(&sc->hn_tx_ring[0]); ifp->if_start = hn_start; IFQ_SET_MAXLEN(&ifp->if_snd, qdepth); ifp->if_snd.ifq_drv_maxlen = qdepth - 1; IFQ_SET_READY(&ifp->if_snd); } else { ifp->if_transmit = hn_transmit; ifp->if_qflush = hn_xmit_qflush; } ifmedia_init(&sc->hn_media, 0, hn_ifmedia_upd, hn_ifmedia_sts); ifmedia_add(&sc->hn_media, IFM_ETHER | IFM_AUTO, 0, NULL); ifmedia_set(&sc->hn_media, IFM_ETHER | IFM_AUTO); /* XXX ifmedia_set really should do this for us */ sc->hn_media.ifm_media = sc->hn_media.ifm_cur->ifm_media; /* * Tell upper layers that we support full VLAN capability. 
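 */

/*
 * The ring sizing earlier in netvsc_attach() reduces to a few clamps.
 * A distilled sketch of the same policy (pick_ring_counts() and its
 * parameter names are local to this sketch); illustrative only, not
 * part of the driver:
 */
#if 0
static void
pick_ring_counts(int chan_tunable, int txr_tunable, int ncpus,
    int use_if_start, int *ring_cnt, int *tx_ring_cnt)
{
	/*
	 * The # of RX rings equals the # of channels; the default is
	 * min(ncpus, HN_RING_CNT_DEF_MAX).
	 */
	*ring_cnt = chan_tunable;
	if (*ring_cnt <= 0) {
		*ring_cnt = ncpus;
		if (*ring_cnt > HN_RING_CNT_DEF_MAX)
			*ring_cnt = HN_RING_CNT_DEF_MAX;
	} else if (*ring_cnt > ncpus) {
		*ring_cnt = ncpus;
	}

	/* TX rings can never outnumber the channels. */
	*tx_ring_cnt = txr_tunable;
	if (*tx_ring_cnt <= 0 || *tx_ring_cnt > *ring_cnt)
		*tx_ring_cnt = *ring_cnt;

	/* The legacy if_start path is single-queue by design. */
	if (use_if_start)
		*tx_ring_cnt = 1;
}
#endif

/* Full VLAN capability is advertised below, per the note above.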
*/ ifp->if_hdrlen = sizeof(struct ether_vlan_header); ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | IFCAP_TSO | IFCAP_LRO; ifp->if_capenable |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | IFCAP_TSO | IFCAP_LRO; ifp->if_hwassist = sc->hn_tx_ring[0].hn_csum_assist | CSUM_TSO; sc->hn_xact = vmbus_xact_ctx_create(bus_get_dma_tag(dev), HN_XACT_REQ_SIZE, HN_XACT_RESP_SIZE, 0); if (sc->hn_xact == NULL) goto failed; error = hv_rf_on_device_add(sc, &device_info, &ring_cnt, &sc->hn_rx_ring[0]); if (error) goto failed; KASSERT(ring_cnt > 0 && ring_cnt <= sc->hn_rx_ring_inuse, ("invalid channel count %d, should be less than %d", ring_cnt, sc->hn_rx_ring_inuse)); /* * Set the # of TX/RX rings that could be used according to * the # of channels that host offered. */ if (sc->hn_tx_ring_inuse > ring_cnt) sc->hn_tx_ring_inuse = ring_cnt; sc->hn_rx_ring_inuse = ring_cnt; device_printf(dev, "%d TX ring, %d RX ring\n", sc->hn_tx_ring_inuse, sc->hn_rx_ring_inuse); if (sc->hn_rx_ring_inuse > 1) hn_subchan_setup(sc); #if __FreeBSD_version >= 1100099 if (sc->hn_rx_ring_inuse > 1) { /* * Reduce TCP segment aggregation limit for multiple * RX rings to increase ACK timeliness. */ hn_set_lro_lenlim(sc, HN_LRO_LENLIM_MULTIRX_DEF); } #endif - if (device_info.link_state == 0) { + if (device_info.link_state == NDIS_MEDIA_STATE_CONNECTED) { sc->hn_carrier = 1; } #if __FreeBSD_version >= 1100045 tso_maxlen = hn_tso_maxlen; if (tso_maxlen <= 0 || tso_maxlen > IP_MAXPACKET) tso_maxlen = IP_MAXPACKET; ifp->if_hw_tsomaxsegcount = HN_TX_DATA_SEGCNT_MAX; ifp->if_hw_tsomaxsegsize = PAGE_SIZE; ifp->if_hw_tsomax = tso_maxlen - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN); #endif ether_ifattach(ifp, device_info.mac_addr); #if __FreeBSD_version >= 1100045 if_printf(ifp, "TSO: %u/%u/%u\n", ifp->if_hw_tsomax, ifp->if_hw_tsomaxsegcount, ifp->if_hw_tsomaxsegsize); #endif hn_set_chim_size(sc, sc->hn_chim_szmax); if (hn_tx_chimney_size > 0 && hn_tx_chimney_size < sc->hn_chim_szmax) hn_set_chim_size(sc, hn_tx_chimney_size); ctx = device_get_sysctl_ctx(dev); child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev)); SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "nvs_version", CTLFLAG_RD, &sc->hn_nvs_ver, 0, "NVS version"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "ndis_version", CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, hn_ndis_version_sysctl, "A", "NDIS version"); return (0); failed: hn_destroy_tx_data(sc); if (ifp != NULL) if_free(ifp); return (error); } /* * Standard detach entry point */ static int netvsc_detach(device_t dev) { struct hn_softc *sc = device_get_softc(dev); if (bootverbose) printf("netvsc_detach\n"); /* * XXXKYS: Need to clean up all our * driver state; this is the driver * unloading. */ /* * XXXKYS: Need to stop outgoing traffic and unregister * the netdevice. 
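 */

/*
 * The TSO limits configured in netvsc_attach() above amount to:
 * clamp the hw.hn.tso_maxlen tunable to IP_MAXPACKET, then reserve
 * room for the largest Ethernet header.  Worked out as a sketch
 * (hn_tso_burst_limit() is hypothetical); illustrative only, not part
 * of the driver:
 */
#if 0
static u_int
hn_tso_burst_limit(int tunable)
{
	int tso_maxlen = tunable;

	if (tso_maxlen <= 0 || tso_maxlen > IP_MAXPACKET)
		tso_maxlen = IP_MAXPACKET;	/* 65535 */

	/* e.g. 65535 - (14 + 4) = 65517 bytes per TSO burst */
	return (tso_maxlen - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN));
}
#endif

/* Device teardown continues below.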
*/ hv_rf_on_device_remove(sc, HV_RF_NV_DESTROY_CHANNEL); hn_stop_tx_tasks(sc); ifmedia_removeall(&sc->hn_media); hn_destroy_rx_data(sc); hn_destroy_tx_data(sc); if (sc->hn_tx_taskq != hn_tx_taskq) taskqueue_free(sc->hn_tx_taskq); vmbus_xact_ctx_destroy(sc->hn_xact); return (0); } /* * Standard shutdown entry point */ static int netvsc_shutdown(device_t dev) { return (0); } static __inline int hn_txdesc_dmamap_load(struct hn_tx_ring *txr, struct hn_txdesc *txd, struct mbuf **m_head, bus_dma_segment_t *segs, int *nsegs) { struct mbuf *m = *m_head; int error; error = bus_dmamap_load_mbuf_sg(txr->hn_tx_data_dtag, txd->data_dmap, m, segs, nsegs, BUS_DMA_NOWAIT); if (error == EFBIG) { struct mbuf *m_new; m_new = m_collapse(m, M_NOWAIT, HN_TX_DATA_SEGCNT_MAX); if (m_new == NULL) return ENOBUFS; else *m_head = m = m_new; txr->hn_tx_collapsed++; error = bus_dmamap_load_mbuf_sg(txr->hn_tx_data_dtag, txd->data_dmap, m, segs, nsegs, BUS_DMA_NOWAIT); } if (!error) { bus_dmamap_sync(txr->hn_tx_data_dtag, txd->data_dmap, BUS_DMASYNC_PREWRITE); txd->flags |= HN_TXD_FLAG_DMAMAP; } return error; } static __inline void hn_txdesc_dmamap_unload(struct hn_tx_ring *txr, struct hn_txdesc *txd) { if (txd->flags & HN_TXD_FLAG_DMAMAP) { bus_dmamap_sync(txr->hn_tx_data_dtag, txd->data_dmap, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(txr->hn_tx_data_dtag, txd->data_dmap); txd->flags &= ~HN_TXD_FLAG_DMAMAP; } } static __inline int hn_txdesc_put(struct hn_tx_ring *txr, struct hn_txdesc *txd) { KASSERT((txd->flags & HN_TXD_FLAG_ONLIST) == 0, ("put an onlist txd %#x", txd->flags)); KASSERT(txd->refs > 0, ("invalid txd refs %d", txd->refs)); if (atomic_fetchadd_int(&txd->refs, -1) != 1) return 0; hn_txdesc_dmamap_unload(txr, txd); if (txd->m != NULL) { m_freem(txd->m); txd->m = NULL; } txd->flags |= HN_TXD_FLAG_ONLIST; #ifndef HN_USE_TXDESC_BUFRING mtx_lock_spin(&txr->hn_txlist_spin); KASSERT(txr->hn_txdesc_avail >= 0 && txr->hn_txdesc_avail < txr->hn_txdesc_cnt, ("txdesc_put: invalid txd avail %d", txr->hn_txdesc_avail)); txr->hn_txdesc_avail++; SLIST_INSERT_HEAD(&txr->hn_txlist, txd, link); mtx_unlock_spin(&txr->hn_txlist_spin); #else atomic_add_int(&txr->hn_txdesc_avail, 1); buf_ring_enqueue(txr->hn_txdesc_br, txd); #endif return 1; } static __inline struct hn_txdesc * hn_txdesc_get(struct hn_tx_ring *txr) { struct hn_txdesc *txd; #ifndef HN_USE_TXDESC_BUFRING mtx_lock_spin(&txr->hn_txlist_spin); txd = SLIST_FIRST(&txr->hn_txlist); if (txd != NULL) { KASSERT(txr->hn_txdesc_avail > 0, ("txdesc_get: invalid txd avail %d", txr->hn_txdesc_avail)); txr->hn_txdesc_avail--; SLIST_REMOVE_HEAD(&txr->hn_txlist, link); } mtx_unlock_spin(&txr->hn_txlist_spin); #else txd = buf_ring_dequeue_sc(txr->hn_txdesc_br); #endif if (txd != NULL) { #ifdef HN_USE_TXDESC_BUFRING atomic_subtract_int(&txr->hn_txdesc_avail, 1); #endif KASSERT(txd->m == NULL && txd->refs == 0 && (txd->flags & HN_TXD_FLAG_ONLIST), ("invalid txd")); txd->flags &= ~HN_TXD_FLAG_ONLIST; txd->refs = 1; } return txd; } static __inline void hn_txdesc_hold(struct hn_txdesc *txd) { /* 0->1 transition will never work */ KASSERT(txd->refs > 0, ("invalid refs %d", txd->refs)); atomic_add_int(&txd->refs, 1); } static __inline void hn_txeof(struct hn_tx_ring *txr) { txr->hn_has_txeof = 0; txr->hn_txeof(txr); } static void hn_tx_done(struct hn_send_ctx *sndc, struct hn_softc *sc, struct vmbus_channel *chan, const void *data __unused, int dlen __unused) { struct hn_txdesc *txd = sndc->hn_cbarg; struct hn_tx_ring *txr; if (sndc->hn_chim_idx != HN_NVS_CHIM_IDX_INVALID) hn_chim_free(sc, 
sndc->hn_chim_idx); txr = txd->txr; KASSERT(txr->hn_chan == chan, ("channel mismatch, on chan%u, should be chan%u", vmbus_chan_subidx(chan), vmbus_chan_subidx(txr->hn_chan))); txr->hn_has_txeof = 1; hn_txdesc_put(txr, txd); ++txr->hn_txdone_cnt; if (txr->hn_txdone_cnt >= HN_EARLY_TXEOF_THRESH) { txr->hn_txdone_cnt = 0; if (txr->hn_oactive) hn_txeof(txr); } } void netvsc_channel_rollup(struct hn_rx_ring *rxr, struct hn_tx_ring *txr) { #if defined(INET) || defined(INET6) tcp_lro_flush_all(&rxr->hn_lro); #endif /* * NOTE: * 'txr' could be NULL, if multiple channels and * ifnet.if_start method are enabled. */ if (txr == NULL || !txr->hn_has_txeof) return; txr->hn_txdone_cnt = 0; hn_txeof(txr); } /* * NOTE: * If this function fails, then both txd and m_head0 will be freed. */ static int hn_encap(struct hn_tx_ring *txr, struct hn_txdesc *txd, struct mbuf **m_head0) { bus_dma_segment_t segs[HN_TX_DATA_SEGCNT_MAX]; int error, nsegs, i; struct mbuf *m_head = *m_head0; rndis_msg *rndis_mesg; rndis_packet *rndis_pkt; rndis_per_packet_info *rppi; struct rndis_hash_value *hash_value; uint32_t rndis_msg_size, tot_data_buf_len, send_buf_section_idx; int send_buf_section_size; tot_data_buf_len = m_head->m_pkthdr.len; /* * extension points to the area reserved for the * rndis_filter_packet, which is placed just after * the netvsc_packet (and rppi struct, if present; * length is updated later). */ rndis_mesg = txd->rndis_msg; /* XXX not necessary */ memset(rndis_mesg, 0, HN_RNDIS_MSG_LEN); rndis_mesg->ndis_msg_type = REMOTE_NDIS_PACKET_MSG; rndis_pkt = &rndis_mesg->msg.packet; rndis_pkt->data_offset = sizeof(rndis_packet); rndis_pkt->data_length = tot_data_buf_len; rndis_pkt->per_pkt_info_offset = sizeof(rndis_packet); rndis_msg_size = RNDIS_MESSAGE_SIZE(rndis_packet); /* * Set the hash value for this packet, so that the host could * dispatch the TX done event for this packet back to this TX * ring's channel. 
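 */

/*
 * The txdesc lifetime management above is a plain reference count:
 * hn_txdesc_get() hands out a descriptor with refs == 1,
 * hn_txdesc_hold() bumps it, and hn_txdesc_put() only recycles the
 * descriptor when the atomic decrement drops the last reference.  A
 * self-contained sketch of that release pattern with C11 atomics;
 * illustrative only, not part of the driver:
 */
#if 0
#include <stdatomic.h>
#include <stdbool.h>

struct ref_obj {
	atomic_int refs;
};

/* Returns true when the caller dropped the final reference. */
static bool
ref_put(struct ref_obj *o)
{
	/* atomic_fetch_sub() returns the value before the decrement. */
	if (atomic_fetch_sub(&o->refs, 1) != 1)
		return (false);
	/* Last reference: safe to tear down or recycle the object. */
	return (true);
}
#endif

/* Hash-value PPI setup follows.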
*/ rndis_msg_size += RNDIS_HASHVAL_PPI_SIZE; rppi = hv_set_rppi_data(rndis_mesg, RNDIS_HASHVAL_PPI_SIZE, nbl_hash_value); hash_value = (struct rndis_hash_value *)((uint8_t *)rppi + rppi->per_packet_info_offset); hash_value->hash_value = txr->hn_tx_idx; if (m_head->m_flags & M_VLANTAG) { ndis_8021q_info *rppi_vlan_info; rndis_msg_size += RNDIS_VLAN_PPI_SIZE; rppi = hv_set_rppi_data(rndis_mesg, RNDIS_VLAN_PPI_SIZE, ieee_8021q_info); rppi_vlan_info = (ndis_8021q_info *)((uint8_t *)rppi + rppi->per_packet_info_offset); rppi_vlan_info->u1.s1.vlan_id = m_head->m_pkthdr.ether_vtag & 0xfff; } if (m_head->m_pkthdr.csum_flags & CSUM_TSO) { rndis_tcp_tso_info *tso_info; struct ether_vlan_header *eh; int ether_len; /* * XXX need m_pullup and use mtodo */ eh = mtod(m_head, struct ether_vlan_header*); if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) ether_len = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; else ether_len = ETHER_HDR_LEN; rndis_msg_size += RNDIS_TSO_PPI_SIZE; rppi = hv_set_rppi_data(rndis_mesg, RNDIS_TSO_PPI_SIZE, tcp_large_send_info); tso_info = (rndis_tcp_tso_info *)((uint8_t *)rppi + rppi->per_packet_info_offset); tso_info->lso_v2_xmit.type = RNDIS_TCP_LARGE_SEND_OFFLOAD_V2_TYPE; #ifdef INET if (m_head->m_pkthdr.csum_flags & CSUM_IP_TSO) { struct ip *ip = (struct ip *)(m_head->m_data + ether_len); unsigned long iph_len = ip->ip_hl << 2; struct tcphdr *th = (struct tcphdr *)((caddr_t)ip + iph_len); tso_info->lso_v2_xmit.ip_version = RNDIS_TCP_LARGE_SEND_OFFLOAD_IPV4; ip->ip_len = 0; ip->ip_sum = 0; th->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, htons(IPPROTO_TCP)); } #endif #if defined(INET6) && defined(INET) else #endif #ifdef INET6 { struct ip6_hdr *ip6 = (struct ip6_hdr *) (m_head->m_data + ether_len); struct tcphdr *th = (struct tcphdr *)(ip6 + 1); tso_info->lso_v2_xmit.ip_version = RNDIS_TCP_LARGE_SEND_OFFLOAD_IPV6; ip6->ip6_plen = 0; th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0); } #endif tso_info->lso_v2_xmit.tcp_header_offset = 0; tso_info->lso_v2_xmit.mss = m_head->m_pkthdr.tso_segsz; } else if (m_head->m_pkthdr.csum_flags & txr->hn_csum_assist) { rndis_tcp_ip_csum_info *csum_info; rndis_msg_size += RNDIS_CSUM_PPI_SIZE; rppi = hv_set_rppi_data(rndis_mesg, RNDIS_CSUM_PPI_SIZE, tcpip_chksum_info); csum_info = (rndis_tcp_ip_csum_info *)((uint8_t *)rppi + rppi->per_packet_info_offset); csum_info->xmit.is_ipv4 = 1; if (m_head->m_pkthdr.csum_flags & CSUM_IP) csum_info->xmit.ip_header_csum = 1; if (m_head->m_pkthdr.csum_flags & CSUM_TCP) { csum_info->xmit.tcp_csum = 1; csum_info->xmit.tcp_header_offset = 0; } else if (m_head->m_pkthdr.csum_flags & CSUM_UDP) { csum_info->xmit.udp_csum = 1; } } rndis_mesg->msg_len = tot_data_buf_len + rndis_msg_size; tot_data_buf_len = rndis_mesg->msg_len; /* * Chimney send, if the packet could fit into one chimney buffer. */ if (tot_data_buf_len < txr->hn_chim_size) { txr->hn_tx_chimney_tried++; send_buf_section_idx = hn_chim_alloc(txr->hn_sc); if (send_buf_section_idx != HN_NVS_CHIM_IDX_INVALID) { uint8_t *dest = txr->hn_sc->hn_chim + (send_buf_section_idx * txr->hn_sc->hn_chim_szmax); memcpy(dest, rndis_mesg, rndis_msg_size); dest += rndis_msg_size; m_copydata(m_head, 0, m_head->m_pkthdr.len, dest); send_buf_section_size = tot_data_buf_len; txr->hn_gpa_cnt = 0; txr->hn_tx_chimney++; goto done; } } error = hn_txdesc_dmamap_load(txr, txd, &m_head, segs, &nsegs); if (error) { int freed; /* * This mbuf is not linked w/ the txd yet, so free it now. 
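 */

/*
 * The chimney decision earlier in hn_encap() is: if the RNDIS header
 * plus the whole frame fits in one pre-posted send-buffer section,
 * copy both there and skip the scatter/gather path entirely.  The
 * core of it, condensed as a sketch (hn_chim_alloc() and the buffer
 * layout are as in the driver); illustrative only:
 */
#if 0
	if (rndis_msg_size + m_head->m_pkthdr.len < txr->hn_chim_size) {
		uint32_t idx = hn_chim_alloc(txr->hn_sc);

		if (idx != HN_NVS_CHIM_IDX_INVALID) {
			uint8_t *dst = txr->hn_sc->hn_chim +
			    idx * txr->hn_sc->hn_chim_szmax;

			memcpy(dst, rndis_mesg, rndis_msg_size);
			m_copydata(m_head, 0, m_head->m_pkthdr.len,
			    dst + rndis_msg_size);
			/* No gather entries are needed for chimney sends. */
			txr->hn_gpa_cnt = 0;
		}
	}
#endif

/* On DMA-load failure the mbuf is freed, per the note above.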
*/ m_freem(m_head); *m_head0 = NULL; freed = hn_txdesc_put(txr, txd); KASSERT(freed != 0, ("fail to free txd upon txdma error")); txr->hn_txdma_failed++; if_inc_counter(txr->hn_sc->hn_ifp, IFCOUNTER_OERRORS, 1); return error; } *m_head0 = m_head; txr->hn_gpa_cnt = nsegs + HV_RF_NUM_TX_RESERVED_PAGE_BUFS; /* send packet with page buffer */ txr->hn_gpa[0].gpa_page = atop(txd->rndis_msg_paddr); txr->hn_gpa[0].gpa_ofs = txd->rndis_msg_paddr & PAGE_MASK; txr->hn_gpa[0].gpa_len = rndis_msg_size; /* * Fill the page buffers with mbuf info starting at index * HV_RF_NUM_TX_RESERVED_PAGE_BUFS. */ for (i = 0; i < nsegs; ++i) { struct vmbus_gpa *gpa = &txr->hn_gpa[ i + HV_RF_NUM_TX_RESERVED_PAGE_BUFS]; gpa->gpa_page = atop(segs[i].ds_addr); gpa->gpa_ofs = segs[i].ds_addr & PAGE_MASK; gpa->gpa_len = segs[i].ds_len; } send_buf_section_idx = HN_NVS_CHIM_IDX_INVALID; send_buf_section_size = 0; done: txd->m = m_head; /* Set the completion routine */ hn_send_ctx_init(&txd->send_ctx, hn_tx_done, txd, send_buf_section_idx, send_buf_section_size); return 0; } /* * NOTE: * If this function fails, then txd will be freed, but the mbuf * associated w/ the txd will _not_ be freed. */ static int hn_send_pkt(struct ifnet *ifp, struct hn_tx_ring *txr, struct hn_txdesc *txd) { int error, send_failed = 0; again: /* * Make sure that txd is not freed before ETHER_BPF_MTAP. */ hn_txdesc_hold(txd); error = hv_nv_on_send(txr->hn_chan, HN_NVS_RNDIS_MTYPE_DATA, &txd->send_ctx, txr->hn_gpa, txr->hn_gpa_cnt); if (!error) { ETHER_BPF_MTAP(ifp, txd->m); if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); if (!hn_use_if_start) { if_inc_counter(ifp, IFCOUNTER_OBYTES, txd->m->m_pkthdr.len); if (txd->m->m_flags & M_MCAST) if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1); } txr->hn_pkts++; } hn_txdesc_put(txr, txd); if (__predict_false(error)) { int freed; /* * This should "really rarely" happen. * * XXX Too many RX to be acked or too many sideband * commands to run? Ask netvsc_channel_rollup() * to kick start later. */ txr->hn_has_txeof = 1; if (!send_failed) { txr->hn_send_failed++; send_failed = 1; /* * Try sending again after set hn_has_txeof; * in case that we missed the last * netvsc_channel_rollup(). */ goto again; } if_printf(ifp, "send failed\n"); /* * Caller will perform further processing on the * associated mbuf, so don't free it in hn_txdesc_put(); * only unload it from the DMA map in hn_txdesc_put(), * if it was loaded. */ txd->m = NULL; freed = hn_txdesc_put(txr, txd); KASSERT(freed != 0, ("fail to free txd upon send error")); txr->hn_send_failed++; } return error; } /* * Start a transmit of one or more packets */ static int hn_start_locked(struct hn_tx_ring *txr, int len) { struct hn_softc *sc = txr->hn_sc; struct ifnet *ifp = sc->hn_ifp; KASSERT(hn_use_if_start, ("hn_start_locked is called, when if_start is disabled")); KASSERT(txr == &sc->hn_tx_ring[0], ("not the first TX ring")); mtx_assert(&txr->hn_tx_lock, MA_OWNED); if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) != IFF_DRV_RUNNING) return 0; while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) { struct hn_txdesc *txd; struct mbuf *m_head; int error; IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head); if (m_head == NULL) break; if (len > 0 && m_head->m_pkthdr.len > len) { /* * This sending could be time consuming; let callers * dispatch this packet sending (and sending of any * following up packets) to tx taskqueue. 
*/ IFQ_DRV_PREPEND(&ifp->if_snd, m_head); return 1; } txd = hn_txdesc_get(txr); if (txd == NULL) { txr->hn_no_txdescs++; IFQ_DRV_PREPEND(&ifp->if_snd, m_head); atomic_set_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE); break; } error = hn_encap(txr, txd, &m_head); if (error) { /* Both txd and m_head are freed */ continue; } error = hn_send_pkt(ifp, txr, txd); if (__predict_false(error)) { /* txd is freed, but m_head is not */ IFQ_DRV_PREPEND(&ifp->if_snd, m_head); atomic_set_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE); break; } } return 0; } /* * Link up/down notification */ void netvsc_linkstatus_callback(struct hn_softc *sc, uint32_t status) { if (status == 1) { sc->hn_carrier = 1; } else { sc->hn_carrier = 0; } } /* * Append the specified data to the indicated mbuf chain, * Extend the mbuf chain if the new data does not fit in * existing space. * * This is a minor rewrite of m_append() from sys/kern/uipc_mbuf.c. * There should be an equivalent in the kernel mbuf code, * but there does not appear to be one yet. * * Differs from m_append() in that additional mbufs are * allocated with cluster size MJUMPAGESIZE, and filled * accordingly. * * Return 1 if able to complete the job; otherwise 0. */ static int hv_m_append(struct mbuf *m0, int len, c_caddr_t cp) { struct mbuf *m, *n; int remainder, space; for (m = m0; m->m_next != NULL; m = m->m_next) ; remainder = len; space = M_TRAILINGSPACE(m); if (space > 0) { /* * Copy into available space. */ if (space > remainder) space = remainder; bcopy(cp, mtod(m, caddr_t) + m->m_len, space); m->m_len += space; cp += space; remainder -= space; } while (remainder > 0) { /* * Allocate a new mbuf; could check space * and allocate a cluster instead. */ n = m_getjcl(M_NOWAIT, m->m_type, 0, MJUMPAGESIZE); if (n == NULL) break; n->m_len = min(MJUMPAGESIZE, remainder); bcopy(cp, mtod(n, caddr_t), n->m_len); cp += n->m_len; remainder -= n->m_len; m->m_next = n; m = n; } if (m0->m_flags & M_PKTHDR) m0->m_pkthdr.len += len - remainder; return (remainder == 0); } #if defined(INET) || defined(INET6) static __inline int hn_lro_rx(struct lro_ctrl *lc, struct mbuf *m) { #if __FreeBSD_version >= 1100095 if (hn_lro_mbufq_depth) { tcp_lro_queue_mbuf(lc, m); return 0; } #endif return tcp_lro_rx(lc, m, 0); } #endif /* * Called when we receive a data packet from the "wire" on the * specified device * * Note: This is no longer used as a callback */ int netvsc_recv(struct hn_rx_ring *rxr, const void *data, int dlen, const struct hn_recvinfo *info) { struct ifnet *ifp = rxr->hn_ifp; struct mbuf *m_new; int size, do_lro = 0, do_csum = 1; int hash_type = M_HASHTYPE_OPAQUE_HASH; if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) return (0); /* * Bail out if packet contains more data than configured MTU. */ if (dlen > (ifp->if_mtu + ETHER_HDR_LEN)) { return (0); } else if (dlen <= MHLEN) { m_new = m_gethdr(M_NOWAIT, MT_DATA); if (m_new == NULL) { if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1); return (0); } memcpy(mtod(m_new, void *), data, dlen); m_new->m_pkthdr.len = m_new->m_len = dlen; rxr->hn_small_pkts++; } else { /* * Get an mbuf with a cluster. For packets 2K or less, * get a standard 2K cluster. For anything larger, get a * 4K cluster. Any buffers larger than 4K can cause problems * if looped around to the Hyper-V TX channel, so avoid them. 
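 */

/*
 * hv_m_append() above is the generic "fill trailing space, then chain
 * fixed-size buffers" pattern.  The same idea outside the mbuf world,
 * over a simple chained-buffer type (chunk and chain_append() are
 * hypothetical); a self-contained sketch, illustrative only:
 */
#if 0
#include <stdlib.h>
#include <string.h>

#define CHUNK_SIZE	4096	/* analogous to MJUMPAGESIZE */

struct chunk {
	struct chunk	*next;
	size_t		 used;
	char		 data[CHUNK_SIZE];
};

/*
 * Append len bytes at the tail, growing the chain as needed.
 * Returns the number of bytes left uncopied (0 on full success),
 * mirroring hv_m_append()'s partial-copy semantics.
 */
static size_t
chain_append(struct chunk *m, const char *cp, size_t len)
{
	while (m->next != NULL)		/* find the tail */
		m = m->next;

	while (len > 0) {
		size_t space = CHUNK_SIZE - m->used;

		if (space == 0) {
			struct chunk *n = calloc(1, sizeof(*n));

			if (n == NULL)
				break;	/* report the remainder */
			m->next = n;
			m = n;
			space = CHUNK_SIZE;
		}
		if (space > len)
			space = len;
		memcpy(m->data + m->used, cp, space);
		m->used += space;
		cp += space;
		len -= space;
	}
	return (len);
}
#endif

/* Cluster size selection follows, per the note above.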
*/ size = MCLBYTES; if (dlen > MCLBYTES) { /* 4096 */ size = MJUMPAGESIZE; } m_new = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, size); if (m_new == NULL) { if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1); return (0); } hv_m_append(m_new, dlen, data); } m_new->m_pkthdr.rcvif = ifp; if (__predict_false((ifp->if_capenable & IFCAP_RXCSUM) == 0)) do_csum = 0; /* receive side checksum offload */ if (info->csum_info != NULL) { /* IP csum offload */ if (info->csum_info->receive.ip_csum_succeeded && do_csum) { m_new->m_pkthdr.csum_flags |= (CSUM_IP_CHECKED | CSUM_IP_VALID); rxr->hn_csum_ip++; } /* TCP/UDP csum offload */ if ((info->csum_info->receive.tcp_csum_succeeded || info->csum_info->receive.udp_csum_succeeded) && do_csum) { m_new->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR); m_new->m_pkthdr.csum_data = 0xffff; if (info->csum_info->receive.tcp_csum_succeeded) rxr->hn_csum_tcp++; else rxr->hn_csum_udp++; } if (info->csum_info->receive.ip_csum_succeeded && info->csum_info->receive.tcp_csum_succeeded) do_lro = 1; } else { const struct ether_header *eh; uint16_t etype; int hoff; hoff = sizeof(*eh); if (m_new->m_len < hoff) goto skip; eh = mtod(m_new, struct ether_header *); etype = ntohs(eh->ether_type); if (etype == ETHERTYPE_VLAN) { const struct ether_vlan_header *evl; hoff = sizeof(*evl); if (m_new->m_len < hoff) goto skip; evl = mtod(m_new, struct ether_vlan_header *); etype = ntohs(evl->evl_proto); } if (etype == ETHERTYPE_IP) { int pr; pr = hn_check_iplen(m_new, hoff); if (pr == IPPROTO_TCP) { if (do_csum && (rxr->hn_trust_hcsum & HN_TRUST_HCSUM_TCP)) { rxr->hn_csum_trusted++; m_new->m_pkthdr.csum_flags |= (CSUM_IP_CHECKED | CSUM_IP_VALID | CSUM_DATA_VALID | CSUM_PSEUDO_HDR); m_new->m_pkthdr.csum_data = 0xffff; } do_lro = 1; } else if (pr == IPPROTO_UDP) { if (do_csum && (rxr->hn_trust_hcsum & HN_TRUST_HCSUM_UDP)) { rxr->hn_csum_trusted++; m_new->m_pkthdr.csum_flags |= (CSUM_IP_CHECKED | CSUM_IP_VALID | CSUM_DATA_VALID | CSUM_PSEUDO_HDR); m_new->m_pkthdr.csum_data = 0xffff; } } else if (pr != IPPROTO_DONE && do_csum && (rxr->hn_trust_hcsum & HN_TRUST_HCSUM_IP)) { rxr->hn_csum_trusted++; m_new->m_pkthdr.csum_flags |= (CSUM_IP_CHECKED | CSUM_IP_VALID); } } } skip: if (info->vlan_info != NULL) { m_new->m_pkthdr.ether_vtag = info->vlan_info->u1.s1.vlan_id; m_new->m_flags |= M_VLANTAG; } if (info->hash_info != NULL && info->hash_value != NULL) { rxr->hn_rss_pkts++; m_new->m_pkthdr.flowid = info->hash_value->hash_value; if ((info->hash_info->hash_info & NDIS_HASH_FUNCTION_MASK) == NDIS_HASH_FUNCTION_TOEPLITZ) { uint32_t type = (info->hash_info->hash_info & NDIS_HASH_TYPE_MASK); switch (type) { case NDIS_HASH_IPV4: hash_type = M_HASHTYPE_RSS_IPV4; break; case NDIS_HASH_TCP_IPV4: hash_type = M_HASHTYPE_RSS_TCP_IPV4; break; case NDIS_HASH_IPV6: hash_type = M_HASHTYPE_RSS_IPV6; break; case NDIS_HASH_IPV6_EX: hash_type = M_HASHTYPE_RSS_IPV6_EX; break; case NDIS_HASH_TCP_IPV6: hash_type = M_HASHTYPE_RSS_TCP_IPV6; break; case NDIS_HASH_TCP_IPV6_EX: hash_type = M_HASHTYPE_RSS_TCP_IPV6_EX; break; } } } else { if (info->hash_value != NULL) { m_new->m_pkthdr.flowid = info->hash_value->hash_value; } else { m_new->m_pkthdr.flowid = rxr->hn_rx_idx; hash_type = M_HASHTYPE_OPAQUE; } } M_HASHTYPE_SET(m_new, hash_type); /* * Note: Moved RX completion back to hv_nv_on_receive() so all * messages (not just data messages) will trigger a response. 
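 */

/*
 * The receive-side checksum policy above, reduced to a decision table:
 * host-supplied csum_info wins; without it, the hw.hn.trust_host*
 * knobs decide whether the host's verification is assumed for TCP,
 * UDP, or other IP packets.  A simplified sketch of that fallback
 * using the driver's flag names (rx_fallback_csum_flags() is
 * hypothetical); illustrative only:
 */
#if 0
static int
rx_fallback_csum_flags(int proto, int do_csum, int trust_mask)
{
	if (!do_csum)		/* IFCAP_RXCSUM disabled */
		return (0);
	switch (proto) {
	case IPPROTO_TCP:
		if (trust_mask & HN_TRUST_HCSUM_TCP)
			return (CSUM_IP_CHECKED | CSUM_IP_VALID |
			    CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
		break;
	case IPPROTO_UDP:
		if (trust_mask & HN_TRUST_HCSUM_UDP)
			return (CSUM_IP_CHECKED | CSUM_IP_VALID |
			    CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
		break;
	default:
		if (trust_mask & HN_TRUST_HCSUM_IP)
			return (CSUM_IP_CHECKED | CSUM_IP_VALID);
		break;
	}
	return (0);
}
#endif

/* Count the packet and hand it up the stack.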
*/ if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); rxr->hn_pkts++; if ((ifp->if_capenable & IFCAP_LRO) && do_lro) { #if defined(INET) || defined(INET6) struct lro_ctrl *lro = &rxr->hn_lro; if (lro->lro_cnt) { rxr->hn_lro_tried++; if (hn_lro_rx(lro, m_new) == 0) { /* DONE! */ return 0; } } #endif } /* We're not holding the lock here, so don't release it */ (*ifp->if_input)(ifp, m_new); return (0); } /* * Rules for using sc->temp_unusable: * 1. sc->temp_unusable can only be read or written while holding NV_LOCK() * 2. code reading sc->temp_unusable under NV_LOCK(), and finding * sc->temp_unusable set, must release NV_LOCK() and exit * 3. to retain exclusive control of the interface, * sc->temp_unusable must be set by code before releasing NV_LOCK() * 4. only code setting sc->temp_unusable can clear sc->temp_unusable * 5. code setting sc->temp_unusable must eventually clear sc->temp_unusable */ /* * Standard ioctl entry point. Called when the user wants to configure * the interface. */ static int hn_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) { hn_softc_t *sc = ifp->if_softc; struct ifreq *ifr = (struct ifreq *)data; #ifdef INET struct ifaddr *ifa = (struct ifaddr *)data; #endif netvsc_device_info device_info; int mask, error = 0, ring_cnt; int retry_cnt = 500; switch(cmd) { case SIOCSIFADDR: #ifdef INET if (ifa->ifa_addr->sa_family == AF_INET) { ifp->if_flags |= IFF_UP; if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) hn_ifinit(sc); arp_ifinit(ifp, ifa); } else #endif error = ether_ioctl(ifp, cmd, data); break; case SIOCSIFMTU: /* Check MTU value change */ if (ifp->if_mtu == ifr->ifr_mtu) break; if (ifr->ifr_mtu > NETVSC_MAX_CONFIGURABLE_MTU) { error = EINVAL; break; } /* Obtain and record requested MTU */ ifp->if_mtu = ifr->ifr_mtu; #if __FreeBSD_version >= 1100099 /* * Make sure that LRO aggregation length limit is still * valid, after the MTU change. */ NV_LOCK(sc); if (sc->hn_rx_ring[0].hn_lro.lro_length_lim < HN_LRO_LENLIM_MIN(ifp)) hn_set_lro_lenlim(sc, HN_LRO_LENLIM_MIN(ifp)); NV_UNLOCK(sc); #endif do { NV_LOCK(sc); if (!sc->temp_unusable) { sc->temp_unusable = TRUE; retry_cnt = -1; } NV_UNLOCK(sc); if (retry_cnt > 0) { retry_cnt--; DELAY(5 * 1000); } } while (retry_cnt > 0); if (retry_cnt == 0) { error = EINVAL; break; } /* We must remove and add back the device to cause the new * MTU to take effect. This includes tearing down, but not * deleting the channel, then bringing it back up. */ error = hv_rf_on_device_remove(sc, HV_RF_NV_RETAIN_CHANNEL); if (error) { NV_LOCK(sc); sc->temp_unusable = FALSE; NV_UNLOCK(sc); break; } /* Wait for subchannels to be destroyed */ vmbus_subchan_drain(sc->hn_prichan); ring_cnt = sc->hn_rx_ring_inuse; error = hv_rf_on_device_add(sc, &device_info, &ring_cnt, &sc->hn_rx_ring[0]); if (error) { NV_LOCK(sc); sc->temp_unusable = FALSE; NV_UNLOCK(sc); break; } /* # of channels can _not_ be changed */ KASSERT(sc->hn_rx_ring_inuse == ring_cnt, ("RX ring count %d and channel count %u mismatch", sc->hn_rx_ring_cnt, ring_cnt)); if (sc->hn_rx_ring_inuse > 1) { int r; /* * Skip the rings on primary channel; they are * handled by the hv_rf_on_device_add() above. 
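 */

/*
 * The temp_unusable dance above (see rules 1-5 before hn_ioctl()) is
 * a poll-with-timeout try-lock: attempt to claim the flag under
 * NV_LOCK(), back off 5ms between attempts, and give up after 500
 * tries.  Factored out as a sketch (claim_temp_unusable() is
 * hypothetical, not a driver function); illustrative only:
 */
#if 0
static int
claim_temp_unusable(hn_softc_t *sc)
{
	int retry_cnt = 500;

	do {
		NV_LOCK(sc);
		if (!sc->temp_unusable) {
			sc->temp_unusable = TRUE;	/* claimed */
			retry_cnt = -1;
		}
		NV_UNLOCK(sc);
		if (retry_cnt > 0) {
			retry_cnt--;
			DELAY(5 * 1000);	/* 5ms between attempts */
		}
	} while (retry_cnt > 0);

	/* retry_cnt == 0 means timeout; the ioctl paths return EINVAL. */
	return (retry_cnt != 0);
}
#endif

/* The subchannel rings are reattached below, per the note above.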
*/ for (r = 1; r < sc->hn_rx_ring_cnt; ++r) { sc->hn_rx_ring[r].hn_rx_flags &= ~HN_RX_FLAG_ATTACHED; } for (r = 1; r < sc->hn_tx_ring_cnt; ++r) { sc->hn_tx_ring[r].hn_tx_flags &= ~HN_TX_FLAG_ATTACHED; } hn_subchan_setup(sc); } if (sc->hn_tx_ring[0].hn_chim_size > sc->hn_chim_szmax) hn_set_chim_size(sc, sc->hn_chim_szmax); hn_ifinit_locked(sc); NV_LOCK(sc); sc->temp_unusable = FALSE; NV_UNLOCK(sc); break; case SIOCSIFFLAGS: do { NV_LOCK(sc); if (!sc->temp_unusable) { sc->temp_unusable = TRUE; retry_cnt = -1; } NV_UNLOCK(sc); if (retry_cnt > 0) { retry_cnt--; DELAY(5 * 1000); } } while (retry_cnt > 0); if (retry_cnt == 0) { error = EINVAL; break; } if (ifp->if_flags & IFF_UP) { /* * If only the state of the PROMISC flag changed, * then just use the 'set promisc mode' command * instead of reinitializing the entire NIC. Doing * a full re-init means reloading the firmware and * waiting for it to start up, which may take a * second or two. */ #ifdef notyet /* Fixme: Promiscuous mode? */ if (ifp->if_drv_flags & IFF_DRV_RUNNING && ifp->if_flags & IFF_PROMISC && !(sc->hn_if_flags & IFF_PROMISC)) { /* do something here for Hyper-V */ } else if (ifp->if_drv_flags & IFF_DRV_RUNNING && !(ifp->if_flags & IFF_PROMISC) && sc->hn_if_flags & IFF_PROMISC) { /* do something here for Hyper-V */ } else #endif hn_ifinit_locked(sc); } else { if (ifp->if_drv_flags & IFF_DRV_RUNNING) { hn_stop(sc); } } NV_LOCK(sc); sc->temp_unusable = FALSE; NV_UNLOCK(sc); sc->hn_if_flags = ifp->if_flags; error = 0; break; case SIOCSIFCAP: NV_LOCK(sc); mask = ifr->ifr_reqcap ^ ifp->if_capenable; if (mask & IFCAP_TXCSUM) { ifp->if_capenable ^= IFCAP_TXCSUM; if (ifp->if_capenable & IFCAP_TXCSUM) { ifp->if_hwassist |= sc->hn_tx_ring[0].hn_csum_assist; } else { ifp->if_hwassist &= ~sc->hn_tx_ring[0].hn_csum_assist; } } if (mask & IFCAP_RXCSUM) ifp->if_capenable ^= IFCAP_RXCSUM; if (mask & IFCAP_LRO) ifp->if_capenable ^= IFCAP_LRO; if (mask & IFCAP_TSO4) { ifp->if_capenable ^= IFCAP_TSO4; if (ifp->if_capenable & IFCAP_TSO4) ifp->if_hwassist |= CSUM_IP_TSO; else ifp->if_hwassist &= ~CSUM_IP_TSO; } if (mask & IFCAP_TSO6) { ifp->if_capenable ^= IFCAP_TSO6; if (ifp->if_capenable & IFCAP_TSO6) ifp->if_hwassist |= CSUM_IP6_TSO; else ifp->if_hwassist &= ~CSUM_IP6_TSO; } NV_UNLOCK(sc); error = 0; break; case SIOCADDMULTI: case SIOCDELMULTI: #ifdef notyet /* Fixme: Multicast mode? 
*/ if (ifp->if_drv_flags & IFF_DRV_RUNNING) { NV_LOCK(sc); netvsc_setmulti(sc); NV_UNLOCK(sc); error = 0; } #endif error = EINVAL; break; case SIOCSIFMEDIA: case SIOCGIFMEDIA: error = ifmedia_ioctl(ifp, ifr, &sc->hn_media, cmd); break; default: error = ether_ioctl(ifp, cmd, data); break; } return (error); } /* * */ static void hn_stop(hn_softc_t *sc) { struct ifnet *ifp; int ret, i; ifp = sc->hn_ifp; if (bootverbose) printf(" Closing Device ...\n"); atomic_clear_int(&ifp->if_drv_flags, (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)); for (i = 0; i < sc->hn_tx_ring_inuse; ++i) sc->hn_tx_ring[i].hn_oactive = 0; if_link_state_change(ifp, LINK_STATE_DOWN); sc->hn_initdone = 0; ret = hv_rf_on_close(sc); } /* * FreeBSD transmit entry point */ static void hn_start(struct ifnet *ifp) { struct hn_softc *sc = ifp->if_softc; struct hn_tx_ring *txr = &sc->hn_tx_ring[0]; if (txr->hn_sched_tx) goto do_sched; if (mtx_trylock(&txr->hn_tx_lock)) { int sched; sched = hn_start_locked(txr, txr->hn_direct_tx_size); mtx_unlock(&txr->hn_tx_lock); if (!sched) return; } do_sched: taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_tx_task); } static void hn_start_txeof(struct hn_tx_ring *txr) { struct hn_softc *sc = txr->hn_sc; struct ifnet *ifp = sc->hn_ifp; KASSERT(txr == &sc->hn_tx_ring[0], ("not the first TX ring")); if (txr->hn_sched_tx) goto do_sched; if (mtx_trylock(&txr->hn_tx_lock)) { int sched; atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE); sched = hn_start_locked(txr, txr->hn_direct_tx_size); mtx_unlock(&txr->hn_tx_lock); if (sched) { taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_tx_task); } } else { do_sched: /* * Release the OACTIVE earlier, with the hope, that * others could catch up. The task will clear the * flag again with the hn_tx_lock to avoid possible * races. */ atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE); taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_txeof_task); } } /* * */ static void hn_ifinit_locked(hn_softc_t *sc) { struct ifnet *ifp; int ret, i; ifp = sc->hn_ifp; if (ifp->if_drv_flags & IFF_DRV_RUNNING) { return; } hv_promisc_mode = 1; ret = hv_rf_on_open(sc); if (ret != 0) { return; } else { sc->hn_initdone = 1; } atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE); for (i = 0; i < sc->hn_tx_ring_inuse; ++i) sc->hn_tx_ring[i].hn_oactive = 0; atomic_set_int(&ifp->if_drv_flags, IFF_DRV_RUNNING); if_link_state_change(ifp, LINK_STATE_UP); } /* * */ static void hn_ifinit(void *xsc) { hn_softc_t *sc = xsc; NV_LOCK(sc); if (sc->temp_unusable) { NV_UNLOCK(sc); return; } sc->temp_unusable = TRUE; NV_UNLOCK(sc); hn_ifinit_locked(sc); NV_LOCK(sc); sc->temp_unusable = FALSE; NV_UNLOCK(sc); } #ifdef LATER /* * */ static void hn_watchdog(struct ifnet *ifp) { hn_softc_t *sc; sc = ifp->if_softc; printf("hn%d: watchdog timeout -- resetting\n", sc->hn_unit); hn_ifinit(sc); /*???*/ if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); } #endif #if __FreeBSD_version >= 1100099 static int hn_lro_lenlim_sysctl(SYSCTL_HANDLER_ARGS) { struct hn_softc *sc = arg1; unsigned int lenlim; int error; lenlim = sc->hn_rx_ring[0].hn_lro.lro_length_lim; error = sysctl_handle_int(oidp, &lenlim, 0, req); if (error || req->newptr == NULL) return error; if (lenlim < HN_LRO_LENLIM_MIN(sc->hn_ifp) || lenlim > TCP_LRO_LENGTH_MAX) return EINVAL; NV_LOCK(sc); hn_set_lro_lenlim(sc, lenlim); NV_UNLOCK(sc); return 0; } static int hn_lro_ackcnt_sysctl(SYSCTL_HANDLER_ARGS) { struct hn_softc *sc = arg1; int ackcnt, error, i; /* * lro_ackcnt_lim is append count limit, * +1 to turn it into aggregation limit. 
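 */

/*
 * Worked example of the conversion below: with lro_ackcnt_lim == 1
 * (HN_LRO_ACKCNT_DEF), userland reads "2", i.e. at most two ACKs are
 * aggregated; writing "2" stores 1 again.  As a pair of hypothetical
 * helpers; illustrative only:
 */
#if 0
static int ackcnt_to_user(int append_lim)	{ return (append_lim + 1); }
static int ackcnt_from_user(int agg_lim)	{ return (agg_lim - 1); }
#endif

/* Convert to the user-visible aggregation limit, as noted above.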
*/ ackcnt = sc->hn_rx_ring[0].hn_lro.lro_ackcnt_lim + 1; error = sysctl_handle_int(oidp, &ackcnt, 0, req); if (error || req->newptr == NULL) return error; if (ackcnt < 2 || ackcnt > (TCP_LRO_ACKCNT_MAX + 1)) return EINVAL; /* * Convert aggregation limit back to append * count limit. */ --ackcnt; NV_LOCK(sc); for (i = 0; i < sc->hn_rx_ring_inuse; ++i) sc->hn_rx_ring[i].hn_lro.lro_ackcnt_lim = ackcnt; NV_UNLOCK(sc); return 0; } #endif static int hn_trust_hcsum_sysctl(SYSCTL_HANDLER_ARGS) { struct hn_softc *sc = arg1; int hcsum = arg2; int on, error, i; on = 0; if (sc->hn_rx_ring[0].hn_trust_hcsum & hcsum) on = 1; error = sysctl_handle_int(oidp, &on, 0, req); if (error || req->newptr == NULL) return error; NV_LOCK(sc); for (i = 0; i < sc->hn_rx_ring_inuse; ++i) { struct hn_rx_ring *rxr = &sc->hn_rx_ring[i]; if (on) rxr->hn_trust_hcsum |= hcsum; else rxr->hn_trust_hcsum &= ~hcsum; } NV_UNLOCK(sc); return 0; } static int hn_chim_size_sysctl(SYSCTL_HANDLER_ARGS) { struct hn_softc *sc = arg1; int chim_size, error; chim_size = sc->hn_tx_ring[0].hn_chim_size; error = sysctl_handle_int(oidp, &chim_size, 0, req); if (error || req->newptr == NULL) return error; if (chim_size > sc->hn_chim_szmax || chim_size <= 0) return EINVAL; hn_set_chim_size(sc, chim_size); return 0; } static int hn_rx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS) { struct hn_softc *sc = arg1; int ofs = arg2, i, error; struct hn_rx_ring *rxr; u_long stat; stat = 0; for (i = 0; i < sc->hn_rx_ring_inuse; ++i) { rxr = &sc->hn_rx_ring[i]; stat += *((u_long *)((uint8_t *)rxr + ofs)); } error = sysctl_handle_long(oidp, &stat, 0, req); if (error || req->newptr == NULL) return error; /* Zero out this stat. */ for (i = 0; i < sc->hn_rx_ring_inuse; ++i) { rxr = &sc->hn_rx_ring[i]; *((u_long *)((uint8_t *)rxr + ofs)) = 0; } return 0; } static int hn_rx_stat_u64_sysctl(SYSCTL_HANDLER_ARGS) { struct hn_softc *sc = arg1; int ofs = arg2, i, error; struct hn_rx_ring *rxr; uint64_t stat; stat = 0; for (i = 0; i < sc->hn_rx_ring_inuse; ++i) { rxr = &sc->hn_rx_ring[i]; stat += *((uint64_t *)((uint8_t *)rxr + ofs)); } error = sysctl_handle_64(oidp, &stat, 0, req); if (error || req->newptr == NULL) return error; /* Zero out this stat. */ for (i = 0; i < sc->hn_rx_ring_inuse; ++i) { rxr = &sc->hn_rx_ring[i]; *((uint64_t *)((uint8_t *)rxr + ofs)) = 0; } return 0; } static int hn_tx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS) { struct hn_softc *sc = arg1; int ofs = arg2, i, error; struct hn_tx_ring *txr; u_long stat; stat = 0; for (i = 0; i < sc->hn_tx_ring_inuse; ++i) { txr = &sc->hn_tx_ring[i]; stat += *((u_long *)((uint8_t *)txr + ofs)); } error = sysctl_handle_long(oidp, &stat, 0, req); if (error || req->newptr == NULL) return error; /* Zero out this stat. 
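 */

/*
 * All of the stat handlers above share one trick: the sysctl's arg2
 * is a byte offset into the per-ring structure, so a single handler
 * can sum (and reset) any u_long or uint64_t field across rings.  The
 * core of it as a self-contained sketch with a hypothetical ring
 * type; illustrative only:
 */
#if 0
#include <stddef.h>
#include <stdint.h>

struct ring {
	unsigned long	pkts;
	unsigned long	drops;
};

static unsigned long
sum_ring_stat(struct ring *rings, int nrings, size_t ofs)
{
	unsigned long total = 0;
	int i;

	for (i = 0; i < nrings; ++i)
		total += *(unsigned long *)((uint8_t *)&rings[i] + ofs);
	return (total);
}

/* usage: sum_ring_stat(rings, n, offsetof(struct ring, drops)) */
#endif

/* Zero the per-ring copies of the stat, as noted above.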
 */
	for (i = 0; i < sc->hn_tx_ring_inuse; ++i) {
		txr = &sc->hn_tx_ring[i];
		*((u_long *)((uint8_t *)txr + ofs)) = 0;
	}
	return 0;
}

static int
hn_tx_conf_int_sysctl(SYSCTL_HANDLER_ARGS)
{
	struct hn_softc *sc = arg1;
	int ofs = arg2, i, error, conf;
	struct hn_tx_ring *txr;

	txr = &sc->hn_tx_ring[0];
	conf = *((int *)((uint8_t *)txr + ofs));

	error = sysctl_handle_int(oidp, &conf, 0, req);
	if (error || req->newptr == NULL)
		return error;

	NV_LOCK(sc);
	for (i = 0; i < sc->hn_tx_ring_inuse; ++i) {
		txr = &sc->hn_tx_ring[i];
		*((int *)((uint8_t *)txr + ofs)) = conf;
	}
	NV_UNLOCK(sc);

	return 0;
}

static int
hn_ndis_version_sysctl(SYSCTL_HANDLER_ARGS)
{
	struct hn_softc *sc = arg1;
	char verstr[16];

	snprintf(verstr, sizeof(verstr), "%u.%u",
	    NDIS_VERSION_MAJOR(sc->hn_ndis_ver),
	    NDIS_VERSION_MINOR(sc->hn_ndis_ver));
	return sysctl_handle_string(oidp, verstr, sizeof(verstr), req);
}

static int
hn_check_iplen(const struct mbuf *m, int hoff)
{
	const struct ip *ip;
	int len, iphlen, iplen;
	const struct tcphdr *th;
	int thoff;				/* TCP data offset */

	len = hoff + sizeof(struct ip);

	/* The packet must be at least the size of an IP header. */
	if (m->m_pkthdr.len < len)
		return IPPROTO_DONE;

	/* The fixed IP header must reside completely in the first mbuf. */
	if (m->m_len < len)
		return IPPROTO_DONE;

	ip = mtodo(m, hoff);

	/* Bound check the packet's stated IP header length. */
	iphlen = ip->ip_hl << 2;
	if (iphlen < sizeof(struct ip))		/* minimum header length */
		return IPPROTO_DONE;

	/* The full IP header must reside completely in the one mbuf. */
	if (m->m_len < hoff + iphlen)
		return IPPROTO_DONE;

	iplen = ntohs(ip->ip_len);

	/*
	 * Check that the amount of data in the buffers is at least as much
	 * as the IP header would have us expect.
	 */
	if (m->m_pkthdr.len < hoff + iplen)
		return IPPROTO_DONE;

	/*
	 * Ignore IP fragments.
	 */
	if (ntohs(ip->ip_off) & (IP_OFFMASK | IP_MF))
		return IPPROTO_DONE;

	/*
	 * The TCP/IP or UDP/IP header must be entirely contained within
	 * the first fragment of a packet.
	 */
	switch (ip->ip_p) {
	case IPPROTO_TCP:
		if (iplen < iphlen + sizeof(struct tcphdr))
			return IPPROTO_DONE;
		if (m->m_len < hoff + iphlen + sizeof(struct tcphdr))
			return IPPROTO_DONE;
		th = (const struct tcphdr *)((const uint8_t *)ip + iphlen);
		thoff = th->th_off << 2;
		if (thoff < sizeof(struct tcphdr) || thoff + iphlen > iplen)
			return IPPROTO_DONE;
		if (m->m_len < hoff + iphlen + thoff)
			return IPPROTO_DONE;
		break;
	case IPPROTO_UDP:
		if (iplen < iphlen + sizeof(struct udphdr))
			return IPPROTO_DONE;
		if (m->m_len < hoff + iphlen + sizeof(struct udphdr))
			return IPPROTO_DONE;
		break;
	default:
		if (iplen < iphlen)
			return IPPROTO_DONE;
		break;
	}

	return ip->ip_p;
}

static int
hn_create_rx_data(struct hn_softc *sc, int ring_cnt)
{
	struct sysctl_oid_list *child;
	struct sysctl_ctx_list *ctx;
	device_t dev = sc->hn_dev;
#if defined(INET) || defined(INET6)
#if __FreeBSD_version >= 1100095
	int lroent_cnt;
#endif
#endif
	int i;

	/*
	 * Create RXBUF for reception.
	 *
	 * NOTE:
	 * - It is shared by all channels.
	 * - A large enough buffer is allocated; certain versions of NVS
	 *   may further limit the usable space.
*/ sc->hn_rxbuf = hyperv_dmamem_alloc(bus_get_dma_tag(dev), PAGE_SIZE, 0, NETVSC_RECEIVE_BUFFER_SIZE, &sc->hn_rxbuf_dma, BUS_DMA_WAITOK | BUS_DMA_ZERO); if (sc->hn_rxbuf == NULL) { device_printf(sc->hn_dev, "allocate rxbuf failed\n"); return (ENOMEM); } sc->hn_rx_ring_cnt = ring_cnt; sc->hn_rx_ring_inuse = sc->hn_rx_ring_cnt; sc->hn_rx_ring = malloc(sizeof(struct hn_rx_ring) * sc->hn_rx_ring_cnt, M_NETVSC, M_WAITOK | M_ZERO); #if defined(INET) || defined(INET6) #if __FreeBSD_version >= 1100095 lroent_cnt = hn_lro_entry_count; if (lroent_cnt < TCP_LRO_ENTRIES) lroent_cnt = TCP_LRO_ENTRIES; device_printf(dev, "LRO: entry count %d\n", lroent_cnt); #endif #endif /* INET || INET6 */ ctx = device_get_sysctl_ctx(dev); child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev)); /* Create dev.hn.UNIT.rx sysctl tree */ sc->hn_rx_sysctl_tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "rx", CTLFLAG_RD | CTLFLAG_MPSAFE, 0, ""); for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { struct hn_rx_ring *rxr = &sc->hn_rx_ring[i]; if (hn_trust_hosttcp) rxr->hn_trust_hcsum |= HN_TRUST_HCSUM_TCP; if (hn_trust_hostudp) rxr->hn_trust_hcsum |= HN_TRUST_HCSUM_UDP; if (hn_trust_hostip) rxr->hn_trust_hcsum |= HN_TRUST_HCSUM_IP; rxr->hn_ifp = sc->hn_ifp; if (i < sc->hn_tx_ring_cnt) rxr->hn_txr = &sc->hn_tx_ring[i]; rxr->hn_rdbuf = malloc(NETVSC_PACKET_SIZE, M_NETVSC, M_WAITOK); rxr->hn_rx_idx = i; rxr->hn_rxbuf = sc->hn_rxbuf; /* * Initialize LRO. */ #if defined(INET) || defined(INET6) #if __FreeBSD_version >= 1100095 tcp_lro_init_args(&rxr->hn_lro, sc->hn_ifp, lroent_cnt, hn_lro_mbufq_depth); #else tcp_lro_init(&rxr->hn_lro); rxr->hn_lro.ifp = sc->hn_ifp; #endif #if __FreeBSD_version >= 1100099 rxr->hn_lro.lro_length_lim = HN_LRO_LENLIM_DEF; rxr->hn_lro.lro_ackcnt_lim = HN_LRO_ACKCNT_DEF; #endif #endif /* INET || INET6 */ if (sc->hn_rx_sysctl_tree != NULL) { char name[16]; /* * Create per RX ring sysctl tree: * dev.hn.UNIT.rx.RINGID */ snprintf(name, sizeof(name), "%d", i); rxr->hn_rx_sysctl_tree = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(sc->hn_rx_sysctl_tree), OID_AUTO, name, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, ""); if (rxr->hn_rx_sysctl_tree != NULL) { SYSCTL_ADD_ULONG(ctx, SYSCTL_CHILDREN(rxr->hn_rx_sysctl_tree), OID_AUTO, "packets", CTLFLAG_RW, &rxr->hn_pkts, "# of packets received"); SYSCTL_ADD_ULONG(ctx, SYSCTL_CHILDREN(rxr->hn_rx_sysctl_tree), OID_AUTO, "rss_pkts", CTLFLAG_RW, &rxr->hn_rss_pkts, "# of packets w/ RSS info received"); } } } SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_queued", CTLTYPE_U64 | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_rx_ring, hn_lro.lro_queued), hn_rx_stat_u64_sysctl, "LU", "LRO queued"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_flushed", CTLTYPE_U64 | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_rx_ring, hn_lro.lro_flushed), hn_rx_stat_u64_sysctl, "LU", "LRO flushed"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_tried", CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_rx_ring, hn_lro_tried), hn_rx_stat_ulong_sysctl, "LU", "# of LRO tries"); #if __FreeBSD_version >= 1100099 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_length_lim", CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, hn_lro_lenlim_sysctl, "IU", "Max # of data bytes to be aggregated by LRO"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_ackcnt_lim", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, hn_lro_ackcnt_sysctl, "I", "Max # of ACKs to be aggregated by LRO"); #endif SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "trust_hosttcp", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, HN_TRUST_HCSUM_TCP, 
hn_trust_hcsum_sysctl, "I", "Trust tcp segement verification on host side, " "when csum info is missing"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "trust_hostudp", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, HN_TRUST_HCSUM_UDP, hn_trust_hcsum_sysctl, "I", "Trust udp datagram verification on host side, " "when csum info is missing"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "trust_hostip", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, HN_TRUST_HCSUM_IP, hn_trust_hcsum_sysctl, "I", "Trust ip packet verification on host side, " "when csum info is missing"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "csum_ip", CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_rx_ring, hn_csum_ip), hn_rx_stat_ulong_sysctl, "LU", "RXCSUM IP"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "csum_tcp", CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_rx_ring, hn_csum_tcp), hn_rx_stat_ulong_sysctl, "LU", "RXCSUM TCP"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "csum_udp", CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_rx_ring, hn_csum_udp), hn_rx_stat_ulong_sysctl, "LU", "RXCSUM UDP"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "csum_trusted", CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_rx_ring, hn_csum_trusted), hn_rx_stat_ulong_sysctl, "LU", "# of packets that we trust host's csum verification"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "small_pkts", CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_rx_ring, hn_small_pkts), hn_rx_stat_ulong_sysctl, "LU", "# of small packets received"); SYSCTL_ADD_INT(ctx, child, OID_AUTO, "rx_ring_cnt", CTLFLAG_RD, &sc->hn_rx_ring_cnt, 0, "# created RX rings"); SYSCTL_ADD_INT(ctx, child, OID_AUTO, "rx_ring_inuse", CTLFLAG_RD, &sc->hn_rx_ring_inuse, 0, "# used RX rings"); return (0); } static void hn_destroy_rx_data(struct hn_softc *sc) { int i; if (sc->hn_rxbuf != NULL) { hyperv_dmamem_free(&sc->hn_rxbuf_dma, sc->hn_rxbuf); sc->hn_rxbuf = NULL; } if (sc->hn_rx_ring_cnt == 0) return; for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { struct hn_rx_ring *rxr = &sc->hn_rx_ring[i]; #if defined(INET) || defined(INET6) tcp_lro_free(&rxr->hn_lro); #endif free(rxr->hn_rdbuf, M_NETVSC); } free(sc->hn_rx_ring, M_NETVSC); sc->hn_rx_ring = NULL; sc->hn_rx_ring_cnt = 0; sc->hn_rx_ring_inuse = 0; } static int hn_create_tx_ring(struct hn_softc *sc, int id) { struct hn_tx_ring *txr = &sc->hn_tx_ring[id]; device_t dev = sc->hn_dev; bus_dma_tag_t parent_dtag; int error, i; uint32_t version; txr->hn_sc = sc; txr->hn_tx_idx = id; #ifndef HN_USE_TXDESC_BUFRING mtx_init(&txr->hn_txlist_spin, "hn txlist", NULL, MTX_SPIN); #endif mtx_init(&txr->hn_tx_lock, "hn tx", NULL, MTX_DEF); txr->hn_txdesc_cnt = HN_TX_DESC_CNT; txr->hn_txdesc = malloc(sizeof(struct hn_txdesc) * txr->hn_txdesc_cnt, M_NETVSC, M_WAITOK | M_ZERO); #ifndef HN_USE_TXDESC_BUFRING SLIST_INIT(&txr->hn_txlist); #else txr->hn_txdesc_br = buf_ring_alloc(txr->hn_txdesc_cnt, M_NETVSC, M_WAITOK, &txr->hn_tx_lock); #endif txr->hn_tx_taskq = sc->hn_tx_taskq; if (hn_use_if_start) { txr->hn_txeof = hn_start_txeof; TASK_INIT(&txr->hn_tx_task, 0, hn_start_taskfunc, txr); TASK_INIT(&txr->hn_txeof_task, 0, hn_start_txeof_taskfunc, txr); } else { int br_depth; txr->hn_txeof = hn_xmit_txeof; TASK_INIT(&txr->hn_tx_task, 0, hn_xmit_taskfunc, txr); TASK_INIT(&txr->hn_txeof_task, 0, hn_xmit_txeof_taskfunc, txr); br_depth = hn_get_txswq_depth(txr); txr->hn_mbuf_br = buf_ring_alloc(br_depth, M_NETVSC, M_WAITOK, &txr->hn_tx_lock); } txr->hn_direct_tx_size = hn_direct_tx_size; version = 
VMBUS_GET_VERSION(device_get_parent(dev), dev); if (version >= VMBUS_VERSION_WIN8_1) { txr->hn_csum_assist = HN_CSUM_ASSIST; } else { txr->hn_csum_assist = HN_CSUM_ASSIST_WIN8; if (id == 0) { device_printf(dev, "bus version %u.%u, " "no UDP checksum offloading\n", VMBUS_VERSION_MAJOR(version), VMBUS_VERSION_MINOR(version)); } } /* * Always schedule transmission instead of trying to do direct * transmission. This one gives the best performance so far. */ txr->hn_sched_tx = 1; parent_dtag = bus_get_dma_tag(dev); /* DMA tag for RNDIS messages. */ error = bus_dma_tag_create(parent_dtag, /* parent */ HN_RNDIS_MSG_ALIGN, /* alignment */ HN_RNDIS_MSG_BOUNDARY, /* boundary */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ HN_RNDIS_MSG_LEN, /* maxsize */ 1, /* nsegments */ HN_RNDIS_MSG_LEN, /* maxsegsize */ 0, /* flags */ NULL, /* lockfunc */ NULL, /* lockfuncarg */ &txr->hn_tx_rndis_dtag); if (error) { device_printf(dev, "failed to create rndis dmatag\n"); return error; } /* DMA tag for data. */ error = bus_dma_tag_create(parent_dtag, /* parent */ 1, /* alignment */ HN_TX_DATA_BOUNDARY, /* boundary */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ HN_TX_DATA_MAXSIZE, /* maxsize */ HN_TX_DATA_SEGCNT_MAX, /* nsegments */ HN_TX_DATA_SEGSIZE, /* maxsegsize */ 0, /* flags */ NULL, /* lockfunc */ NULL, /* lockfuncarg */ &txr->hn_tx_data_dtag); if (error) { device_printf(dev, "failed to create data dmatag\n"); return error; } for (i = 0; i < txr->hn_txdesc_cnt; ++i) { struct hn_txdesc *txd = &txr->hn_txdesc[i]; txd->txr = txr; /* * Allocate and load RNDIS messages. */ error = bus_dmamem_alloc(txr->hn_tx_rndis_dtag, (void **)&txd->rndis_msg, BUS_DMA_WAITOK | BUS_DMA_COHERENT, &txd->rndis_msg_dmap); if (error) { device_printf(dev, "failed to allocate rndis_msg, %d\n", i); return error; } error = bus_dmamap_load(txr->hn_tx_rndis_dtag, txd->rndis_msg_dmap, txd->rndis_msg, HN_RNDIS_MSG_LEN, hyperv_dma_map_paddr, &txd->rndis_msg_paddr, BUS_DMA_NOWAIT); if (error) { device_printf(dev, "failed to load rndis_msg, %d\n", i); bus_dmamem_free(txr->hn_tx_rndis_dtag, txd->rndis_msg, txd->rndis_msg_dmap); return error; } /* DMA map for TX data. 
*/ error = bus_dmamap_create(txr->hn_tx_data_dtag, 0, &txd->data_dmap); if (error) { device_printf(dev, "failed to allocate tx data dmamap\n"); bus_dmamap_unload(txr->hn_tx_rndis_dtag, txd->rndis_msg_dmap); bus_dmamem_free(txr->hn_tx_rndis_dtag, txd->rndis_msg, txd->rndis_msg_dmap); return error; } /* All set, put it to list */ txd->flags |= HN_TXD_FLAG_ONLIST; #ifndef HN_USE_TXDESC_BUFRING SLIST_INSERT_HEAD(&txr->hn_txlist, txd, link); #else buf_ring_enqueue(txr->hn_txdesc_br, txd); #endif } txr->hn_txdesc_avail = txr->hn_txdesc_cnt; if (sc->hn_tx_sysctl_tree != NULL) { struct sysctl_oid_list *child; struct sysctl_ctx_list *ctx; char name[16]; /* * Create per TX ring sysctl tree: * dev.hn.UNIT.tx.RINGID */ ctx = device_get_sysctl_ctx(dev); child = SYSCTL_CHILDREN(sc->hn_tx_sysctl_tree); snprintf(name, sizeof(name), "%d", id); txr->hn_tx_sysctl_tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, name, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, ""); if (txr->hn_tx_sysctl_tree != NULL) { child = SYSCTL_CHILDREN(txr->hn_tx_sysctl_tree); SYSCTL_ADD_INT(ctx, child, OID_AUTO, "txdesc_avail", CTLFLAG_RD, &txr->hn_txdesc_avail, 0, "# of available TX descs"); if (!hn_use_if_start) { SYSCTL_ADD_INT(ctx, child, OID_AUTO, "oactive", CTLFLAG_RD, &txr->hn_oactive, 0, "over active"); } SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "packets", CTLFLAG_RW, &txr->hn_pkts, "# of packets transmitted"); } } return 0; } static void hn_txdesc_dmamap_destroy(struct hn_txdesc *txd) { struct hn_tx_ring *txr = txd->txr; KASSERT(txd->m == NULL, ("still has mbuf installed")); KASSERT((txd->flags & HN_TXD_FLAG_DMAMAP) == 0, ("still dma mapped")); bus_dmamap_unload(txr->hn_tx_rndis_dtag, txd->rndis_msg_dmap); bus_dmamem_free(txr->hn_tx_rndis_dtag, txd->rndis_msg, txd->rndis_msg_dmap); bus_dmamap_destroy(txr->hn_tx_data_dtag, txd->data_dmap); } static void hn_destroy_tx_ring(struct hn_tx_ring *txr) { struct hn_txdesc *txd; if (txr->hn_txdesc == NULL) return; #ifndef HN_USE_TXDESC_BUFRING while ((txd = SLIST_FIRST(&txr->hn_txlist)) != NULL) { SLIST_REMOVE_HEAD(&txr->hn_txlist, link); hn_txdesc_dmamap_destroy(txd); } #else mtx_lock(&txr->hn_tx_lock); while ((txd = buf_ring_dequeue_sc(txr->hn_txdesc_br)) != NULL) hn_txdesc_dmamap_destroy(txd); mtx_unlock(&txr->hn_tx_lock); #endif if (txr->hn_tx_data_dtag != NULL) bus_dma_tag_destroy(txr->hn_tx_data_dtag); if (txr->hn_tx_rndis_dtag != NULL) bus_dma_tag_destroy(txr->hn_tx_rndis_dtag); #ifdef HN_USE_TXDESC_BUFRING buf_ring_free(txr->hn_txdesc_br, M_NETVSC); #endif free(txr->hn_txdesc, M_NETVSC); txr->hn_txdesc = NULL; if (txr->hn_mbuf_br != NULL) buf_ring_free(txr->hn_mbuf_br, M_NETVSC); #ifndef HN_USE_TXDESC_BUFRING mtx_destroy(&txr->hn_txlist_spin); #endif mtx_destroy(&txr->hn_tx_lock); } static int hn_create_tx_data(struct hn_softc *sc, int ring_cnt) { struct sysctl_oid_list *child; struct sysctl_ctx_list *ctx; int i; /* * Create TXBUF for chimney sending. * * NOTE: It is shared by all channels. 
*/ sc->hn_chim = hyperv_dmamem_alloc(bus_get_dma_tag(sc->hn_dev), PAGE_SIZE, 0, NETVSC_SEND_BUFFER_SIZE, &sc->hn_chim_dma, BUS_DMA_WAITOK | BUS_DMA_ZERO); if (sc->hn_chim == NULL) { device_printf(sc->hn_dev, "allocate txbuf failed\n"); return (ENOMEM); } sc->hn_tx_ring_cnt = ring_cnt; sc->hn_tx_ring_inuse = sc->hn_tx_ring_cnt; sc->hn_tx_ring = malloc(sizeof(struct hn_tx_ring) * sc->hn_tx_ring_cnt, M_NETVSC, M_WAITOK | M_ZERO); ctx = device_get_sysctl_ctx(sc->hn_dev); child = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->hn_dev)); /* Create dev.hn.UNIT.tx sysctl tree */ sc->hn_tx_sysctl_tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "tx", CTLFLAG_RD | CTLFLAG_MPSAFE, 0, ""); for (i = 0; i < sc->hn_tx_ring_cnt; ++i) { int error; error = hn_create_tx_ring(sc, i); if (error) return error; } SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "no_txdescs", CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_tx_ring, hn_no_txdescs), hn_tx_stat_ulong_sysctl, "LU", "# of times short of TX descs"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "send_failed", CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_tx_ring, hn_send_failed), hn_tx_stat_ulong_sysctl, "LU", "# of hyper-v sending failure"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "txdma_failed", CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_tx_ring, hn_txdma_failed), hn_tx_stat_ulong_sysctl, "LU", "# of TX DMA failure"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_collapsed", CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_tx_ring, hn_tx_collapsed), hn_tx_stat_ulong_sysctl, "LU", "# of TX mbuf collapsed"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_chimney", CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_tx_ring, hn_tx_chimney), hn_tx_stat_ulong_sysctl, "LU", "# of chimney send"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_chimney_tried", CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_tx_ring, hn_tx_chimney_tried), hn_tx_stat_ulong_sysctl, "LU", "# of chimney send tries"); SYSCTL_ADD_INT(ctx, child, OID_AUTO, "txdesc_cnt", CTLFLAG_RD, &sc->hn_tx_ring[0].hn_txdesc_cnt, 0, "# of total TX descs"); SYSCTL_ADD_INT(ctx, child, OID_AUTO, "tx_chimney_max", CTLFLAG_RD, &sc->hn_chim_szmax, 0, "Chimney send packet size upper boundary"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_chimney_size", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, hn_chim_size_sysctl, "I", "Chimney send packet size limit"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "direct_tx_size", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_tx_ring, hn_direct_tx_size), hn_tx_conf_int_sysctl, "I", "Size of the packet for direct transmission"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "sched_tx", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_tx_ring, hn_sched_tx), hn_tx_conf_int_sysctl, "I", "Always schedule transmission " "instead of doing direct transmission"); SYSCTL_ADD_INT(ctx, child, OID_AUTO, "tx_ring_cnt", CTLFLAG_RD, &sc->hn_tx_ring_cnt, 0, "# created TX rings"); SYSCTL_ADD_INT(ctx, child, OID_AUTO, "tx_ring_inuse", CTLFLAG_RD, &sc->hn_tx_ring_inuse, 0, "# used TX rings"); return 0; } static void hn_set_chim_size(struct hn_softc *sc, int chim_size) { int i; NV_LOCK(sc); for (i = 0; i < sc->hn_tx_ring_inuse; ++i) sc->hn_tx_ring[i].hn_chim_size = chim_size; NV_UNLOCK(sc); } static void hn_destroy_tx_data(struct hn_softc *sc) { int i; if (sc->hn_chim != NULL) { hyperv_dmamem_free(&sc->hn_chim_dma, sc->hn_chim); sc->hn_chim = NULL; } if 
(sc->hn_tx_ring_cnt == 0) return; for (i = 0; i < sc->hn_tx_ring_cnt; ++i) hn_destroy_tx_ring(&sc->hn_tx_ring[i]); free(sc->hn_tx_ring, M_NETVSC); sc->hn_tx_ring = NULL; sc->hn_tx_ring_cnt = 0; sc->hn_tx_ring_inuse = 0; } static void hn_start_taskfunc(void *xtxr, int pending __unused) { struct hn_tx_ring *txr = xtxr; mtx_lock(&txr->hn_tx_lock); hn_start_locked(txr, 0); mtx_unlock(&txr->hn_tx_lock); } static void hn_start_txeof_taskfunc(void *xtxr, int pending __unused) { struct hn_tx_ring *txr = xtxr; mtx_lock(&txr->hn_tx_lock); atomic_clear_int(&txr->hn_sc->hn_ifp->if_drv_flags, IFF_DRV_OACTIVE); hn_start_locked(txr, 0); mtx_unlock(&txr->hn_tx_lock); } static void hn_stop_tx_tasks(struct hn_softc *sc) { int i; for (i = 0; i < sc->hn_tx_ring_inuse; ++i) { struct hn_tx_ring *txr = &sc->hn_tx_ring[i]; taskqueue_drain(txr->hn_tx_taskq, &txr->hn_tx_task); taskqueue_drain(txr->hn_tx_taskq, &txr->hn_txeof_task); } } static int hn_xmit(struct hn_tx_ring *txr, int len) { struct hn_softc *sc = txr->hn_sc; struct ifnet *ifp = sc->hn_ifp; struct mbuf *m_head; mtx_assert(&txr->hn_tx_lock, MA_OWNED); KASSERT(hn_use_if_start == 0, ("hn_xmit is called, when if_start is enabled")); if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || txr->hn_oactive) return 0; while ((m_head = drbr_peek(ifp, txr->hn_mbuf_br)) != NULL) { struct hn_txdesc *txd; int error; if (len > 0 && m_head->m_pkthdr.len > len) { /* * This sending could be time consuming; let callers * dispatch this packet sending (and sending of any * following up packets) to tx taskqueue. */ drbr_putback(ifp, txr->hn_mbuf_br, m_head); return 1; } txd = hn_txdesc_get(txr); if (txd == NULL) { txr->hn_no_txdescs++; drbr_putback(ifp, txr->hn_mbuf_br, m_head); txr->hn_oactive = 1; break; } error = hn_encap(txr, txd, &m_head); if (error) { /* Both txd and m_head are freed; discard */ drbr_advance(ifp, txr->hn_mbuf_br); continue; } error = hn_send_pkt(ifp, txr, txd); if (__predict_false(error)) { /* txd is freed, but m_head is not */ drbr_putback(ifp, txr->hn_mbuf_br, m_head); txr->hn_oactive = 1; break; } /* Sent */ drbr_advance(ifp, txr->hn_mbuf_br); } return 0; } static int hn_transmit(struct ifnet *ifp, struct mbuf *m) { struct hn_softc *sc = ifp->if_softc; struct hn_tx_ring *txr; int error, idx = 0; /* * Select the TX ring based on flowid */ if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) idx = m->m_pkthdr.flowid % sc->hn_tx_ring_inuse; txr = &sc->hn_tx_ring[idx]; error = drbr_enqueue(ifp, txr->hn_mbuf_br, m); if (error) { if_inc_counter(ifp, IFCOUNTER_OQDROPS, 1); return error; } if (txr->hn_oactive) return 0; if (txr->hn_sched_tx) goto do_sched; if (mtx_trylock(&txr->hn_tx_lock)) { int sched; sched = hn_xmit(txr, txr->hn_direct_tx_size); mtx_unlock(&txr->hn_tx_lock); if (!sched) return 0; } do_sched: taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_tx_task); return 0; } static void hn_xmit_qflush(struct ifnet *ifp) { struct hn_softc *sc = ifp->if_softc; int i; for (i = 0; i < sc->hn_tx_ring_inuse; ++i) { struct hn_tx_ring *txr = &sc->hn_tx_ring[i]; struct mbuf *m; mtx_lock(&txr->hn_tx_lock); while ((m = buf_ring_dequeue_sc(txr->hn_mbuf_br)) != NULL) m_freem(m); mtx_unlock(&txr->hn_tx_lock); } if_qflush(ifp); } static void hn_xmit_txeof(struct hn_tx_ring *txr) { if (txr->hn_sched_tx) goto do_sched; if (mtx_trylock(&txr->hn_tx_lock)) { int sched; txr->hn_oactive = 0; sched = hn_xmit(txr, txr->hn_direct_tx_size); mtx_unlock(&txr->hn_tx_lock); if (sched) { taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_tx_task); } } else { do_sched: /* * Release the oactive earlier, with 
the hope that * others could catch up. The task will clear the * oactive again with the hn_tx_lock to avoid possible * races. */ txr->hn_oactive = 0; taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_txeof_task); } } static void hn_xmit_taskfunc(void *xtxr, int pending __unused) { struct hn_tx_ring *txr = xtxr; mtx_lock(&txr->hn_tx_lock); hn_xmit(txr, 0); mtx_unlock(&txr->hn_tx_lock); } static void hn_xmit_txeof_taskfunc(void *xtxr, int pending __unused) { struct hn_tx_ring *txr = xtxr; mtx_lock(&txr->hn_tx_lock); txr->hn_oactive = 0; hn_xmit(txr, 0); mtx_unlock(&txr->hn_tx_lock); } static void hn_channel_attach(struct hn_softc *sc, struct vmbus_channel *chan) { struct hn_rx_ring *rxr; int idx; idx = vmbus_chan_subidx(chan); KASSERT(idx >= 0 && idx < sc->hn_rx_ring_inuse, ("invalid channel index %d, should be >= 0 and < %d", idx, sc->hn_rx_ring_inuse)); rxr = &sc->hn_rx_ring[idx]; KASSERT((rxr->hn_rx_flags & HN_RX_FLAG_ATTACHED) == 0, ("RX ring %d already attached", idx)); rxr->hn_rx_flags |= HN_RX_FLAG_ATTACHED; if (bootverbose) { if_printf(sc->hn_ifp, "link RX ring %d to channel %u\n", idx, vmbus_chan_id(chan)); } if (idx < sc->hn_tx_ring_inuse) { struct hn_tx_ring *txr = &sc->hn_tx_ring[idx]; KASSERT((txr->hn_tx_flags & HN_TX_FLAG_ATTACHED) == 0, ("TX ring %d already attached", idx)); txr->hn_tx_flags |= HN_TX_FLAG_ATTACHED; txr->hn_chan = chan; if (bootverbose) { if_printf(sc->hn_ifp, "link TX ring %d to channel %u\n", idx, vmbus_chan_id(chan)); } } /* Bind channel to a proper CPU */ vmbus_chan_cpu_set(chan, (sc->hn_cpu + idx) % mp_ncpus); } static void hn_subchan_attach(struct hn_softc *sc, struct vmbus_channel *chan) { KASSERT(!vmbus_chan_is_primary(chan), ("subchannel callback on primary channel")); hn_channel_attach(sc, chan); } static void hn_subchan_setup(struct hn_softc *sc) { struct vmbus_channel **subchans; int subchan_cnt = sc->hn_rx_ring_inuse - 1; int i; /* Wait for sub-channels setup to complete. */ subchans = vmbus_subchan_get(sc->hn_prichan, subchan_cnt); /* Attach the sub-channels. */ for (i = 0; i < subchan_cnt; ++i) { struct vmbus_channel *subchan = subchans[i]; /* NOTE: Calling order is critical. 
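 * The ring-to-channel linkage established by hn_channel_attach() has to
 * be in place before the NVS layer starts feeding the sub-channel,
 * presumably so no packet can arrive on a ring that is not linked yet;
 * hence the fixed order of the two calls that follow:
 *
 *	hn_subchan_attach(sc, subchan);       - link RX/TX ring, bind CPU
 *	hv_nv_subchan_attach(subchan, rxr);   - only then attach NVS side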
*/ hn_subchan_attach(sc, subchan); hv_nv_subchan_attach(subchan, &sc->hn_rx_ring[vmbus_chan_subidx(subchan)]); } /* Release the sub-channels */ vmbus_subchan_rel(subchans, subchan_cnt); if_printf(sc->hn_ifp, "%d sub-channels setup done\n", subchan_cnt); } static void hn_tx_taskq_create(void *arg __unused) { if (!hn_share_tx_taskq) return; hn_tx_taskq = taskqueue_create("hn_tx", M_WAITOK, taskqueue_thread_enqueue, &hn_tx_taskq); if (hn_bind_tx_taskq >= 0) { int cpu = hn_bind_tx_taskq; cpuset_t cpu_set; if (cpu > mp_ncpus - 1) cpu = mp_ncpus - 1; CPU_SETOF(cpu, &cpu_set); taskqueue_start_threads_cpuset(&hn_tx_taskq, 1, PI_NET, &cpu_set, "hn tx"); } else { taskqueue_start_threads(&hn_tx_taskq, 1, PI_NET, "hn tx"); } } SYSINIT(hn_txtq_create, SI_SUB_DRIVERS, SI_ORDER_FIRST, hn_tx_taskq_create, NULL); static void hn_tx_taskq_destroy(void *arg __unused) { if (hn_tx_taskq != NULL) taskqueue_free(hn_tx_taskq); } SYSUNINIT(hn_txtq_destroy, SI_SUB_DRIVERS, SI_ORDER_FIRST, hn_tx_taskq_destroy, NULL); static device_method_t netvsc_methods[] = { /* Device interface */ DEVMETHOD(device_probe, netvsc_probe), DEVMETHOD(device_attach, netvsc_attach), DEVMETHOD(device_detach, netvsc_detach), DEVMETHOD(device_shutdown, netvsc_shutdown), { 0, 0 } }; static driver_t netvsc_driver = { NETVSC_DEVNAME, netvsc_methods, sizeof(hn_softc_t) }; static devclass_t netvsc_devclass; DRIVER_MODULE(hn, vmbus, netvsc_driver, netvsc_devclass, 0, 0); MODULE_VERSION(hn, 1); MODULE_DEPEND(hn, vmbus, 1, 1, 1); Index: projects/clang390-import/sys/dev/hyperv/netvsc/hv_rndis.h =================================================================== --- projects/clang390-import/sys/dev/hyperv/netvsc/hv_rndis.h (revision 305016) +++ projects/clang390-import/sys/dev/hyperv/netvsc/hv_rndis.h (revision 305017) @@ -1,937 +1,923 @@ /*- * Copyright (c) 2009-2012,2016 Microsoft Corp. * Copyright (c) 2010-2012 Citrix Inc. * Copyright (c) 2012 NetApp Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* * $FreeBSD$ */ #ifndef __HV_RNDIS_H__ #define __HV_RNDIS_H__ #include /* * NDIS protocol version numbers */ #define NDIS_VERSION_5_0 0x00050000 #define NDIS_VERSION_5_1 0x00050001 #define NDIS_VERSION_6_0 0x00060000 #define NDIS_VERSION_6_1 0x00060001 #define NDIS_VERSION_6_30 0x0006001e #define NDIS_VERSION_MAJOR(ver) (((ver) & 0xffff0000) >> 16) #define NDIS_VERSION_MINOR(ver) ((ver) & 0xffff) /* * Object Identifiers used by NdisRequest Query/Set Information */ /* * General Objects */ #define RNDIS_OID_GEN_SUPPORTED_LIST 0x00010101 #define RNDIS_OID_GEN_HARDWARE_STATUS 0x00010102 #define RNDIS_OID_GEN_MEDIA_SUPPORTED 0x00010103 #define RNDIS_OID_GEN_MEDIA_IN_USE 0x00010104 #define RNDIS_OID_GEN_MAXIMUM_LOOKAHEAD 0x00010105 #define RNDIS_OID_GEN_MAXIMUM_FRAME_SIZE 0x00010106 #define RNDIS_OID_GEN_LINK_SPEED 0x00010107 #define RNDIS_OID_GEN_TRANSMIT_BUFFER_SPACE 0x00010108 #define RNDIS_OID_GEN_RECEIVE_BUFFER_SPACE 0x00010109 #define RNDIS_OID_GEN_TRANSMIT_BLOCK_SIZE 0x0001010A #define RNDIS_OID_GEN_RECEIVE_BLOCK_SIZE 0x0001010B #define RNDIS_OID_GEN_VENDOR_ID 0x0001010C #define RNDIS_OID_GEN_VENDOR_DESCRIPTION 0x0001010D #define RNDIS_OID_GEN_CURRENT_PACKET_FILTER 0x0001010E #define RNDIS_OID_GEN_CURRENT_LOOKAHEAD 0x0001010F #define RNDIS_OID_GEN_DRIVER_VERSION 0x00010110 #define RNDIS_OID_GEN_MAXIMUM_TOTAL_SIZE 0x00010111 #define RNDIS_OID_GEN_PROTOCOL_OPTIONS 0x00010112 #define RNDIS_OID_GEN_MAC_OPTIONS 0x00010113 #define RNDIS_OID_GEN_MEDIA_CONNECT_STATUS 0x00010114 #define RNDIS_OID_GEN_MAXIMUM_SEND_PACKETS 0x00010115 #define RNDIS_OID_GEN_VENDOR_DRIVER_VERSION 0x00010116 #define RNDIS_OID_GEN_NETWORK_LAYER_ADDRESSES 0x00010118 #define RNDIS_OID_GEN_TRANSPORT_HEADER_OFFSET 0x00010119 #define RNDIS_OID_GEN_MACHINE_NAME 0x0001021A #define RNDIS_OID_GEN_RNDIS_CONFIG_PARAMETER 0x0001021B /* * For receive side scale */ /* Query only */ #define RNDIS_OID_GEN_RSS_CAPABILITIES 0x00010203 /* Query and set */ #define RNDIS_OID_GEN_RSS_PARAMETERS 0x00010204 #define RNDIS_OID_GEN_XMIT_OK 0x00020101 #define RNDIS_OID_GEN_RCV_OK 0x00020102 #define RNDIS_OID_GEN_XMIT_ERROR 0x00020103 #define RNDIS_OID_GEN_RCV_ERROR 0x00020104 #define RNDIS_OID_GEN_RCV_NO_BUFFER 0x00020105 #define RNDIS_OID_GEN_DIRECTED_BYTES_XMIT 0x00020201 #define RNDIS_OID_GEN_DIRECTED_FRAMES_XMIT 0x00020202 #define RNDIS_OID_GEN_MULTICAST_BYTES_XMIT 0x00020203 #define RNDIS_OID_GEN_MULTICAST_FRAMES_XMIT 0x00020204 #define RNDIS_OID_GEN_BROADCAST_BYTES_XMIT 0x00020205 #define RNDIS_OID_GEN_BROADCAST_FRAMES_XMIT 0x00020206 #define RNDIS_OID_GEN_DIRECTED_BYTES_RCV 0x00020207 #define RNDIS_OID_GEN_DIRECTED_FRAMES_RCV 0x00020208 #define RNDIS_OID_GEN_MULTICAST_BYTES_RCV 0x00020209 #define RNDIS_OID_GEN_MULTICAST_FRAMES_RCV 0x0002020A #define RNDIS_OID_GEN_BROADCAST_BYTES_RCV 0x0002020B #define RNDIS_OID_GEN_BROADCAST_FRAMES_RCV 0x0002020C #define RNDIS_OID_GEN_RCV_CRC_ERROR 0x0002020D #define RNDIS_OID_GEN_TRANSMIT_QUEUE_LENGTH 0x0002020E #define RNDIS_OID_GEN_GET_TIME_CAPS 0x0002020F #define RNDIS_OID_GEN_GET_NETCARD_TIME 0x00020210 /* * These are connection-oriented general OIDs. * These replace the above OIDs for connection-oriented media. 
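 * (Note that "replace" means the numeric values are reused: for example,
 * RNDIS_OID_GEN_CO_SUPPORTED_LIST below is 0x00010101, identical to
 * RNDIS_OID_GEN_SUPPORTED_LIST above; which interpretation applies is
 * determined by the medium being connection-oriented.)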
*/ #define RNDIS_OID_GEN_CO_SUPPORTED_LIST 0x00010101 #define RNDIS_OID_GEN_CO_HARDWARE_STATUS 0x00010102 #define RNDIS_OID_GEN_CO_MEDIA_SUPPORTED 0x00010103 #define RNDIS_OID_GEN_CO_MEDIA_IN_USE 0x00010104 #define RNDIS_OID_GEN_CO_LINK_SPEED 0x00010105 #define RNDIS_OID_GEN_CO_VENDOR_ID 0x00010106 #define RNDIS_OID_GEN_CO_VENDOR_DESCRIPTION 0x00010107 #define RNDIS_OID_GEN_CO_DRIVER_VERSION 0x00010108 #define RNDIS_OID_GEN_CO_PROTOCOL_OPTIONS 0x00010109 #define RNDIS_OID_GEN_CO_MAC_OPTIONS 0x0001010A #define RNDIS_OID_GEN_CO_MEDIA_CONNECT_STATUS 0x0001010B #define RNDIS_OID_GEN_CO_VENDOR_DRIVER_VERSION 0x0001010C #define RNDIS_OID_GEN_CO_MINIMUM_LINK_SPEED 0x0001010D #define RNDIS_OID_GEN_CO_GET_TIME_CAPS 0x00010201 #define RNDIS_OID_GEN_CO_GET_NETCARD_TIME 0x00010202 /* * These are connection-oriented statistics OIDs. */ #define RNDIS_OID_GEN_CO_XMIT_PDUS_OK 0x00020101 #define RNDIS_OID_GEN_CO_RCV_PDUS_OK 0x00020102 #define RNDIS_OID_GEN_CO_XMIT_PDUS_ERROR 0x00020103 #define RNDIS_OID_GEN_CO_RCV_PDUS_ERROR 0x00020104 #define RNDIS_OID_GEN_CO_RCV_PDUS_NO_BUFFER 0x00020105 #define RNDIS_OID_GEN_CO_RCV_CRC_ERROR 0x00020201 #define RNDIS_OID_GEN_CO_TRANSMIT_QUEUE_LENGTH 0x00020202 #define RNDIS_OID_GEN_CO_BYTES_XMIT 0x00020203 #define RNDIS_OID_GEN_CO_BYTES_RCV 0x00020204 #define RNDIS_OID_GEN_CO_BYTES_XMIT_OUTSTANDING 0x00020205 #define RNDIS_OID_GEN_CO_NETCARD_LOAD 0x00020206 /* * These are objects for Connection-oriented media call-managers. */ #define RNDIS_OID_CO_ADD_PVC 0xFF000001 #define RNDIS_OID_CO_DELETE_PVC 0xFF000002 #define RNDIS_OID_CO_GET_CALL_INFORMATION 0xFF000003 #define RNDIS_OID_CO_ADD_ADDRESS 0xFF000004 #define RNDIS_OID_CO_DELETE_ADDRESS 0xFF000005 #define RNDIS_OID_CO_GET_ADDRESSES 0xFF000006 #define RNDIS_OID_CO_ADDRESS_CHANGE 0xFF000007 #define RNDIS_OID_CO_SIGNALING_ENABLED 0xFF000008 #define RNDIS_OID_CO_SIGNALING_DISABLED 0xFF000009 /* * 802.3 Objects (Ethernet) */ #define RNDIS_OID_802_3_PERMANENT_ADDRESS 0x01010101 #define RNDIS_OID_802_3_CURRENT_ADDRESS 0x01010102 #define RNDIS_OID_802_3_MULTICAST_LIST 0x01010103 #define RNDIS_OID_802_3_MAXIMUM_LIST_SIZE 0x01010104 #define RNDIS_OID_802_3_MAC_OPTIONS 0x01010105 /* * */ #define NDIS_802_3_MAC_OPTION_PRIORITY 0x00000001 #define RNDIS_OID_802_3_RCV_ERROR_ALIGNMENT 0x01020101 #define RNDIS_OID_802_3_XMIT_ONE_COLLISION 0x01020102 #define RNDIS_OID_802_3_XMIT_MORE_COLLISIONS 0x01020103 #define RNDIS_OID_802_3_XMIT_DEFERRED 0x01020201 #define RNDIS_OID_802_3_XMIT_MAX_COLLISIONS 0x01020202 #define RNDIS_OID_802_3_RCV_OVERRUN 0x01020203 #define RNDIS_OID_802_3_XMIT_UNDERRUN 0x01020204 #define RNDIS_OID_802_3_XMIT_HEARTBEAT_FAILURE 0x01020205 #define RNDIS_OID_802_3_XMIT_TIMES_CRS_LOST 0x01020206 #define RNDIS_OID_802_3_XMIT_LATE_COLLISIONS 0x01020207 /* * RNDIS MP custom OID for test */ #define OID_RNDISMP_GET_RECEIVE_BUFFERS 0xFFA0C90D // Query only /* * Remote NDIS offload parameters */ #define RNDIS_OBJECT_TYPE_DEFAULT 0x80 #define RNDIS_OFFLOAD_PARAMETERS_REVISION_3 3 #define RNDIS_OFFLOAD_PARAMETERS_NO_CHANGE 0 #define RNDIS_OFFLOAD_PARAMETERS_LSOV2_DISABLED 1 #define RNDIS_OFFLOAD_PARAMETERS_LSOV2_ENABLED 2 #define RNDIS_OFFLOAD_PARAMETERS_LSOV1_ENABLED 2 #define RNDIS_OFFLOAD_PARAMETERS_RSC_DISABLED 1 #define RNDIS_OFFLOAD_PARAMETERS_RSC_ENABLED 2 #define RNDIS_OFFLOAD_PARAMETERS_TX_RX_DISABLED 1 #define RNDIS_OFFLOAD_PARAMETERS_TX_ENABLED_RX_DISABLED 2 #define RNDIS_OFFLOAD_PARAMETERS_RX_ENABLED_TX_DISABLED 3 #define RNDIS_OFFLOAD_PARAMETERS_TX_RX_ENABLED 4 #define RNDIS_TCP_LARGE_SEND_OFFLOAD_V2_TYPE 1 #define 
RNDIS_TCP_LARGE_SEND_OFFLOAD_IPV4 0 #define RNDIS_TCP_LARGE_SEND_OFFLOAD_IPV6 1 #define RNDIS_OID_TCP_OFFLOAD_CURRENT_CONFIG 0xFC01020B /* query only */ #define RNDIS_OID_TCP_OFFLOAD_PARAMETERS 0xFC01020C /* set only */ #define RNDIS_OID_TCP_OFFLOAD_HARDWARE_CAPABILITIES 0xFC01020D/* query only */ #define RNDIS_OID_TCP_CONNECTION_OFFLOAD_CURRENT_CONFIG 0xFC01020E /* query only */ #define RNDIS_OID_TCP_CONNECTION_OFFLOAD_HARDWARE_CAPABILITIES 0xFC01020F /* query */ #define RNDIS_OID_OFFLOAD_ENCAPSULATION 0x0101010A /* set/query */ /* * NdisInitialize message */ typedef struct rndis_initialize_request_ { /* RNDIS request ID */ uint32_t request_id; uint32_t major_version; uint32_t minor_version; uint32_t max_xfer_size; } rndis_initialize_request; /* * Response to NdisInitialize */ typedef struct rndis_initialize_complete_ { /* RNDIS request ID */ uint32_t request_id; /* RNDIS status */ uint32_t status; uint32_t major_version; uint32_t minor_version; uint32_t device_flags; /* RNDIS medium */ uint32_t medium; uint32_t max_pkts_per_msg; uint32_t max_xfer_size; uint32_t pkt_align_factor; uint32_t af_list_offset; uint32_t af_list_size; } rndis_initialize_complete; /* * Call manager devices only: Information about an address family * supported by the device is appended to the response to NdisInitialize. */ typedef struct rndis_co_address_family_ { /* RNDIS AF */ uint32_t address_family; uint32_t major_version; uint32_t minor_version; } rndis_co_address_family; /* * NdisHalt message */ typedef struct rndis_halt_request_ { /* RNDIS request ID */ uint32_t request_id; } rndis_halt_request; /* * NdisQueryRequest message */ typedef struct rndis_query_request_ { /* RNDIS request ID */ uint32_t request_id; /* RNDIS OID */ uint32_t oid; uint32_t info_buffer_length; uint32_t info_buffer_offset; /* RNDIS handle */ uint32_t device_vc_handle; } rndis_query_request; /* * Response to NdisQueryRequest */ typedef struct rndis_query_complete_ { /* RNDIS request ID */ uint32_t request_id; /* RNDIS status */ uint32_t status; uint32_t info_buffer_length; uint32_t info_buffer_offset; } rndis_query_complete; /* * NdisSetRequest message */ typedef struct rndis_set_request_ { /* RNDIS request ID */ uint32_t request_id; /* RNDIS OID */ uint32_t oid; uint32_t info_buffer_length; uint32_t info_buffer_offset; /* RNDIS handle */ uint32_t device_vc_handle; } rndis_set_request; /* * Response to NdisSetRequest */ typedef struct rndis_set_complete_ { /* RNDIS request ID */ uint32_t request_id; /* RNDIS status */ uint32_t status; } rndis_set_complete; /* * NdisReset message */ typedef struct rndis_reset_request_ { uint32_t reserved; } rndis_reset_request; /* * Response to NdisReset */ typedef struct rndis_reset_complete_ { /* RNDIS status */ uint32_t status; uint32_t addressing_reset; } rndis_reset_complete; /* * NdisMIndicateStatus message */ typedef struct rndis_indicate_status_ { /* RNDIS status */ uint32_t status; uint32_t status_buf_length; uint32_t status_buf_offset; } rndis_indicate_status; /* * Diagnostic information passed as the status buffer in * rndis_indicate_status messages signifying error conditions. 
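 * A sketch of how such a status buffer would be located, mirroring the
 * MESSAGE_TO_STATUS_BUFFER() macro defined later in this header
 * (assuming, as that macro does, that the offset is taken relative to
 * the message pointer):
 *
 *	buf = (uint8_t *)msg + msg->status_buf_offset;
 *	len = msg->status_buf_length;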
*/ typedef struct rndis_diagnostic_info_ { /* RNDIS status */ uint32_t diag_status; uint32_t error_offset; } rndis_diagnostic_info; /* * NdisKeepAlive message */ typedef struct rndis_keepalive_request_ { /* RNDIS request ID */ uint32_t request_id; } rndis_keepalive_request; /* * Response to NdisKeepAlive */ typedef struct rndis_keepalive_complete_ { /* RNDIS request ID */ uint32_t request_id; /* RNDIS status */ uint32_t status; } rndis_keepalive_complete; /* * Data message. All offset fields contain byte offsets from the beginning * of the rndis_packet structure. All length fields are in bytes. * VcHandle is set to 0 for connectionless data, otherwise it * contains the VC handle. */ typedef struct rndis_packet_ { uint32_t data_offset; uint32_t data_length; uint32_t oob_data_offset; uint32_t oob_data_length; uint32_t num_oob_data_elements; uint32_t per_pkt_info_offset; uint32_t per_pkt_info_length; /* RNDIS handle */ uint32_t vc_handle; uint32_t reserved; } rndis_packet; typedef struct rndis_packet_ex_ { uint32_t data_offset; uint32_t data_length; uint32_t oob_data_offset; uint32_t oob_data_length; uint32_t num_oob_data_elements; uint32_t per_pkt_info_offset; uint32_t per_pkt_info_length; /* RNDIS handle */ uint32_t vc_handle; uint32_t reserved; uint64_t data_buf_id; uint32_t data_buf_offset; uint64_t next_header_buf_id; uint32_t next_header_byte_offset; uint32_t next_header_byte_count; } rndis_packet_ex; /* * Optional Out of Band data associated with a Data message. */ typedef struct rndis_oobd_ { uint32_t size; /* RNDIS class ID */ uint32_t type; uint32_t class_info_offset; } rndis_oobd; /* * Packet extension field contents associated with a Data message. */ typedef struct rndis_per_packet_info_ { uint32_t size; uint32_t type; uint32_t per_packet_info_offset; } rndis_per_packet_info; typedef enum ndis_per_pkt_infotype_ { tcpip_chksum_info, ipsec_info, tcp_large_send_info, classification_handle_info, ndis_reserved, sgl_info, ieee_8021q_info, original_pkt_info, pkt_cancel_id, original_netbuf_list, cached_netbuf_list, short_pkt_padding_info, max_perpkt_info } ndis_per_pkt_infotype; #define nbl_hash_value pkt_cancel_id #define nbl_hash_info original_netbuf_list typedef struct ndis_8021q_info_ { union { struct { uint32_t user_pri : 3; /* User Priority */ uint32_t cfi : 1; /* Canonical Format ID */ uint32_t vlan_id : 12; uint32_t reserved : 16; } s1; uint32_t value; } u1; } ndis_8021q_info; struct rndis_object_header { uint8_t type; uint8_t revision; uint16_t size; }; typedef struct rndis_offload_params_ { struct rndis_object_header header; uint8_t ipv4_csum; uint8_t tcp_ipv4_csum; uint8_t udp_ipv4_csum; uint8_t tcp_ipv6_csum; uint8_t udp_ipv6_csum; uint8_t lso_v1; uint8_t ip_sec_v1; uint8_t lso_v2_ipv4; uint8_t lso_v2_ipv6; uint8_t tcp_connection_ipv4; uint8_t tcp_connection_ipv6; uint32_t flags; uint8_t ip_sec_v2; uint8_t ip_sec_v2_ipv4; struct { uint8_t rsc_ipv4; uint8_t rsc_ipv6; }; struct { uint8_t encapsulated_packet_task_offload; uint8_t encapsulation_types; }; } rndis_offload_params; typedef struct rndis_tcp_ip_csum_info_ { union { struct { uint32_t is_ipv4:1; uint32_t is_ipv6:1; uint32_t tcp_csum:1; uint32_t udp_csum:1; uint32_t ip_header_csum:1; uint32_t reserved:11; uint32_t tcp_header_offset:10; } xmit; struct { uint32_t tcp_csum_failed:1; uint32_t udp_csum_failed:1; uint32_t ip_csum_failed:1; uint32_t tcp_csum_succeeded:1; uint32_t udp_csum_succeeded:1; uint32_t ip_csum_succeeded:1; uint32_t loopback:1; uint32_t tcp_csum_value_invalid:1; uint32_t ip_csum_value_invalid:1; } receive; 
uint32_t value; }; } rndis_tcp_ip_csum_info; struct rndis_hash_value { uint32_t hash_value; } __packed; struct rndis_hash_info { uint32_t hash_info; } __packed; -#define NDIS_HASH_FUNCTION_MASK 0x000000FF /* see hash function */ -#define NDIS_HASH_TYPE_MASK 0x00FFFF00 /* see hash type */ - -/* hash function */ -#define NDIS_HASH_FUNCTION_TOEPLITZ 0x00000001 - -/* hash type */ -#define NDIS_HASH_IPV4 0x00000100 -#define NDIS_HASH_TCP_IPV4 0x00000200 -#define NDIS_HASH_IPV6 0x00000400 -#define NDIS_HASH_IPV6_EX 0x00000800 -#define NDIS_HASH_TCP_IPV6 0x00001000 -#define NDIS_HASH_TCP_IPV6_EX 0x00002000 - typedef struct rndis_tcp_tso_info_ { union { struct { uint32_t unused:30; uint32_t type:1; uint32_t reserved2:1; } xmit; struct { uint32_t mss:20; uint32_t tcp_header_offset:10; uint32_t type:1; uint32_t reserved2:1; } lso_v1_xmit; struct { uint32_t tcp_payload:30; uint32_t type:1; uint32_t reserved2:1; } lso_v1_xmit_complete; struct { uint32_t mss:20; uint32_t tcp_header_offset:10; uint32_t type:1; uint32_t ip_version:1; } lso_v2_xmit; struct { uint32_t reserved:30; uint32_t type:1; uint32_t reserved2:1; } lso_v2_xmit_complete; uint32_t value; }; } rndis_tcp_tso_info; #define RNDIS_HASHVAL_PPI_SIZE (sizeof(rndis_per_packet_info) + \ sizeof(struct rndis_hash_value)) #define RNDIS_VLAN_PPI_SIZE (sizeof(rndis_per_packet_info) + \ sizeof(ndis_8021q_info)) #define RNDIS_CSUM_PPI_SIZE (sizeof(rndis_per_packet_info) + \ sizeof(rndis_tcp_ip_csum_info)) #define RNDIS_TSO_PPI_SIZE (sizeof(rndis_per_packet_info) + \ sizeof(rndis_tcp_tso_info)) /* * Format of Information buffer passed in a SetRequest for the OID * OID_GEN_RNDIS_CONFIG_PARAMETER. */ typedef struct rndis_config_parameter_info_ { uint32_t parameter_name_offset; uint32_t parameter_name_length; uint32_t parameter_type; uint32_t parameter_value_offset; uint32_t parameter_value_length; } rndis_config_parameter_info; /* * Values for ParameterType in rndis_config_parameter_info */ #define RNDIS_CONFIG_PARAM_TYPE_INTEGER 0 #define RNDIS_CONFIG_PARAM_TYPE_STRING 2 /* * CONDIS Miniport messages for connection oriented devices * that do not implement a call manager. 
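 * (Arithmetic check for the PPI size macros above: rndis_per_packet_info
 * is three uint32_t fields, i.e. 12 bytes, and rndis_tcp_ip_csum_info is
 * a single 32-bit union, so RNDIS_CSUM_PPI_SIZE works out to
 * 12 + 4 = 16 bytes; RNDIS_VLAN_PPI_SIZE is likewise 16 bytes.)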
*/ /* * CoNdisMiniportCreateVc message */ typedef struct rcondis_mp_create_vc_ { /* RNDIS request ID */ uint32_t request_id; /* RNDIS handle */ uint32_t ndis_vc_handle; } rcondis_mp_create_vc; /* * Response to CoNdisMiniportCreateVc */ typedef struct rcondis_mp_create_vc_complete_ { /* RNDIS request ID */ uint32_t request_id; /* RNDIS handle */ uint32_t device_vc_handle; /* RNDIS status */ uint32_t status; } rcondis_mp_create_vc_complete; /* * CoNdisMiniportDeleteVc message */ typedef struct rcondis_mp_delete_vc_ { /* RNDIS request ID */ uint32_t request_id; /* RNDIS handle */ uint32_t device_vc_handle; } rcondis_mp_delete_vc; /* * Response to CoNdisMiniportDeleteVc */ typedef struct rcondis_mp_delete_vc_complete_ { /* RNDIS request ID */ uint32_t request_id; /* RNDIS status */ uint32_t status; } rcondis_mp_delete_vc_complete; /* * CoNdisMiniportQueryRequest message */ typedef struct rcondis_mp_query_request_ { /* RNDIS request ID */ uint32_t request_id; /* RNDIS request type */ uint32_t request_type; /* RNDIS OID */ uint32_t oid; /* RNDIS handle */ uint32_t device_vc_handle; uint32_t info_buf_length; uint32_t info_buf_offset; } rcondis_mp_query_request; /* * CoNdisMiniportSetRequest message */ typedef struct rcondis_mp_set_request_ { /* RNDIS request ID */ uint32_t request_id; /* RNDIS request type */ uint32_t request_type; /* RNDIS OID */ uint32_t oid; /* RNDIS handle */ uint32_t device_vc_handle; uint32_t info_buf_length; uint32_t info_buf_offset; } rcondis_mp_set_request; /* * CoNdisIndicateStatus message */ typedef struct rcondis_indicate_status_ { /* RNDIS handle */ uint32_t ndis_vc_handle; /* RNDIS status */ uint32_t status; uint32_t status_buf_length; uint32_t status_buf_offset; } rcondis_indicate_status; /* * CONDIS Call/VC parameters */ typedef struct rcondis_specific_parameters_ { uint32_t parameter_type; uint32_t parameter_length; uint32_t parameter_offset; } rcondis_specific_parameters; typedef struct rcondis_media_parameters_ { uint32_t flags; uint32_t reserved1; uint32_t reserved2; rcondis_specific_parameters media_specific; } rcondis_media_parameters; typedef struct rndis_flowspec_ { uint32_t token_rate; uint32_t token_bucket_size; uint32_t peak_bandwidth; uint32_t latency; uint32_t delay_variation; uint32_t service_type; uint32_t max_sdu_size; uint32_t minimum_policed_size; } rndis_flowspec; typedef struct rcondis_call_manager_parameters_ { rndis_flowspec transmit; rndis_flowspec receive; rcondis_specific_parameters call_mgr_specific; } rcondis_call_manager_parameters; /* * CoNdisMiniportActivateVc message */ typedef struct rcondis_mp_activate_vc_request_ { /* RNDIS request ID */ uint32_t request_id; uint32_t flags; /* RNDIS handle */ uint32_t device_vc_handle; uint32_t media_params_offset; uint32_t media_params_length; uint32_t call_mgr_params_offset; uint32_t call_mgr_params_length; } rcondis_mp_activate_vc_request; /* * Response to CoNdisMiniportActivateVc */ typedef struct rcondis_mp_activate_vc_complete_ { /* RNDIS request ID */ uint32_t request_id; /* RNDIS status */ uint32_t status; } rcondis_mp_activate_vc_complete; /* * CoNdisMiniportDeactivateVc message */ typedef struct rcondis_mp_deactivate_vc_request_ { /* RNDIS request ID */ uint32_t request_id; uint32_t flags; /* RNDIS handle */ uint32_t device_vc_handle; } rcondis_mp_deactivate_vc_request; /* * Response to CoNdisMiniportDeactivateVc */ typedef struct rcondis_mp_deactivate_vc_complete_ { /* RNDIS request ID */ uint32_t request_id; /* RNDIS status */ uint32_t status; } rcondis_mp_deactivate_vc_complete; /* * 
union with all of the RNDIS messages */ typedef union rndis_msg_container_ { rndis_packet packet; rndis_initialize_request init_request; rndis_halt_request halt_request; rndis_query_request query_request; rndis_set_request set_request; rndis_reset_request reset_request; rndis_keepalive_request keepalive_request; rndis_indicate_status indicate_status; rndis_initialize_complete init_complete; rndis_query_complete query_complete; rndis_set_complete set_complete; rndis_reset_complete reset_complete; rndis_keepalive_complete keepalive_complete; rcondis_mp_create_vc co_miniport_create_vc; rcondis_mp_delete_vc co_miniport_delete_vc; rcondis_indicate_status co_miniport_status; rcondis_mp_activate_vc_request co_miniport_activate_vc; rcondis_mp_deactivate_vc_request co_miniport_deactivate_vc; rcondis_mp_create_vc_complete co_miniport_create_vc_complete; rcondis_mp_delete_vc_complete co_miniport_delete_vc_complete; rcondis_mp_activate_vc_complete co_miniport_activate_vc_complete; rcondis_mp_deactivate_vc_complete co_miniport_deactivate_vc_complete; rndis_packet_ex packet_ex; } rndis_msg_container; /* * Remote NDIS message format */ typedef struct rndis_msg_ { uint32_t ndis_msg_type; /* * Total length of this message, from the beginning * of the rndis_msg struct, in bytes. */ uint32_t msg_len; /* Actual message */ rndis_msg_container msg; } rndis_msg; /* * Handy macros */ /* * get the size of an RNDIS message. Pass in the message type, * rndis_set_request, rndis_packet for example */ #define RNDIS_MESSAGE_SIZE(message) \ (sizeof(message) + (sizeof(rndis_msg) - sizeof(rndis_msg_container))) /* * get pointer to info buffer with message pointer */ #define MESSAGE_TO_INFO_BUFFER(message) \ (((PUCHAR)(message)) + message->InformationBufferOffset) /* * get pointer to status buffer with message pointer */ #define MESSAGE_TO_STATUS_BUFFER(message) \ (((PUCHAR)(message)) + message->StatusBufferOffset) /* * get pointer to OOBD buffer with message pointer */ #define MESSAGE_TO_OOBD_BUFFER(message) \ (((PUCHAR)(message)) + message->OOBDataOffset) /* * get pointer to data buffer with message pointer */ #define MESSAGE_TO_DATA_BUFFER(message) \ (((PUCHAR)(message)) + message->PerPacketInfoOffset) /* * get pointer to contained message from NDIS_MESSAGE pointer */ #define RNDIS_MESSAGE_PTR_TO_MESSAGE_PTR(rndis_message) \ ((void *) &rndis_message->Message) /* * get pointer to contained message from NDIS_MESSAGE pointer */ #define RNDIS_MESSAGE_RAW_PTR_TO_MESSAGE_PTR(rndis_message) \ ((void *) rndis_message) /* * Structures used in OID_RNDISMP_GET_RECEIVE_BUFFERS */ #define RNDISMP_RECEIVE_BUFFER_ELEM_FLAG_VMQ_RECEIVE_BUFFER 0x00000001 typedef struct rndismp_rx_buf_elem_ { uint32_t flags; uint32_t length; uint64_t rx_buf_id; uint32_t gpadl_handle; void *rx_buf; } rndismp_rx_buf_elem; typedef struct rndismp_rx_bufs_info_ { uint32_t num_rx_bufs; rndismp_rx_buf_elem rx_buf_elems[1]; } rndismp_rx_bufs_info; #define RNDIS_HEADER_SIZE (sizeof(rndis_msg) - sizeof(rndis_msg_container)) #define NDIS_PACKET_TYPE_DIRECTED 0x00000001 #define NDIS_PACKET_TYPE_MULTICAST 0x00000002 #define NDIS_PACKET_TYPE_ALL_MULTICAST 0x00000004 #define NDIS_PACKET_TYPE_BROADCAST 0x00000008 #define NDIS_PACKET_TYPE_SOURCE_ROUTING 0x00000010 #define NDIS_PACKET_TYPE_PROMISCUOUS 0x00000020 #define NDIS_PACKET_TYPE_SMT 0x00000040 #define NDIS_PACKET_TYPE_ALL_LOCAL 0x00000080 #define NDIS_PACKET_TYPE_GROUP 0x00000100 #define NDIS_PACKET_TYPE_ALL_FUNCTIONAL 0x00000200 #define NDIS_PACKET_TYPE_FUNCTIONAL 0x00000400 #define NDIS_PACKET_TYPE_MAC_FRAME 
0x00000800 /* * Externs */ struct hn_rx_ring; struct hn_tx_ring; struct hn_recvinfo; int netvsc_recv(struct hn_rx_ring *rxr, const void *data, int dlen, const struct hn_recvinfo *info); void netvsc_channel_rollup(struct hn_rx_ring *rxr, struct hn_tx_ring *txr); void* hv_set_rppi_data(rndis_msg *rndis_mesg, uint32_t rppi_size, int pkt_type); void* hv_get_ppi_data(rndis_packet *rpkt, uint32_t type); #endif /* __HV_RNDIS_H__ */ Index: projects/clang390-import/sys/dev/hyperv/netvsc/hv_rndis_filter.c =================================================================== --- projects/clang390-import/sys/dev/hyperv/netvsc/hv_rndis_filter.c (revision 305016) +++ projects/clang390-import/sys/dev/hyperv/netvsc/hv_rndis_filter.c (revision 305017) @@ -1,1551 +1,1490 @@ /*- * Copyright (c) 2009-2012,2016 Microsoft Corp. * Copyright (c) 2010-2012 Citrix Inc. * Copyright (c) 2012 NetApp Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define HV_RF_RECVINFO_VLAN 0x1 #define HV_RF_RECVINFO_CSUM 0x2 #define HV_RF_RECVINFO_HASHINF 0x4 #define HV_RF_RECVINFO_HASHVAL 0x8 #define HV_RF_RECVINFO_ALL \ (HV_RF_RECVINFO_VLAN | \ HV_RF_RECVINFO_CSUM | \ HV_RF_RECVINFO_HASHINF | \ HV_RF_RECVINFO_HASHVAL) #define HN_RNDIS_RID_COMPAT_MASK 0xffff #define HN_RNDIS_RID_COMPAT_MAX HN_RNDIS_RID_COMPAT_MASK #define HN_RNDIS_XFER_SIZE 2048 /* * Forward declarations */ static int hv_rf_send_request(rndis_device *device, rndis_request *request, uint32_t message_type); static void hv_rf_receive_response(rndis_device *device, const rndis_msg *response); static void hv_rf_receive_indicate_status(rndis_device *device, const rndis_msg *response); static void hv_rf_receive_data(struct hn_rx_ring *rxr, const void *data, int dlen); -static int hv_rf_query_device(rndis_device *device, uint32_t oid, - void *result, uint32_t *result_size); static inline int hv_rf_query_device_mac(rndis_device *device); static inline int hv_rf_query_device_link_status(rndis_device *device); static int hv_rf_set_packet_filter(rndis_device *device, uint32_t new_filter); static int hv_rf_init_device(rndis_device *device); static int hv_rf_open_device(rndis_device *device); static int hv_rf_close_device(rndis_device *device); int hv_rf_send_offload_request(struct hn_softc *sc, rndis_offload_params *offloads); static void hn_rndis_sent_halt(struct hn_send_ctx *sndc, struct hn_softc *sc, struct vmbus_channel *chan, const void *data, int dlen); static void hn_rndis_sent_cb(struct hn_send_ctx *sndc, struct hn_softc *sc, struct vmbus_channel *chan, const void *data, int dlen); static int hn_rndis_query(struct hn_softc *sc, uint32_t oid, const void *idata, size_t idlen, void *odata, size_t *odlen0); static int hn_rndis_set(struct hn_softc *sc, uint32_t oid, const void *data, size_t dlen); static int hn_rndis_conf_offload(struct hn_softc *sc); +static int hn_rndis_get_rsscaps(struct hn_softc *sc, int *rxr_cnt); +static int hn_rndis_conf_rss(struct hn_softc *sc, int nchan); static __inline uint32_t hn_rndis_rid(struct hn_softc *sc) { uint32_t rid; again: rid = atomic_fetchadd_int(&sc->hn_rndis_rid, 1); if (rid == 0) goto again; /* Use upper 16 bits for non-compat RNDIS messages. */ return ((rid & 0xffff) << 16); } /* * Set the Per-Packet-Info with the specified type */ void * hv_set_rppi_data(rndis_msg *rndis_mesg, uint32_t rppi_size, int pkt_type) { rndis_packet *rndis_pkt; rndis_per_packet_info *rppi; rndis_pkt = &rndis_mesg->msg.packet; rndis_pkt->data_offset += rppi_size; rppi = (rndis_per_packet_info *)((char *)rndis_pkt + rndis_pkt->per_pkt_info_offset + rndis_pkt->per_pkt_info_length); rppi->size = rppi_size; rppi->type = pkt_type; rppi->per_packet_info_offset = sizeof(rndis_per_packet_info); rndis_pkt->per_pkt_info_length += rppi_size; return (rppi); } /* * Get the Per-Packet-Info with the specified type * return NULL if not found. 
*/ void * hv_get_ppi_data(rndis_packet *rpkt, uint32_t type) { rndis_per_packet_info *ppi; int len; if (rpkt->per_pkt_info_offset == 0) return (NULL); ppi = (rndis_per_packet_info *)((unsigned long)rpkt + rpkt->per_pkt_info_offset); len = rpkt->per_pkt_info_length; while (len > 0) { if (ppi->type == type) return (void *)((unsigned long)ppi + ppi->per_packet_info_offset); len -= ppi->size; ppi = (rndis_per_packet_info *)((unsigned long)ppi + ppi->size); } return (NULL); } /* * Allocate and initialize an rndis_device, including its request list and lock. */ static inline rndis_device * hv_get_rndis_device(void) { rndis_device *device; device = malloc(sizeof(rndis_device), M_NETVSC, M_WAITOK | M_ZERO); mtx_init(&device->req_lock, "HV-FRL", NULL, MTX_DEF); /* Same effect as STAILQ_HEAD_INITIALIZER() static initializer */ STAILQ_INIT(&device->myrequest_list); device->state = RNDIS_DEV_UNINITIALIZED; return (device); } /* * Tear down and free an rndis_device. */ static inline void hv_put_rndis_device(rndis_device *device) { mtx_destroy(&device->req_lock); free(device, M_NETVSC); } /* * Allocate an rndis_request, assign it a compat request id, and append it to the device's request list. */ static inline rndis_request * hv_rndis_request(rndis_device *device, uint32_t message_type, uint32_t message_length) { rndis_request *request; rndis_msg *rndis_mesg; rndis_set_request *set; request = malloc(sizeof(rndis_request), M_NETVSC, M_WAITOK | M_ZERO); sema_init(&request->wait_sema, 0, "rndis sema"); rndis_mesg = &request->request_msg; rndis_mesg->ndis_msg_type = message_type; rndis_mesg->msg_len = message_length; /* * Set the request id. This field is always after the rndis header * for request/response packet types, so we just use the set_request * as a template. */ set = &rndis_mesg->msg.set_request; set->request_id = atomic_fetchadd_int(&device->new_request_id, 1) & HN_RNDIS_RID_COMPAT_MASK; /* Add to the request list */ mtx_lock(&device->req_lock); STAILQ_INSERT_TAIL(&device->myrequest_list, request, mylist_entry); mtx_unlock(&device->req_lock); return (request); } /* * Remove an rndis_request from the device's request list and free it. */ static inline void hv_put_rndis_request(rndis_device *device, rndis_request *request) { mtx_lock(&device->req_lock); /* Fixme: Has O(n) performance */ /* * XXXKYS: Use Doubly linked lists. 
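 * With a TAILQ (doubly linked) head and entry in place of the STAILQ,
 * the removal below would become O(1); a minimal sketch of that
 * alternative, at the cost of one extra pointer per rndis_request:
 *
 *	TAILQ_REMOVE(&device->myrequest_list, request, mylist_entry);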
*/ STAILQ_REMOVE(&device->myrequest_list, request, rndis_request_, mylist_entry); mtx_unlock(&device->req_lock); sema_destroy(&request->wait_sema); free(request, M_NETVSC); } /* * */ static int hv_rf_send_request(rndis_device *device, rndis_request *request, uint32_t message_type) { struct hn_softc *sc = device->sc; uint32_t send_buf_section_idx, tot_data_buf_len; struct vmbus_gpa gpa[2]; int gpa_cnt, send_buf_section_size; hn_sent_callback_t cb; /* Set up the packet to send it */ tot_data_buf_len = request->request_msg.msg_len; gpa_cnt = 1; gpa[0].gpa_page = hv_get_phys_addr(&request->request_msg) >> PAGE_SHIFT; gpa[0].gpa_len = request->request_msg.msg_len; gpa[0].gpa_ofs = (unsigned long)&request->request_msg & (PAGE_SIZE - 1); if (gpa[0].gpa_ofs + gpa[0].gpa_len > PAGE_SIZE) { gpa_cnt = 2; gpa[0].gpa_len = PAGE_SIZE - gpa[0].gpa_ofs; gpa[1].gpa_page = hv_get_phys_addr((char*)&request->request_msg + gpa[0].gpa_len) >> PAGE_SHIFT; gpa[1].gpa_ofs = 0; gpa[1].gpa_len = request->request_msg.msg_len - gpa[0].gpa_len; } if (message_type != REMOTE_NDIS_HALT_MSG) cb = hn_rndis_sent_cb; else cb = hn_rndis_sent_halt; if (tot_data_buf_len < sc->hn_chim_szmax) { send_buf_section_idx = hn_chim_alloc(sc); if (send_buf_section_idx != HN_NVS_CHIM_IDX_INVALID) { uint8_t *dest = sc->hn_chim + (send_buf_section_idx * sc->hn_chim_szmax); memcpy(dest, &request->request_msg, request->request_msg.msg_len); send_buf_section_size = tot_data_buf_len; gpa_cnt = 0; goto sendit; } /* Failed to allocate chimney send buffer; move on */ } send_buf_section_idx = HN_NVS_CHIM_IDX_INVALID; send_buf_section_size = 0; sendit: hn_send_ctx_init(&request->send_ctx, cb, request, send_buf_section_idx, send_buf_section_size); return hv_nv_on_send(sc->hn_prichan, HN_NVS_RNDIS_MTYPE_CTRL, &request->send_ctx, gpa, gpa_cnt); } /* * RNDIS filter receive response */ static void hv_rf_receive_response(rndis_device *device, const rndis_msg *response) { rndis_request *request = NULL; rndis_request *next_request; boolean_t found = FALSE; mtx_lock(&device->req_lock); request = STAILQ_FIRST(&device->myrequest_list); while (request != NULL) { /* * All request/response message contains request_id as the * first field */ if (request->request_msg.msg.init_request.request_id == response->msg.init_complete.request_id) { found = TRUE; break; } next_request = STAILQ_NEXT(request, mylist_entry); request = next_request; } mtx_unlock(&device->req_lock); if (found) { if (response->msg_len <= sizeof(rndis_msg)) { memcpy(&request->response_msg, response, response->msg_len); } else { request->response_msg.msg.init_complete.status = RNDIS_STATUS_BUFFER_OVERFLOW; } sema_post(&request->wait_sema); } } int hv_rf_send_offload_request(struct hn_softc *sc, rndis_offload_params *offloads) { rndis_request *request; rndis_set_request *set; rndis_offload_params *offload_req; rndis_set_complete *set_complete; rndis_device *rndis_dev = sc->rndis_dev; device_t dev = sc->hn_dev; uint32_t extlen = sizeof(rndis_offload_params); int ret; if (sc->hn_nvs_ver <= NVSP_PROTOCOL_VERSION_4) { extlen = VERSION_4_OFFLOAD_SIZE; /* On NVSP_PROTOCOL_VERSION_4 and below, we do not support * UDP checksum offload. 
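 * Zeroing the two fields below suffices because 0 is
 * RNDIS_OFFLOAD_PARAMETERS_NO_CHANGE, i.e. the host is simply not
 * asked to change UDP checksum offload state.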
*/ offloads->udp_ipv4_csum = 0; offloads->udp_ipv6_csum = 0; } request = hv_rndis_request(rndis_dev, REMOTE_NDIS_SET_MSG, RNDIS_MESSAGE_SIZE(rndis_set_request) + extlen); if (!request) return (ENOMEM); set = &request->request_msg.msg.set_request; set->oid = RNDIS_OID_TCP_OFFLOAD_PARAMETERS; set->info_buffer_length = extlen; set->info_buffer_offset = sizeof(rndis_set_request); set->device_vc_handle = 0; offload_req = (rndis_offload_params *)((unsigned long)set + set->info_buffer_offset); *offload_req = *offloads; offload_req->header.type = RNDIS_OBJECT_TYPE_DEFAULT; offload_req->header.revision = RNDIS_OFFLOAD_PARAMETERS_REVISION_3; offload_req->header.size = extlen; ret = hv_rf_send_request(rndis_dev, request, REMOTE_NDIS_SET_MSG); if (ret != 0) { device_printf(dev, "hv send offload request failed, ret=%d!\n", ret); goto cleanup; } ret = sema_timedwait(&request->wait_sema, 5 * hz); if (ret != 0) { device_printf(dev, "hv send offload request timeout\n"); goto cleanup; } set_complete = &request->response_msg.msg.set_complete; if (set_complete->status == RNDIS_STATUS_SUCCESS) { device_printf(dev, "hv send offload request succeeded\n"); ret = 0; } else { if (set_complete->status == RNDIS_STATUS_NOT_SUPPORTED) { device_printf(dev, "HV Not support offload\n"); ret = 0; } else { ret = set_complete->status; } } cleanup: hv_put_rndis_request(rndis_dev, request); return (ret); } /* * RNDIS filter receive indicate status */ static void hv_rf_receive_indicate_status(rndis_device *device, const rndis_msg *response) { const rndis_indicate_status *indicate = &response->msg.indicate_status; switch(indicate->status) { case RNDIS_STATUS_MEDIA_CONNECT: netvsc_linkstatus_callback(device->sc, 1); break; case RNDIS_STATUS_MEDIA_DISCONNECT: netvsc_linkstatus_callback(device->sc, 0); break; default: /* TODO: */ device_printf(device->sc->hn_dev, "unknown status %d received\n", indicate->status); break; } } static int hv_rf_find_recvinfo(const rndis_packet *rpkt, struct hn_recvinfo *info) { const rndis_per_packet_info *ppi; uint32_t mask, len; info->vlan_info = NULL; info->csum_info = NULL; info->hash_info = NULL; info->hash_value = NULL; if (rpkt->per_pkt_info_offset == 0) return 0; ppi = (const rndis_per_packet_info *) ((const uint8_t *)rpkt + rpkt->per_pkt_info_offset); len = rpkt->per_pkt_info_length; mask = 0; while (len != 0) { const void *ppi_dptr; uint32_t ppi_dlen; if (__predict_false(ppi->size < ppi->per_packet_info_offset)) return EINVAL; ppi_dlen = ppi->size - ppi->per_packet_info_offset; ppi_dptr = (const uint8_t *)ppi + ppi->per_packet_info_offset; switch (ppi->type) { case ieee_8021q_info: if (__predict_false(ppi_dlen < sizeof(ndis_8021q_info))) return EINVAL; info->vlan_info = ppi_dptr; mask |= HV_RF_RECVINFO_VLAN; break; case tcpip_chksum_info: if (__predict_false(ppi_dlen < sizeof(rndis_tcp_ip_csum_info))) return EINVAL; info->csum_info = ppi_dptr; mask |= HV_RF_RECVINFO_CSUM; break; case nbl_hash_value: if (__predict_false(ppi_dlen < sizeof(struct rndis_hash_value))) return EINVAL; info->hash_value = ppi_dptr; mask |= HV_RF_RECVINFO_HASHVAL; break; case nbl_hash_info: if (__predict_false(ppi_dlen < sizeof(struct rndis_hash_info))) return EINVAL; info->hash_info = ppi_dptr; mask |= HV_RF_RECVINFO_HASHINF; break; default: goto skip; } if (mask == HV_RF_RECVINFO_ALL) { /* All found; done */ break; } skip: if (__predict_false(len < ppi->size)) return EINVAL; len -= ppi->size; ppi = (const rndis_per_packet_info *) ((const uint8_t *)ppi + ppi->size); } return 0; } /* * RNDIS filter receive data */ 
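/*
 * Layout note for the function below: per the rndis_packet definition in
 * hv_rndis.h, data_offset is a byte offset from the beginning of the
 * rndis_packet structure, so the payload of the whole message starts at
 *
 *	(const uint8_t *)data + RNDIS_HEADER_SIZE + rndis_pkt->data_offset
 *
 * which is exactly how data_offset is formed in the code that follows.
 */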
static void hv_rf_receive_data(struct hn_rx_ring *rxr, const void *data, int dlen) { const rndis_msg *message = data; const rndis_packet *rndis_pkt; uint32_t data_offset; struct hn_recvinfo info; rndis_pkt = &message->msg.packet; /* * Fixme: Handle multiple rndis pkt msgs that may be enclosed in this * netvsc packet (ie tot_data_buf_len != message_length) */ /* Remove rndis header, then pass data packet up the stack */ data_offset = RNDIS_HEADER_SIZE + rndis_pkt->data_offset; dlen -= data_offset; if (dlen < rndis_pkt->data_length) { if_printf(rxr->hn_ifp, "total length %u is less than data length %u\n", dlen, rndis_pkt->data_length); return; } dlen = rndis_pkt->data_length; data = (const uint8_t *)data + data_offset; if (hv_rf_find_recvinfo(rndis_pkt, &info)) { if_printf(rxr->hn_ifp, "recvinfo parsing failed\n"); return; } netvsc_recv(rxr, data, dlen, &info); } /* * RNDIS filter on receive */ int hv_rf_on_receive(struct hn_softc *sc, struct hn_rx_ring *rxr, const void *data, int dlen) { rndis_device *rndis_dev; const rndis_msg *rndis_hdr; const struct rndis_comp_hdr *comp; rndis_dev = sc->rndis_dev; if (rndis_dev->state == RNDIS_DEV_UNINITIALIZED) return (EINVAL); rndis_hdr = data; switch (rndis_hdr->ndis_msg_type) { /* data message */ case REMOTE_NDIS_PACKET_MSG: hv_rf_receive_data(rxr, data, dlen); break; /* completion messages */ case REMOTE_NDIS_INITIALIZE_CMPLT: case REMOTE_NDIS_QUERY_CMPLT: case REMOTE_NDIS_SET_CMPLT: case REMOTE_NDIS_KEEPALIVE_CMPLT: comp = data; if (comp->rm_rid <= HN_RNDIS_RID_COMPAT_MAX) { /* Transition time compat code */ hv_rf_receive_response(rndis_dev, rndis_hdr); } else { vmbus_xact_ctx_wakeup(sc->hn_xact, data, dlen); } break; /* notification message */ case REMOTE_NDIS_INDICATE_STATUS_MSG: hv_rf_receive_indicate_status(rndis_dev, rndis_hdr); break; case REMOTE_NDIS_RESET_CMPLT: /* * Reset completed, no rid. * * NOTE: * RESET is not issued by hn(4), so this message should * _not_ be observed. 
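 * (On the rid split used in the dispatch above: hn_rndis_rid() returns
 * its counter shifted into the upper 16 bits, e.g. counter value 5
 * becomes rid 0x00050000, while the legacy rndis_request path masks its
 * ids with HN_RNDIS_RID_COMPAT_MASK, keeping them at or below 0xffff;
 * that is what makes the rm_rid comparison unambiguous.)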
*/ if_printf(sc->hn_ifp, "RESET CMPLT received\n"); break; default: if_printf(sc->hn_ifp, "unknown RNDIS message 0x%x\n", rndis_hdr->ndis_msg_type); break; } return (0); } /* - * RNDIS filter query device - */ -static int -hv_rf_query_device(rndis_device *device, uint32_t oid, void *result, - uint32_t *result_size) -{ - rndis_request *request; - uint32_t in_result_size = *result_size; - rndis_query_request *query; - rndis_query_complete *query_complete; - int ret = 0; - - *result_size = 0; - request = hv_rndis_request(device, REMOTE_NDIS_QUERY_MSG, - RNDIS_MESSAGE_SIZE(rndis_query_request)); - if (request == NULL) { - ret = -1; - goto cleanup; - } - - /* Set up the rndis query */ - query = &request->request_msg.msg.query_request; - query->oid = oid; - query->info_buffer_offset = sizeof(rndis_query_request); - query->info_buffer_length = 0; - query->device_vc_handle = 0; - - if (oid == RNDIS_OID_GEN_RSS_CAPABILITIES) { - struct rndis_recv_scale_cap *cap; - - request->request_msg.msg_len += - sizeof(struct rndis_recv_scale_cap); - query->info_buffer_length = sizeof(struct rndis_recv_scale_cap); - cap = (struct rndis_recv_scale_cap *)((unsigned long)query + - query->info_buffer_offset); - cap->hdr.type = RNDIS_OBJECT_TYPE_RSS_CAPABILITIES; - cap->hdr.rev = RNDIS_RECEIVE_SCALE_CAPABILITIES_REVISION_2; - cap->hdr.size = sizeof(struct rndis_recv_scale_cap); - } - - ret = hv_rf_send_request(device, request, REMOTE_NDIS_QUERY_MSG); - if (ret != 0) { - /* Fixme: printf added */ - printf("RNDISFILTER request failed to Send!\n"); - goto cleanup; - } - - sema_wait(&request->wait_sema); - - /* Copy the response back */ - query_complete = &request->response_msg.msg.query_complete; - - if (query_complete->info_buffer_length > in_result_size) { - ret = EINVAL; - goto cleanup; - } - - memcpy(result, (void *)((unsigned long)query_complete + - query_complete->info_buffer_offset), - query_complete->info_buffer_length); - - *result_size = query_complete->info_buffer_length; - -cleanup: - if (request != NULL) - hv_put_rndis_request(device, request); - - return (ret); -} - -/* * RNDIS filter query device MAC address */ static int hv_rf_query_device_mac(rndis_device *device) { struct hn_softc *sc = device->sc; size_t hwaddr_len; int error; hwaddr_len = ETHER_ADDR_LEN; error = hn_rndis_query(sc, OID_802_3_PERMANENT_ADDRESS, NULL, 0, device->hw_mac_addr, &hwaddr_len); if (error) - return error; + return (error); if (hwaddr_len != ETHER_ADDR_LEN) { if_printf(sc->hn_ifp, "invalid hwaddr len %zu\n", hwaddr_len); - return EINVAL; + return (EINVAL); } - return 0; + return (0); } /* * RNDIS filter query device link status */ static inline int hv_rf_query_device_link_status(rndis_device *device) { - uint32_t size = sizeof(uint32_t); + struct hn_softc *sc = device->sc; + size_t size; + int error; - return (hv_rf_query_device(device, - RNDIS_OID_GEN_MEDIA_CONNECT_STATUS, &device->link_status, &size)); + size = sizeof(uint32_t); + error = hn_rndis_query(sc, OID_GEN_MEDIA_CONNECT_STATUS, NULL, 0, + &device->link_status, &size); + if (error) + return (error); + if (size != sizeof(uint32_t)) { + if_printf(sc->hn_ifp, "invalid link status len %zu\n", size); + return (EINVAL); + } + return (0); } -static uint8_t netvsc_hash_key[HASH_KEYLEN] = { +static uint8_t netvsc_hash_key[NDIS_HASH_KEYSIZE_TOEPLITZ] = { 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 0x41, 0x67, 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 0xd0, 0xca, 0x2b, 0xcb, 0xae, 0x7b, 0x30, 0xb4, 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30, 0xf2, 0x0c, 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 
0xac, 0x01, 0xfa }; /* - * RNDIS set vRSS parameters - */ -static int -hv_rf_set_rss_param(rndis_device *device, int num_queue) -{ - rndis_request *request; - rndis_set_request *set; - rndis_set_complete *set_complete; - rndis_recv_scale_param *rssp; - uint32_t extlen = sizeof(rndis_recv_scale_param) + - (4 * ITAB_NUM) + HASH_KEYLEN; - uint32_t *itab, status; - uint8_t *keyp; - int i, ret; - - - request = hv_rndis_request(device, REMOTE_NDIS_SET_MSG, - RNDIS_MESSAGE_SIZE(rndis_set_request) + extlen); - if (request == NULL) { - if (bootverbose) - printf("Netvsc: No memory to set vRSS parameters.\n"); - ret = -1; - goto cleanup; - } - - set = &request->request_msg.msg.set_request; - set->oid = RNDIS_OID_GEN_RSS_PARAMETERS; - set->info_buffer_length = extlen; - set->info_buffer_offset = sizeof(rndis_set_request); - set->device_vc_handle = 0; - - /* Fill out the rssp parameter structure */ - rssp = (rndis_recv_scale_param *)(set + 1); - rssp->hdr.type = RNDIS_OBJECT_TYPE_RSS_PARAMETERS; - rssp->hdr.rev = RNDIS_RECEIVE_SCALE_PARAMETERS_REVISION_2; - rssp->hdr.size = sizeof(rndis_recv_scale_param); - rssp->flag = 0; - rssp->hashinfo = RNDIS_HASH_FUNC_TOEPLITZ | RNDIS_HASH_IPV4 | - RNDIS_HASH_TCP_IPV4 | RNDIS_HASH_IPV6 | RNDIS_HASH_TCP_IPV6; - rssp->indirect_tabsize = 4 * ITAB_NUM; - rssp->indirect_taboffset = sizeof(rndis_recv_scale_param); - rssp->hashkey_size = HASH_KEYLEN; - rssp->hashkey_offset = rssp->indirect_taboffset + - rssp->indirect_tabsize; - - /* Set indirection table entries */ - itab = (uint32_t *)(rssp + 1); - for (i = 0; i < ITAB_NUM; i++) - itab[i] = i % num_queue; - - /* Set hash key values */ - keyp = (uint8_t *)((unsigned long)rssp + rssp->hashkey_offset); - for (i = 0; i < HASH_KEYLEN; i++) - keyp[i] = netvsc_hash_key[i]; - - ret = hv_rf_send_request(device, request, REMOTE_NDIS_SET_MSG); - if (ret != 0) { - goto cleanup; - } - - /* - * Wait for the response from the host. Another thread will signal - * us when the response has arrived. In the failure case, - * sema_timedwait() returns a non-zero status after waiting 5 seconds. - */ - ret = sema_timedwait(&request->wait_sema, 5 * hz); - if (ret == 0) { - /* Response received, check status */ - set_complete = &request->response_msg.msg.set_complete; - status = set_complete->status; - if (status != RNDIS_STATUS_SUCCESS) { - /* Bad response status, return error */ - if (bootverbose) - printf("Netvsc: Failed to set vRSS " - "parameters.\n"); - ret = -2; - } else { - if (bootverbose) - printf("Netvsc: Successfully set vRSS " - "parameters.\n"); - } - } else { - /* - * We cannot deallocate the request since we may still - * receive a send completion for it. - */ - printf("Netvsc: vRSS set timeout, id = %u, ret = %d\n", - request->request_msg.msg.init_request.request_id, ret); - goto exit; - } - -cleanup: - if (request != NULL) { - hv_put_rndis_request(device, request); - } -exit: - return (ret); -} - -/* * RNDIS filter set packet filter * Sends an rndis request with the new filter, then waits for a response * from the host. * Returns zero on success, non-zero on failure. 
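 * A typical caller-side sketch (hypothetical filter value, built from
 * the NDIS_PACKET_TYPE_* flags in hv_rndis.h):
 *
 *	uint32_t filter = NDIS_PACKET_TYPE_DIRECTED |
 *	    NDIS_PACKET_TYPE_ALL_MULTICAST | NDIS_PACKET_TYPE_BROADCAST;
 *	ret = hv_rf_set_packet_filter(device, filter);
 *	if (ret != 0)
 *		... the host did not accept the new filter ...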
*/ static int hv_rf_set_packet_filter(rndis_device *device, uint32_t new_filter) { rndis_request *request; rndis_set_request *set; rndis_set_complete *set_complete; uint32_t status; int ret; request = hv_rndis_request(device, REMOTE_NDIS_SET_MSG, RNDIS_MESSAGE_SIZE(rndis_set_request) + sizeof(uint32_t)); if (request == NULL) { ret = -1; goto cleanup; } /* Set up the rndis set */ set = &request->request_msg.msg.set_request; set->oid = RNDIS_OID_GEN_CURRENT_PACKET_FILTER; set->info_buffer_length = sizeof(uint32_t); set->info_buffer_offset = sizeof(rndis_set_request); memcpy((void *)((unsigned long)set + sizeof(rndis_set_request)), &new_filter, sizeof(uint32_t)); ret = hv_rf_send_request(device, request, REMOTE_NDIS_SET_MSG); if (ret != 0) { goto cleanup; } /* * Wait for the response from the host. Another thread will signal * us when the response has arrived. In the failure case, * sema_timedwait() returns a non-zero status after waiting 5 seconds. */ ret = sema_timedwait(&request->wait_sema, 5 * hz); if (ret == 0) { /* Response received, check status */ set_complete = &request->response_msg.msg.set_complete; status = set_complete->status; if (status != RNDIS_STATUS_SUCCESS) { /* Bad response status, return error */ ret = -2; } } else { /* * We cannot deallocate the request since we may still * receive a send completion for it. */ goto exit; } cleanup: if (request != NULL) { hv_put_rndis_request(device, request); } exit: return (ret); } static const void * hn_rndis_xact_execute(struct hn_softc *sc, struct vmbus_xact *xact, uint32_t rid, size_t reqlen, size_t *comp_len0, uint32_t comp_type) { struct vmbus_gpa gpa[HN_XACT_REQ_PGCNT]; const struct rndis_comp_hdr *comp; bus_addr_t paddr; size_t comp_len, min_complen = *comp_len0; int gpa_cnt, error; KASSERT(rid > HN_RNDIS_RID_COMPAT_MAX, ("invalid rid %u\n", rid)); KASSERT(reqlen <= HN_XACT_REQ_SIZE && reqlen > 0, ("invalid request length %zu", reqlen)); KASSERT(min_complen >= sizeof(*comp), ("invalid minimum complete len %zu", min_complen)); /* * Setup the SG list. */ paddr = vmbus_xact_req_paddr(xact); KASSERT((paddr & PAGE_MASK) == 0, ("vmbus xact request is not page aligned 0x%jx", (uintmax_t)paddr)); for (gpa_cnt = 0; gpa_cnt < HN_XACT_REQ_PGCNT; ++gpa_cnt) { int len = PAGE_SIZE; if (reqlen == 0) break; if (reqlen < len) len = reqlen; gpa[gpa_cnt].gpa_page = atop(paddr) + gpa_cnt; gpa[gpa_cnt].gpa_len = len; gpa[gpa_cnt].gpa_ofs = 0; reqlen -= len; } KASSERT(reqlen == 0, ("still have %zu request data left", reqlen)); /* * Send this RNDIS control message and wait for its completion * message. */ vmbus_xact_activate(xact); error = hv_nv_on_send(sc->hn_prichan, HN_NVS_RNDIS_MTYPE_CTRL, &hn_send_ctx_none, gpa, gpa_cnt); if (error) { vmbus_xact_deactivate(xact); if_printf(sc->hn_ifp, "RNDIS ctrl send failed: %d\n", error); return (NULL); } comp = vmbus_xact_wait(xact, &comp_len); /* * Check this RNDIS complete message. 
*/ if (comp_len < min_complen) { if (comp_len >= sizeof(*comp)) { /* rm_status field is valid */ if_printf(sc->hn_ifp, "invalid RNDIS comp len %zu, " "status 0x%08x\n", comp_len, comp->rm_status); } else { if_printf(sc->hn_ifp, "invalid RNDIS comp len %zu\n", comp_len); } return (NULL); } if (comp->rm_len < min_complen) { if_printf(sc->hn_ifp, "invalid RNDIS comp msglen %u\n", comp->rm_len); return (NULL); } if (comp->rm_type != comp_type) { if_printf(sc->hn_ifp, "unexpected RNDIS comp 0x%08x, " "expect 0x%08x\n", comp->rm_type, comp_type); return (NULL); } if (comp->rm_rid != rid) { if_printf(sc->hn_ifp, "RNDIS comp rid mismatch %u, " "expect %u\n", comp->rm_rid, rid); return (NULL); } /* All pass! */ *comp_len0 = comp_len; return (comp); } static int hn_rndis_query(struct hn_softc *sc, uint32_t oid, const void *idata, size_t idlen, void *odata, size_t *odlen0) { struct rndis_query_req *req; const struct rndis_query_comp *comp; struct vmbus_xact *xact; size_t reqlen, odlen = *odlen0, comp_len; int error, ofs; uint32_t rid; reqlen = sizeof(*req) + idlen; xact = vmbus_xact_get(sc->hn_xact, reqlen); if (xact == NULL) { if_printf(sc->hn_ifp, "no xact for RNDIS query 0x%08x\n", oid); return (ENXIO); } rid = hn_rndis_rid(sc); req = vmbus_xact_req_data(xact); req->rm_type = REMOTE_NDIS_QUERY_MSG; req->rm_len = reqlen; req->rm_rid = rid; req->rm_oid = oid; /* * XXX * This is _not_ RNDIS Spec conforming: * "This MUST be set to 0 when there is no input data * associated with the OID." * * If this field was set to 0 according to the RNDIS Spec, * Hyper-V would set non-SUCCESS status in the query * completion. */ req->rm_infobufoffset = RNDIS_QUERY_REQ_INFOBUFOFFSET; if (idlen > 0) { req->rm_infobuflen = idlen; /* Input data immediately follows RNDIS query. */ memcpy(req + 1, idata, idlen); } comp_len = sizeof(*comp) + odlen; comp = hn_rndis_xact_execute(sc, xact, rid, reqlen, &comp_len, REMOTE_NDIS_QUERY_CMPLT); if (comp == NULL) { if_printf(sc->hn_ifp, "exec RNDIS query 0x%08x failed\n", oid); error = EIO; goto done; } if (comp->rm_status != RNDIS_STATUS_SUCCESS) { if_printf(sc->hn_ifp, "RNDIS query 0x%08x failed: " "status 0x%08x\n", oid, comp->rm_status); error = EIO; goto done; } if (comp->rm_infobuflen == 0 || comp->rm_infobufoffset == 0) { /* No output data! */ if_printf(sc->hn_ifp, "RNDIS query 0x%08x, no data\n", oid); *odlen0 = 0; error = 0; goto done; } /* * Check output data length and offset. */ /* ofs is the offset from the beginning of comp. */ ofs = RNDIS_QUERY_COMP_INFOBUFABS(comp->rm_infobufoffset); if (ofs < sizeof(*comp) || ofs + comp->rm_infobuflen > comp_len) { if_printf(sc->hn_ifp, "RNDIS query invalid comp ib off/len, " "%u/%u\n", comp->rm_infobufoffset, comp->rm_infobuflen); error = EINVAL; goto done; } /* * Save output data. */ if (comp->rm_infobuflen < odlen) odlen = comp->rm_infobuflen; memcpy(odata, ((const uint8_t *)comp) + ofs, odlen); *odlen0 = odlen; error = 0; done: vmbus_xact_put(xact); return (error); } static int +hn_rndis_get_rsscaps(struct hn_softc *sc, int *rxr_cnt) +{ + struct ndis_rss_caps in, caps; + size_t caps_len; + int error; + + /* + * Only NDIS 6.30+ is supported. 
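 */

/*
 * A sketch of the bounds checks hn_rndis_query() applies before trusting
 * the completion's embedded info buffer: the driver first converts the
 * wire-relative offset to an absolute one, then requires that it lands
 * past the fixed completion header and that offset + length stays inside
 * the bytes actually received.  ex_infobuf_ok() is a hypothetical helper
 * written in an overflow-safe form.
 */
#include <stddef.h>

static int
ex_infobuf_ok(size_t comp_len, size_t comp_hdrlen, size_t ofs, size_t len)
{
	if (ofs < comp_hdrlen)
		return (0);	/* overlaps the completion header */
	if (len > comp_len || ofs > comp_len - len)
		return (0);	/* runs past the received bytes */
	return (1);
}

/*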
+ */ + KASSERT(sc->hn_ndis_ver >= NDIS_VERSION_6_30, + ("NDIS 6.30+ is required, NDIS version 0x%08x", sc->hn_ndis_ver)); + *rxr_cnt = 0; + + memset(&in, 0, sizeof(in)); + in.ndis_hdr.ndis_type = NDIS_OBJTYPE_RSS_CAPS; + in.ndis_hdr.ndis_rev = NDIS_RSS_CAPS_REV_2; + in.ndis_hdr.ndis_size = NDIS_RSS_CAPS_SIZE; + + caps_len = NDIS_RSS_CAPS_SIZE; + error = hn_rndis_query(sc, OID_GEN_RSS_CAPABILITIES, + &in, NDIS_RSS_CAPS_SIZE, &caps, &caps_len); + if (error) + return (error); + if (caps_len < NDIS_RSS_CAPS_SIZE_6_0) { + if_printf(sc->hn_ifp, "invalid NDIS RSS caps len %zu", + caps_len); + return (EINVAL); + } + + if (caps.ndis_nrxr == 0) { + if_printf(sc->hn_ifp, "0 RX rings!?\n"); + return (EINVAL); + } + *rxr_cnt = caps.ndis_nrxr; + + if (caps_len == NDIS_RSS_CAPS_SIZE) { + if (bootverbose) { + if_printf(sc->hn_ifp, "RSS indirect table size %u\n", + caps.ndis_nind); + } + } + return (0); +} + +static int hn_rndis_set(struct hn_softc *sc, uint32_t oid, const void *data, size_t dlen) { struct rndis_set_req *req; const struct rndis_set_comp *comp; struct vmbus_xact *xact; size_t reqlen, comp_len; uint32_t rid; int error; KASSERT(dlen > 0, ("invalid dlen %zu", dlen)); reqlen = sizeof(*req) + dlen; xact = vmbus_xact_get(sc->hn_xact, reqlen); if (xact == NULL) { if_printf(sc->hn_ifp, "no xact for RNDIS set 0x%08x\n", oid); return (ENXIO); } rid = hn_rndis_rid(sc); req = vmbus_xact_req_data(xact); req->rm_type = REMOTE_NDIS_SET_MSG; req->rm_len = reqlen; req->rm_rid = rid; req->rm_oid = oid; req->rm_infobuflen = dlen; req->rm_infobufoffset = RNDIS_SET_REQ_INFOBUFOFFSET; /* Data immediately follows RNDIS set. */ memcpy(req + 1, data, dlen); comp_len = sizeof(*comp); comp = hn_rndis_xact_execute(sc, xact, rid, reqlen, &comp_len, REMOTE_NDIS_SET_CMPLT); if (comp == NULL) { if_printf(sc->hn_ifp, "exec RNDIS set 0x%08x failed\n", oid); error = EIO; goto done; } if (comp->rm_status != RNDIS_STATUS_SUCCESS) { if_printf(sc->hn_ifp, "RNDIS set 0x%08x failed: " "status 0x%08x\n", oid, comp->rm_status); error = EIO; goto done; } error = 0; done: vmbus_xact_put(xact); return (error); } static int hn_rndis_conf_offload(struct hn_softc *sc) { struct ndis_offload_params params; size_t paramsz; int error; /* NOTE: 0 means "no change" */ memset(¶ms, 0, sizeof(params)); params.ndis_hdr.ndis_type = NDIS_OBJTYPE_DEFAULT; if (sc->hn_ndis_ver < NDIS_VERSION_6_30) { params.ndis_hdr.ndis_rev = NDIS_OFFLOAD_PARAMS_REV_2; paramsz = NDIS_OFFLOAD_PARAMS_SIZE_6_1; } else { params.ndis_hdr.ndis_rev = NDIS_OFFLOAD_PARAMS_REV_3; paramsz = NDIS_OFFLOAD_PARAMS_SIZE; } params.ndis_hdr.ndis_size = paramsz; params.ndis_ip4csum = NDIS_OFFLOAD_PARAM_TXRX; params.ndis_tcp4csum = NDIS_OFFLOAD_PARAM_TXRX; params.ndis_tcp6csum = NDIS_OFFLOAD_PARAM_TXRX; if (sc->hn_ndis_ver >= NDIS_VERSION_6_30) { params.ndis_udp4csum = NDIS_OFFLOAD_PARAM_TXRX; params.ndis_udp6csum = NDIS_OFFLOAD_PARAM_TXRX; } params.ndis_lsov2_ip4 = NDIS_OFFLOAD_LSOV2_ON; /* XXX ndis_lsov2_ip6 = NDIS_OFFLOAD_LSOV2_ON */ error = hn_rndis_set(sc, OID_TCP_OFFLOAD_PARAMETERS, ¶ms, paramsz); if (error) { if_printf(sc->hn_ifp, "offload config failed: %d\n", error); } else { if (bootverbose) if_printf(sc->hn_ifp, "offload config done\n"); } return (error); } +static int +hn_rndis_conf_rss(struct hn_softc *sc, int nchan) +{ + struct ndis_rssprm_toeplitz *rss = &sc->hn_rss; + struct ndis_rss_params *prm = &rss->rss_params; + int i, error; + + /* + * Only NDIS 6.30+ is supported. 
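 */

/*
 * A sketch of the revision/size selection hn_rndis_conf_offload() above
 * performs: pre-6.30 hosts get the REV_2 structure truncated before the
 * NDIS 6.30-only fields, newer hosts get the full REV_3 structure.  The
 * stand-in struct and the (major << 16 | minor) version packing are
 * assumptions mirroring the driver's NDIS_VERSION_* constants.
 */
#include <stddef.h>
#include <stdint.h>

struct ex_offload_params {
	uint8_t  ip4csum;
	uint8_t  tcp4csum;
	uint32_t flags;
	/* Fields below exist only for NDIS >= 6.30. */
	uint8_t  rsc_ip4;
	uint8_t  rsc_ip6;
};

#define EX_NDIS_6_30	((6 << 16) | 30)
#define EX_SIZE_FULL	sizeof(struct ex_offload_params)
#define EX_SIZE_6_1	offsetof(struct ex_offload_params, rsc_ip4)

static size_t
ex_offload_size(uint32_t ndis_ver, uint8_t *rev)
{
	if (ndis_ver < EX_NDIS_6_30) {
		*rev = 2;		/* like NDIS_OFFLOAD_PARAMS_REV_2 */
		return (EX_SIZE_6_1);	/* drop the 6.30-only tail */
	}
	*rev = 3;			/* like NDIS_OFFLOAD_PARAMS_REV_3 */
	return (EX_SIZE_FULL);
}

/*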
+ */ + KASSERT(sc->hn_ndis_ver >= NDIS_VERSION_6_30, + ("NDIS 6.30+ is required, NDIS version 0x%08x", sc->hn_ndis_ver)); + + memset(rss, 0, sizeof(*rss)); + prm->ndis_hdr.ndis_type = NDIS_OBJTYPE_RSS_PARAMS; + prm->ndis_hdr.ndis_rev = NDIS_RSS_PARAMS_REV_2; + prm->ndis_hdr.ndis_size = sizeof(*rss); + prm->ndis_hash = NDIS_HASH_FUNCTION_TOEPLITZ | + NDIS_HASH_IPV4 | NDIS_HASH_TCP_IPV4 | + NDIS_HASH_IPV6 | NDIS_HASH_TCP_IPV6; + /* TODO: Take ndis_rss_caps.ndis_nind into account */ + prm->ndis_indsize = sizeof(rss->rss_ind); + prm->ndis_indoffset = + __offsetof(struct ndis_rssprm_toeplitz, rss_ind[0]); + prm->ndis_keysize = sizeof(rss->rss_key); + prm->ndis_keyoffset = + __offsetof(struct ndis_rssprm_toeplitz, rss_key[0]); + + /* Setup RSS key */ + memcpy(rss->rss_key, netvsc_hash_key, sizeof(rss->rss_key)); + + /* Setup RSS indirect table */ + /* TODO: Take ndis_rss_caps.ndis_nind into account */ + for (i = 0; i < NDIS_HASH_INDCNT; ++i) + rss->rss_ind[i] = i % nchan; + + error = hn_rndis_set(sc, OID_GEN_RSS_PARAMETERS, rss, sizeof(*rss)); + if (error) { + if_printf(sc->hn_ifp, "RSS config failed: %d\n", error); + } else { + if (bootverbose) + if_printf(sc->hn_ifp, "RSS config done\n"); + } + return (error); +} + /* * RNDIS filter init device */ static int hv_rf_init_device(rndis_device *device) { struct hn_softc *sc = device->sc; struct rndis_init_req *req; const struct rndis_init_comp *comp; struct vmbus_xact *xact; size_t comp_len; uint32_t rid; int error; /* XXX */ device->state = RNDIS_DEV_INITIALIZED; xact = vmbus_xact_get(sc->hn_xact, sizeof(*req)); if (xact == NULL) { if_printf(sc->hn_ifp, "no xact for RNDIS init\n"); return (ENXIO); } rid = hn_rndis_rid(sc); req = vmbus_xact_req_data(xact); req->rm_type = REMOTE_NDIS_INITIALIZE_MSG; req->rm_len = sizeof(*req); req->rm_rid = rid; req->rm_ver_major = RNDIS_VERSION_MAJOR; req->rm_ver_minor = RNDIS_VERSION_MINOR; req->rm_max_xfersz = HN_RNDIS_XFER_SIZE; comp_len = RNDIS_INIT_COMP_SIZE_MIN; comp = hn_rndis_xact_execute(sc, xact, rid, sizeof(*req), &comp_len, REMOTE_NDIS_INITIALIZE_CMPLT); if (comp == NULL) { if_printf(sc->hn_ifp, "exec RNDIS init failed\n"); error = EIO; goto done; } if (comp->rm_status != RNDIS_STATUS_SUCCESS) { if_printf(sc->hn_ifp, "RNDIS init failed: status 0x%08x\n", comp->rm_status); error = EIO; goto done; } if (bootverbose) { if_printf(sc->hn_ifp, "RNDIS ver %u.%u, pktsz %u, pktcnt %u\n", comp->rm_ver_major, comp->rm_ver_minor, comp->rm_pktmaxsz, comp->rm_pktmaxcnt); } error = 0; done: if (xact != NULL) vmbus_xact_put(xact); return (error); } #define HALT_COMPLETION_WAIT_COUNT 25 /* * RNDIS filter halt device */ static int hv_rf_halt_device(rndis_device *device) { rndis_request *request; int i, ret; /* Attempt to do a rndis device halt */ request = hv_rndis_request(device, REMOTE_NDIS_HALT_MSG, RNDIS_MESSAGE_SIZE(rndis_halt_request)); if (request == NULL) { return (-1); } /* initialize "poor man's semaphore" */ request->halt_complete_flag = 0; ret = hv_rf_send_request(device, request, REMOTE_NDIS_HALT_MSG); if (ret != 0) { return (-1); } /* * Wait for halt response from halt callback. We must wait for * the transaction response before freeing the request and other * resources. 
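 */

/*
 * A sketch of how hn_rndis_conf_rss() above makes the RSS message
 * self-describing: the key and indirection-table offsets stored in the
 * header are byte offsets from the start of the whole message, computed
 * with offsetof, so the host never needs the C struct layout.  The ex_*
 * types are trimmed stand-ins for ndis_rss_params and
 * ndis_rssprm_toeplitz.
 */
#include <stddef.h>
#include <stdint.h>
#include <string.h>

#define EX_KEYSZ	40	/* NDIS_HASH_KEYSIZE_TOEPLITZ */
#define EX_INDCNT	128	/* NDIS_HASH_INDCNT */

struct ex_rss_params {
	uint16_t indsize;
	uint32_t indoffset;
	uint16_t keysize;
	uint32_t keyoffset;
};

struct ex_rssprm_toeplitz {
	struct ex_rss_params prm;
	uint8_t  key[EX_KEYSZ];
	uint32_t ind[EX_INDCNT];
};

static void
ex_rss_selfdescribe(struct ex_rssprm_toeplitz *rss, const uint8_t *hashkey,
    int nchan)
{
	int i;

	memset(rss, 0, sizeof(*rss));
	rss->prm.indsize = sizeof(rss->ind);
	rss->prm.indoffset = offsetof(struct ex_rssprm_toeplitz, ind[0]);
	rss->prm.keysize = sizeof(rss->key);
	rss->prm.keyoffset = offsetof(struct ex_rssprm_toeplitz, key[0]);
	memcpy(rss->key, hashkey, EX_KEYSZ);
	/* Same round-robin spread over channels as the driver uses. */
	for (i = 0; i < EX_INDCNT; i++)
		rss->ind[i] = i % nchan;
}

/*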
*/ for (i=HALT_COMPLETION_WAIT_COUNT; i > 0; i--) { if (request->halt_complete_flag != 0) { break; } DELAY(400); } if (i == 0) { return (-1); } device->state = RNDIS_DEV_UNINITIALIZED; hv_put_rndis_request(device, request); return (0); } /* * RNDIS filter open device */ static int hv_rf_open_device(rndis_device *device) { int ret; if (device->state != RNDIS_DEV_INITIALIZED) { return (0); } if (hv_promisc_mode != 1) { ret = hv_rf_set_packet_filter(device, NDIS_PACKET_TYPE_BROADCAST | NDIS_PACKET_TYPE_ALL_MULTICAST | NDIS_PACKET_TYPE_DIRECTED); } else { ret = hv_rf_set_packet_filter(device, NDIS_PACKET_TYPE_PROMISCUOUS); } if (ret == 0) { device->state = RNDIS_DEV_DATAINITIALIZED; } return (ret); } /* * RNDIS filter close device */ static int hv_rf_close_device(rndis_device *device) { int ret; if (device->state != RNDIS_DEV_DATAINITIALIZED) { return (0); } ret = hv_rf_set_packet_filter(device, 0); if (ret == 0) { device->state = RNDIS_DEV_INITIALIZED; } return (ret); } /* * RNDIS filter on device add */ int hv_rf_on_device_add(struct hn_softc *sc, void *additl_info, int *nchan0, struct hn_rx_ring *rxr) { int ret; rndis_device *rndis_dev; - struct rndis_recv_scale_cap rsscaps; - uint32_t rsscaps_size = sizeof(struct rndis_recv_scale_cap); netvsc_device_info *dev_info = (netvsc_device_info *)additl_info; device_t dev = sc->hn_dev; struct hn_nvs_subch_req *req; const struct hn_nvs_subch_resp *resp; size_t resp_len; struct vmbus_xact *xact = NULL; uint32_t status, nsubch; int nchan = *nchan0; + int rxr_cnt; rndis_dev = hv_get_rndis_device(); if (rndis_dev == NULL) { return (ENOMEM); } sc->rndis_dev = rndis_dev; rndis_dev->sc = sc; /* * Let the inner driver handle this first to create the netvsc channel * NOTE! Once the channel is created, we may get a receive callback * (hv_rf_on_receive()) before this call is completed. * Note: Earlier code used a function pointer here. */ ret = hv_nv_on_device_add(sc, rxr); if (ret != 0) { hv_put_rndis_device(rndis_dev); return (ret); } /* * Initialize the rndis device */ /* Send the rndis initialization message */ ret = hv_rf_init_device(rndis_dev); if (ret != 0) { /* * TODO: If rndis init failed, we will need to shut down * the channel */ } /* Get the mac address */ ret = hv_rf_query_device_mac(rndis_dev); if (ret != 0) { /* TODO: shut down rndis device and the channel */ } /* Configure NDIS offload settings */ hn_rndis_conf_offload(sc); memcpy(dev_info->mac_addr, rndis_dev->hw_mac_addr, ETHER_ADDR_LEN); hv_rf_query_device_link_status(rndis_dev); dev_info->link_state = rndis_dev->link_status; - if (sc->hn_nvs_ver < NVSP_PROTOCOL_VERSION_5 || nchan == 1) + if (sc->hn_ndis_ver < NDIS_VERSION_6_30 || nchan == 1) { + /* + * Either RSS is not supported, or multiple RX/TX rings + * are not requested. + */ + *nchan0 = 1; return (0); + } - memset(&rsscaps, 0, rsscaps_size); - ret = hv_rf_query_device(rndis_dev, - RNDIS_OID_GEN_RSS_CAPABILITIES, - &rsscaps, &rsscaps_size); - if ((ret != 0) || (rsscaps.num_recv_que < 2)) { - device_printf(dev, "hv_rf_query_device failed or " - "rsscaps.num_recv_que < 2 \n"); - goto out; + /* + * Get RSS capabilities, e.g. # of RX rings, and # of indirect + * table entries. + */ + ret = hn_rndis_get_rsscaps(sc, &rxr_cnt); + if (ret) { + /* No RSS; this is benign. 
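 */

/*
 * A sketch of the "poor man's semaphore" wait used by hv_rf_halt_device()
 * in this hunk: busy-poll a completion flag a bounded number of times,
 * since the request and its resources may only be freed after the halt
 * completion has been observed.  ex_delay_us() is a hypothetical
 * stand-in for the kernel's DELAY().
 */
extern void ex_delay_us(int us);

static int
ex_poll_flag(volatile int *flag, int tries, int us_per_try)
{
	int i;

	for (i = tries; i > 0; i--) {
		if (*flag != 0)
			return (0);	/* completion observed */
		ex_delay_us(us_per_try);
	}
	return (-1);	/* gave up; the caller must not free the request */
}

/*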
*/ + *nchan0 = 1; + return (0); } - device_printf(dev, "channel, offered %u, requested %d\n", - rsscaps.num_recv_que, nchan); - if (nchan > rsscaps.num_recv_que) - nchan = rsscaps.num_recv_que; + if (nchan > rxr_cnt) + nchan = rxr_cnt; + if_printf(sc->hn_ifp, "RX rings offered %u, requested %d\n", + rxr_cnt, nchan); if (nchan == 1) { device_printf(dev, "only 1 channel is supported, no vRSS\n"); goto out; } /* * Ask NVS to allocate sub-channels. */ xact = vmbus_xact_get(sc->hn_xact, sizeof(*req)); if (xact == NULL) { if_printf(sc->hn_ifp, "no xact for nvs subch req\n"); ret = ENXIO; goto out; } req = vmbus_xact_req_data(xact); req->nvs_type = HN_NVS_TYPE_SUBCH_REQ; req->nvs_op = HN_NVS_SUBCH_OP_ALLOC; req->nvs_nsubch = nchan - 1; resp = hn_nvs_xact_execute(sc, xact, req, sizeof(*req), &resp_len); if (resp == NULL) { if_printf(sc->hn_ifp, "exec subch failed\n"); ret = EIO; goto out; } if (resp_len < sizeof(*resp)) { if_printf(sc->hn_ifp, "invalid subch resp length %zu\n", resp_len); ret = EINVAL; goto out; } if (resp->nvs_type != HN_NVS_TYPE_SUBCH_RESP) { if_printf(sc->hn_ifp, "not subch resp, type %u\n", resp->nvs_type); ret = EINVAL; goto out; } status = resp->nvs_status; nsubch = resp->nvs_nsubch; vmbus_xact_put(xact); xact = NULL; if (status != HN_NVS_STATUS_OK) { if_printf(sc->hn_ifp, "subch req failed: %x\n", status); ret = EIO; goto out; } if (nsubch > nchan - 1) { if_printf(sc->hn_ifp, "%u subchans are allocated, requested %u\n", nsubch, nchan - 1); nsubch = nchan - 1; } nchan = nsubch + 1; - ret = hv_rf_set_rss_param(rndis_dev, nchan); - *nchan0 = nchan; + ret = hn_rndis_conf_rss(sc, nchan); + if (ret != 0) + *nchan0 = 1; + else + *nchan0 = nchan; out: if (xact != NULL) vmbus_xact_put(xact); return (ret); } /* * RNDIS filter on device remove */ int hv_rf_on_device_remove(struct hn_softc *sc, boolean_t destroy_channel) { rndis_device *rndis_dev = sc->rndis_dev; int ret; /* Halt and release the rndis device */ ret = hv_rf_halt_device(rndis_dev); sc->rndis_dev = NULL; hv_put_rndis_device(rndis_dev); /* Pass control to inner driver to remove the device */ ret |= hv_nv_on_device_remove(sc, destroy_channel); return (ret); } /* * RNDIS filter on open */ int hv_rf_on_open(struct hn_softc *sc) { return (hv_rf_open_device(sc->rndis_dev)); } /* * RNDIS filter on close */ int hv_rf_on_close(struct hn_softc *sc) { return (hv_rf_close_device(sc->rndis_dev)); } static void hn_rndis_sent_cb(struct hn_send_ctx *sndc, struct hn_softc *sc, struct vmbus_channel *chan __unused, const void *data __unused, int dlen __unused) { if (sndc->hn_chim_idx != HN_NVS_CHIM_IDX_INVALID) hn_chim_free(sc, sndc->hn_chim_idx); } static void hn_rndis_sent_halt(struct hn_send_ctx *sndc, struct hn_softc *sc, struct vmbus_channel *chan __unused, const void *data __unused, int dlen __unused) { rndis_request *request = sndc->hn_cbarg; if (sndc->hn_chim_idx != HN_NVS_CHIM_IDX_INVALID) hn_chim_free(sc, sndc->hn_chim_idx); /* * Notify hv_rf_halt_device() about halt completion. * The halt code must wait for completion before freeing * the transaction resources. 
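 */

/*
 * A sketch of the channel-count negotiation in hv_rf_on_device_add()
 * above: the request is clamped to the RX rings the host offers, the
 * host may grant fewer subchannels than the nchan - 1 asked for, and
 * the usable channel count is whatever was granted plus the primary
 * channel.  ex_negotiate_channels() is a hypothetical helper.
 */
static int
ex_negotiate_channels(int requested, int offered_rxr, unsigned granted_subch)
{
	int nchan = requested;

	if (nchan > offered_rxr)
		nchan = offered_rxr;	/* cannot exceed host RX rings */
	if (nchan <= 1)
		return (1);		/* no vRSS; primary channel only */
	if (granted_subch > (unsigned)(nchan - 1))
		granted_subch = nchan - 1;
	return ((int)granted_subch + 1);	/* subchannels + primary */
}

/*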
*/ request->halt_complete_flag = 1; } void hv_rf_channel_rollup(struct hn_rx_ring *rxr, struct hn_tx_ring *txr) { netvsc_channel_rollup(rxr, txr); } Index: projects/clang390-import/sys/dev/hyperv/netvsc/ndis.h =================================================================== --- projects/clang390-import/sys/dev/hyperv/netvsc/ndis.h (revision 305016) +++ projects/clang390-import/sys/dev/hyperv/netvsc/ndis.h (revision 305017) @@ -1,118 +1,217 @@ /*- * Copyright (c) 2016 Microsoft Corp. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _NET_NDIS_H_ #define _NET_NDIS_H_ +#define NDIS_MEDIA_STATE_CONNECTED 0 +#define NDIS_MEDIA_STATE_DISCONNECTED 1 + +#define OID_GEN_RSS_CAPABILITIES 0x00010203 +#define OID_GEN_RSS_PARAMETERS 0x00010204 #define OID_TCP_OFFLOAD_PARAMETERS 0xFC01020C #define NDIS_OBJTYPE_DEFAULT 0x80 +#define NDIS_OBJTYPE_RSS_CAPS 0x88 +#define NDIS_OBJTYPE_RSS_PARAMS 0x89 /* common_set */ #define NDIS_OFFLOAD_SET_NOCHG 0 #define NDIS_OFFLOAD_SET_ON 1 #define NDIS_OFFLOAD_SET_OFF 2 /* a.k.a GRE MAC */ #define NDIS_ENCAP_TYPE_NVGRE 0x00000001 +#define NDIS_HASH_FUNCTION_MASK 0x000000FF /* see hash function */ +#define NDIS_HASH_TYPE_MASK 0x00FFFF00 /* see hash type */ + +/* hash function */ +#define NDIS_HASH_FUNCTION_TOEPLITZ 0x00000001 + +/* hash type */ +#define NDIS_HASH_IPV4 0x00000100 +#define NDIS_HASH_TCP_IPV4 0x00000200 +#define NDIS_HASH_IPV6 0x00000400 +#define NDIS_HASH_IPV6_EX 0x00000800 +#define NDIS_HASH_TCP_IPV6 0x00001000 +#define NDIS_HASH_TCP_IPV6_EX 0x00002000 + +#define NDIS_HASH_KEYSIZE_TOEPLITZ 40 +#define NDIS_HASH_INDCNT 128 + struct ndis_object_hdr { uint8_t ndis_type; /* NDIS_OBJTYPE_ */ uint8_t ndis_rev; /* type specific */ uint16_t ndis_size; /* incl. 
this hdr */ }; -/* OID_TCP_OFFLOAD_PARAMETERS */ +/* + * OID_TCP_OFFLOAD_PARAMETERS + * ndis_type: NDIS_OBJTYPE_DEFAULT + */ struct ndis_offload_params { struct ndis_object_hdr ndis_hdr; uint8_t ndis_ip4csum; /* param_set */ uint8_t ndis_tcp4csum; /* param_set */ uint8_t ndis_udp4csum; /* param_set */ uint8_t ndis_tcp6csum; /* param_set */ uint8_t ndis_udp6csum; /* param_set */ uint8_t ndis_lsov1; /* lsov1_set */ uint8_t ndis_ipsecv1; /* ipsecv1_set */ uint8_t ndis_lsov2_ip4; /* lsov2_set */ uint8_t ndis_lsov2_ip6; /* lsov2_set */ uint8_t ndis_tcp4conn; /* PARAM_NOCHG */ uint8_t ndis_tcp6conn; /* PARAM_NOCHG */ uint32_t ndis_flags; /* 0 */ /* NDIS >= 6.1 */ uint8_t ndis_ipsecv2; /* ipsecv2_set */ uint8_t ndis_ipsecv2_ip4; /* ipsecv2_set */ /* NDIS >= 6.30 */ uint8_t ndis_rsc_ip4; /* rsc_set */ uint8_t ndis_rsc_ip6; /* rsc_set */ uint8_t ndis_encap; /* common_set */ uint8_t ndis_encap_types; /* NDIS_ENCAP_TYPE_ */ }; #define NDIS_OFFLOAD_PARAMS_SIZE sizeof(struct ndis_offload_params) #define NDIS_OFFLOAD_PARAMS_SIZE_6_1 \ __offsetof(struct ndis_offload_params, ndis_rsc_ip4) #define NDIS_OFFLOAD_PARAMS_REV_2 2 /* NDIS 6.1 */ #define NDIS_OFFLOAD_PARAMS_REV_3 3 /* NDIS 6.30 */ /* param_set */ #define NDIS_OFFLOAD_PARAM_NOCHG 0 /* common to all sets */ #define NDIS_OFFLOAD_PARAM_OFF 1 #define NDIS_OFFLOAD_PARAM_TX 2 #define NDIS_OFFLOAD_PARAM_RX 3 #define NDIS_OFFLOAD_PARAM_TXRX 4 /* lsov1_set */ /* NDIS_OFFLOAD_PARAM_NOCHG */ #define NDIS_OFFLOAD_LSOV1_OFF 1 #define NDIS_OFFLOAD_LSOV1_ON 2 /* ipsecv1_set */ /* NDIS_OFFLOAD_PARAM_NOCHG */ #define NDIS_OFFLOAD_IPSECV1_OFF 1 #define NDIS_OFFLOAD_IPSECV1_AH 2 #define NDIS_OFFLOAD_IPSECV1_ESP 3 #define NDIS_OFFLOAD_IPSECV1_AH_ESP 4 /* lsov2_set */ /* NDIS_OFFLOAD_PARAM_NOCHG */ #define NDIS_OFFLOAD_LSOV2_OFF 1 #define NDIS_OFFLOAD_LSOV2_ON 2 /* ipsecv2_set */ /* NDIS_OFFLOAD_PARAM_NOCHG */ #define NDIS_OFFLOAD_IPSECV2_OFF 1 #define NDIS_OFFLOAD_IPSECV2_AH 2 #define NDIS_OFFLOAD_IPSECV2_ESP 3 #define NDIS_OFFLOAD_IPSECV2_AH_ESP 4 /* rsc_set */ /* NDIS_OFFLOAD_PARAM_NOCHG */ #define NDIS_OFFLOAD_RSC_OFF 1 #define NDIS_OFFLOAD_RSC_ON 2 + +/* + * OID_GEN_RSS_CAPABILITIES + * ndis_type: NDIS_OBJTYPE_RSS_CAPS + */ +struct ndis_rss_caps { + struct ndis_object_hdr ndis_hdr; + uint32_t ndis_flags; /* NDIS_RSS_CAP_ */ + uint32_t ndis_nmsi; /* # of MSIs */ + uint32_t ndis_nrxr; /* # of RX rings */ + /* NDIS >= 6.30 */ + uint16_t ndis_nind; /* # of indtbl ent. 
*/ + uint16_t ndis_pad; +}; + +#define NDIS_RSS_CAPS_SIZE \ + __offsetof(struct ndis_rss_caps, ndis_pad) +#define NDIS_RSS_CAPS_SIZE_6_0 \ + __offsetof(struct ndis_rss_caps, ndis_nind) + +#define NDIS_RSS_CAPS_REV_1 1 /* NDIS 6.{0,1,20} */ +#define NDIS_RSS_CAPS_REV_2 2 /* NDIS 6.30 */ + +#define NDIS_RSS_CAP_MSI 0x01000000 +#define NDIS_RSS_CAP_CLASSIFY_ISR 0x02000000 +#define NDIS_RSS_CAP_CLASSIFY_DPC 0x04000000 +#define NDIS_RSS_CAP_MSIX 0x08000000 +#define NDIS_RSS_CAP_IPV4 0x00000100 +#define NDIS_RSS_CAP_IPV6 0x00000200 +#define NDIS_RSS_CAP_IPV6_EX 0x00000400 +#define NDIS_RSS_CAP_HASH_TOEPLITZ 0x00000001 + +/* + * OID_GEN_RSS_PARAMETERS + * ndis_type: NDIS_OBJTYPE_RSS_PARAMS + */ +struct ndis_rss_params { + struct ndis_object_hdr ndis_hdr; + uint16_t ndis_flags; /* NDIS_RSS_FLAG_ */ + uint16_t ndis_bcpu; /* base cpu 0 */ + uint32_t ndis_hash; /* NDIS_HASH_ */ + uint16_t ndis_indsize; /* indirect table */ + uint32_t ndis_indoffset; + uint16_t ndis_keysize; /* hash key */ + uint32_t ndis_keyoffset; + /* NDIS >= 6.20 */ + uint32_t ndis_cpumaskoffset; + uint32_t ndis_cpumaskcnt; + uint32_t ndis_cpumaskentsz; +}; + +#define NDIS_RSS_PARAMS_SIZE sizeof(struct ndis_rss_params) +#define NDIS_RSS_PARAMS_SIZE_6_0 \ + __offsetof(struct ndis_rss_params, ndis_cpumaskoffset) + +#define NDIS_RSS_PARAMS_REV_1 1 /* NDIS 6.0 */ +#define NDIS_RSS_PARAMS_REV_2 2 /* NDIS 6.20 */ + +#define NDIS_RSS_FLAG_BCPU_UNCHG 0x0001 +#define NDIS_RSS_FLAG_HASH_UNCHG 0x0002 +#define NDIS_RSS_FLAG_IND_UNCHG 0x0004 +#define NDIS_RSS_FLAG_KEY_UNCHG 0x0008 +#define NDIS_RSS_FLAG_DISABLE 0x0010 + +/* non-standard convenient struct */ +struct ndis_rssprm_toeplitz { + struct ndis_rss_params rss_params; + /* Toeplitz hash key */ + uint8_t rss_key[NDIS_HASH_KEYSIZE_TOEPLITZ]; + /* Indirect table */ + uint32_t rss_ind[NDIS_HASH_INDCNT]; +}; #endif /* !_NET_NDIS_H_ */ Index: projects/clang390-import/sys/dev/mfi/mfi.c =================================================================== --- projects/clang390-import/sys/dev/mfi/mfi.c (revision 305016) +++ projects/clang390-import/sys/dev/mfi/mfi.c (revision 305017) @@ -1,3797 +1,3797 @@ /*- * Copyright (c) 2006 IronPort Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /*- * Copyright (c) 2007 LSI Corp. * Copyright (c) 2007 Rajesh Prabhakaran. 
* All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_compat.h" #include "opt_mfi.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static int mfi_alloc_commands(struct mfi_softc *); static int mfi_comms_init(struct mfi_softc *); static int mfi_get_controller_info(struct mfi_softc *); static int mfi_get_log_state(struct mfi_softc *, struct mfi_evt_log_state **); static int mfi_parse_entries(struct mfi_softc *, int, int); static void mfi_data_cb(void *, bus_dma_segment_t *, int, int); static void mfi_startup(void *arg); static void mfi_intr(void *arg); static void mfi_ldprobe(struct mfi_softc *sc); static void mfi_syspdprobe(struct mfi_softc *sc); static void mfi_handle_evt(void *context, int pending); static int mfi_aen_register(struct mfi_softc *sc, int seq, int locale); static void mfi_aen_complete(struct mfi_command *); static int mfi_add_ld(struct mfi_softc *sc, int); static void mfi_add_ld_complete(struct mfi_command *); static int mfi_add_sys_pd(struct mfi_softc *sc, int); static void mfi_add_sys_pd_complete(struct mfi_command *); static struct mfi_command * mfi_bio_command(struct mfi_softc *); static void mfi_bio_complete(struct mfi_command *); static struct mfi_command *mfi_build_ldio(struct mfi_softc *,struct bio*); static struct mfi_command *mfi_build_syspdio(struct mfi_softc *,struct bio*); static int mfi_send_frame(struct mfi_softc *, struct mfi_command *); static int mfi_std_send_frame(struct mfi_softc *, struct mfi_command *); static int mfi_abort(struct mfi_softc *, struct mfi_command **); static int mfi_linux_ioctl_int(struct cdev *, u_long, caddr_t, int, struct thread *); static void mfi_timeout(void *); static int mfi_user_command(struct mfi_softc *, struct mfi_ioc_passthru *); static void mfi_enable_intr_xscale(struct mfi_softc *sc); static void mfi_enable_intr_ppc(struct mfi_softc *sc); static int32_t mfi_read_fw_status_xscale(struct mfi_softc *sc); static int32_t mfi_read_fw_status_ppc(struct mfi_softc *sc); static int mfi_check_clear_intr_xscale(struct mfi_softc *sc); static int mfi_check_clear_intr_ppc(struct mfi_softc *sc); static 
void mfi_issue_cmd_xscale(struct mfi_softc *sc, bus_addr_t bus_add, uint32_t frame_cnt); static void mfi_issue_cmd_ppc(struct mfi_softc *sc, bus_addr_t bus_add, uint32_t frame_cnt); static int mfi_config_lock(struct mfi_softc *sc, uint32_t opcode); static void mfi_config_unlock(struct mfi_softc *sc, int locked); static int mfi_check_command_pre(struct mfi_softc *sc, struct mfi_command *cm); static void mfi_check_command_post(struct mfi_softc *sc, struct mfi_command *cm); static int mfi_check_for_sscd(struct mfi_softc *sc, struct mfi_command *cm); SYSCTL_NODE(_hw, OID_AUTO, mfi, CTLFLAG_RD, 0, "MFI driver parameters"); static int mfi_event_locale = MFI_EVT_LOCALE_ALL; SYSCTL_INT(_hw_mfi, OID_AUTO, event_locale, CTLFLAG_RWTUN, &mfi_event_locale, 0, "event message locale"); static int mfi_event_class = MFI_EVT_CLASS_INFO; SYSCTL_INT(_hw_mfi, OID_AUTO, event_class, CTLFLAG_RWTUN, &mfi_event_class, 0, "event message class"); static int mfi_max_cmds = 128; SYSCTL_INT(_hw_mfi, OID_AUTO, max_cmds, CTLFLAG_RDTUN, &mfi_max_cmds, 0, "Max commands limit (-1 = controller limit)"); static int mfi_detect_jbod_change = 1; SYSCTL_INT(_hw_mfi, OID_AUTO, detect_jbod_change, CTLFLAG_RWTUN, &mfi_detect_jbod_change, 0, "Detect a change to a JBOD"); int mfi_polled_cmd_timeout = MFI_POLL_TIMEOUT_SECS; SYSCTL_INT(_hw_mfi, OID_AUTO, polled_cmd_timeout, CTLFLAG_RWTUN, &mfi_polled_cmd_timeout, 0, "Polled command timeout - used for firmware flash etc (in seconds)"); static int mfi_cmd_timeout = MFI_CMD_TIMEOUT; SYSCTL_INT(_hw_mfi, OID_AUTO, cmd_timeout, CTLFLAG_RWTUN, &mfi_cmd_timeout, 0, "Command timeout (in seconds)"); /* Management interface */ static d_open_t mfi_open; static d_close_t mfi_close; static d_ioctl_t mfi_ioctl; static d_poll_t mfi_poll; static struct cdevsw mfi_cdevsw = { .d_version = D_VERSION, .d_flags = 0, .d_open = mfi_open, .d_close = mfi_close, .d_ioctl = mfi_ioctl, .d_poll = mfi_poll, .d_name = "mfi", }; MALLOC_DEFINE(M_MFIBUF, "mfibuf", "Buffers for the MFI driver"); #define MFI_INQ_LENGTH SHORT_INQUIRY_LENGTH struct mfi_skinny_dma_info mfi_skinny; static void mfi_enable_intr_xscale(struct mfi_softc *sc) { MFI_WRITE4(sc, MFI_OMSK, 0x01); } static void mfi_enable_intr_ppc(struct mfi_softc *sc) { if (sc->mfi_flags & MFI_FLAGS_1078) { MFI_WRITE4(sc, MFI_ODCR0, 0xFFFFFFFF); MFI_WRITE4(sc, MFI_OMSK, ~MFI_1078_EIM); } else if (sc->mfi_flags & MFI_FLAGS_GEN2) { MFI_WRITE4(sc, MFI_ODCR0, 0xFFFFFFFF); MFI_WRITE4(sc, MFI_OMSK, ~MFI_GEN2_EIM); } else if (sc->mfi_flags & MFI_FLAGS_SKINNY) { MFI_WRITE4(sc, MFI_OMSK, ~0x00000001); } } static int32_t mfi_read_fw_status_xscale(struct mfi_softc *sc) { return MFI_READ4(sc, MFI_OMSG0); } static int32_t mfi_read_fw_status_ppc(struct mfi_softc *sc) { return MFI_READ4(sc, MFI_OSP0); } static int mfi_check_clear_intr_xscale(struct mfi_softc *sc) { int32_t status; status = MFI_READ4(sc, MFI_OSTS); if ((status & MFI_OSTS_INTR_VALID) == 0) return 1; MFI_WRITE4(sc, MFI_OSTS, status); return 0; } static int mfi_check_clear_intr_ppc(struct mfi_softc *sc) { int32_t status; status = MFI_READ4(sc, MFI_OSTS); if (sc->mfi_flags & MFI_FLAGS_1078) { if (!(status & MFI_1078_RM)) { return 1; } } else if (sc->mfi_flags & MFI_FLAGS_GEN2) { if (!(status & MFI_GEN2_RM)) { return 1; } } else if (sc->mfi_flags & MFI_FLAGS_SKINNY) { if (!(status & MFI_SKINNY_RM)) { return 1; } } if (sc->mfi_flags & MFI_FLAGS_SKINNY) MFI_WRITE4(sc, MFI_OSTS, status); else MFI_WRITE4(sc, MFI_ODCR0, status); return 0; } static void mfi_issue_cmd_xscale(struct mfi_softc *sc, bus_addr_t bus_add, uint32_t 
frame_cnt) { MFI_WRITE4(sc, MFI_IQP,(bus_add >>3)|frame_cnt); } static void mfi_issue_cmd_ppc(struct mfi_softc *sc, bus_addr_t bus_add, uint32_t frame_cnt) { if (sc->mfi_flags & MFI_FLAGS_SKINNY) { MFI_WRITE4(sc, MFI_IQPL, (bus_add | frame_cnt <<1)|1 ); MFI_WRITE4(sc, MFI_IQPH, 0x00000000); } else { MFI_WRITE4(sc, MFI_IQP, (bus_add | frame_cnt <<1)|1 ); } } int mfi_transition_firmware(struct mfi_softc *sc) { uint32_t fw_state, cur_state; int max_wait, i; uint32_t cur_abs_reg_val = 0; uint32_t prev_abs_reg_val = 0; cur_abs_reg_val = sc->mfi_read_fw_status(sc); fw_state = cur_abs_reg_val & MFI_FWSTATE_MASK; while (fw_state != MFI_FWSTATE_READY) { if (bootverbose) device_printf(sc->mfi_dev, "Waiting for firmware to " "become ready\n"); cur_state = fw_state; switch (fw_state) { case MFI_FWSTATE_FAULT: device_printf(sc->mfi_dev, "Firmware fault\n"); return (ENXIO); case MFI_FWSTATE_WAIT_HANDSHAKE: if (sc->mfi_flags & MFI_FLAGS_SKINNY || sc->mfi_flags & MFI_FLAGS_TBOLT) MFI_WRITE4(sc, MFI_SKINNY_IDB, MFI_FWINIT_CLEAR_HANDSHAKE); else MFI_WRITE4(sc, MFI_IDB, MFI_FWINIT_CLEAR_HANDSHAKE); max_wait = MFI_RESET_WAIT_TIME; break; case MFI_FWSTATE_OPERATIONAL: if (sc->mfi_flags & MFI_FLAGS_SKINNY || sc->mfi_flags & MFI_FLAGS_TBOLT) MFI_WRITE4(sc, MFI_SKINNY_IDB, 7); else MFI_WRITE4(sc, MFI_IDB, MFI_FWINIT_READY); max_wait = MFI_RESET_WAIT_TIME; break; case MFI_FWSTATE_UNDEFINED: case MFI_FWSTATE_BB_INIT: max_wait = MFI_RESET_WAIT_TIME; break; case MFI_FWSTATE_FW_INIT_2: max_wait = MFI_RESET_WAIT_TIME; break; case MFI_FWSTATE_FW_INIT: case MFI_FWSTATE_FLUSH_CACHE: max_wait = MFI_RESET_WAIT_TIME; break; case MFI_FWSTATE_DEVICE_SCAN: max_wait = MFI_RESET_WAIT_TIME; /* wait for 180 seconds */ prev_abs_reg_val = cur_abs_reg_val; break; case MFI_FWSTATE_BOOT_MESSAGE_PENDING: if (sc->mfi_flags & MFI_FLAGS_SKINNY || sc->mfi_flags & MFI_FLAGS_TBOLT) MFI_WRITE4(sc, MFI_SKINNY_IDB, MFI_FWINIT_HOTPLUG); else MFI_WRITE4(sc, MFI_IDB, MFI_FWINIT_HOTPLUG); max_wait = MFI_RESET_WAIT_TIME; break; default: device_printf(sc->mfi_dev, "Unknown firmware state %#x\n", fw_state); return (ENXIO); } for (i = 0; i < (max_wait * 10); i++) { cur_abs_reg_val = sc->mfi_read_fw_status(sc); fw_state = cur_abs_reg_val & MFI_FWSTATE_MASK; if (fw_state == cur_state) DELAY(100000); else break; } if (fw_state == MFI_FWSTATE_DEVICE_SCAN) { /* Check the device scanning progress */ if (prev_abs_reg_val != cur_abs_reg_val) { continue; } } if (fw_state == cur_state) { device_printf(sc->mfi_dev, "Firmware stuck in state " "%#x\n", fw_state); return (ENXIO); } } return (0); } static void mfi_addr_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error) { bus_addr_t *addr; addr = arg; *addr = segs[0].ds_addr; } int mfi_attach(struct mfi_softc *sc) { uint32_t status; int error, commsz, framessz, sensesz; int frames, unit, max_fw_sge, max_fw_cmds; uint32_t tb_mem_size = 0; struct cdev *dev_t; if (sc == NULL) return EINVAL; device_printf(sc->mfi_dev, "Megaraid SAS driver Ver %s \n", MEGASAS_VERSION); mtx_init(&sc->mfi_io_lock, "MFI I/O lock", NULL, MTX_DEF); sx_init(&sc->mfi_config_lock, "MFI config"); TAILQ_INIT(&sc->mfi_ld_tqh); TAILQ_INIT(&sc->mfi_syspd_tqh); TAILQ_INIT(&sc->mfi_ld_pend_tqh); TAILQ_INIT(&sc->mfi_syspd_pend_tqh); TAILQ_INIT(&sc->mfi_evt_queue); TASK_INIT(&sc->mfi_evt_task, 0, mfi_handle_evt, sc); TASK_INIT(&sc->mfi_map_sync_task, 0, mfi_handle_map_sync, sc); TAILQ_INIT(&sc->mfi_aen_pids); TAILQ_INIT(&sc->mfi_cam_ccbq); mfi_initq_free(sc); mfi_initq_ready(sc); mfi_initq_busy(sc); mfi_initq_bio(sc); sc->adpreset = 0; 
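/*
 * Illustrative sketch, excluded from the build: the shape of the
 * per-state wait in mfi_transition_firmware() above.  Each state gets
 * max_wait seconds, polled at 100ms granularity, and the loop exits as
 * soon as the state field moves.  The ex_*() names are hypothetical.
 */
#if 0
extern void ex_delay_us(int us);

static int
ex_wait_state_change(uint32_t (*read_status)(void *), void *sc,
    uint32_t cur_state, uint32_t state_mask, int max_wait_secs)
{
	uint32_t fw_state = cur_state;
	int i;

	for (i = 0; i < max_wait_secs * 10; i++) {
		fw_state = read_status(sc) & state_mask;
		if (fw_state != cur_state)
			break;
		ex_delay_us(100000);	/* DELAY(100000): 100ms */
	}
	return (fw_state == cur_state ? -1 : 0);	/* -1: stuck */
}
#endif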
sc->last_seq_num = 0; sc->disableOnlineCtrlReset = 1; sc->issuepend_done = 1; sc->hw_crit_error = 0; if (sc->mfi_flags & MFI_FLAGS_1064R) { sc->mfi_enable_intr = mfi_enable_intr_xscale; sc->mfi_read_fw_status = mfi_read_fw_status_xscale; sc->mfi_check_clear_intr = mfi_check_clear_intr_xscale; sc->mfi_issue_cmd = mfi_issue_cmd_xscale; } else if (sc->mfi_flags & MFI_FLAGS_TBOLT) { sc->mfi_enable_intr = mfi_tbolt_enable_intr_ppc; sc->mfi_disable_intr = mfi_tbolt_disable_intr_ppc; sc->mfi_read_fw_status = mfi_tbolt_read_fw_status_ppc; sc->mfi_check_clear_intr = mfi_tbolt_check_clear_intr_ppc; sc->mfi_issue_cmd = mfi_tbolt_issue_cmd_ppc; sc->mfi_adp_reset = mfi_tbolt_adp_reset; sc->mfi_tbolt = 1; TAILQ_INIT(&sc->mfi_cmd_tbolt_tqh); } else { sc->mfi_enable_intr = mfi_enable_intr_ppc; sc->mfi_read_fw_status = mfi_read_fw_status_ppc; sc->mfi_check_clear_intr = mfi_check_clear_intr_ppc; sc->mfi_issue_cmd = mfi_issue_cmd_ppc; } /* Before we get too far, see if the firmware is working */ if ((error = mfi_transition_firmware(sc)) != 0) { device_printf(sc->mfi_dev, "Firmware not in READY state, " "error %d\n", error); return (ENXIO); } /* Start: LSIP200113393 */ if (bus_dma_tag_create( sc->mfi_parent_dmat, /* parent */ 1, 0, /* algnmnt, boundary */ BUS_SPACE_MAXADDR_32BIT,/* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ MEGASAS_MAX_NAME*sizeof(bus_addr_t), /* maxsize */ 1, /* msegments */ MEGASAS_MAX_NAME*sizeof(bus_addr_t), /* maxsegsize */ 0, /* flags */ NULL, NULL, /* lockfunc, lockarg */ &sc->verbuf_h_dmat)) { device_printf(sc->mfi_dev, "Cannot allocate verbuf_h_dmat DMA tag\n"); return (ENOMEM); } if (bus_dmamem_alloc(sc->verbuf_h_dmat, (void **)&sc->verbuf, BUS_DMA_NOWAIT, &sc->verbuf_h_dmamap)) { device_printf(sc->mfi_dev, "Cannot allocate verbuf_h_dmamap memory\n"); return (ENOMEM); } bzero(sc->verbuf, MEGASAS_MAX_NAME*sizeof(bus_addr_t)); bus_dmamap_load(sc->verbuf_h_dmat, sc->verbuf_h_dmamap, sc->verbuf, MEGASAS_MAX_NAME*sizeof(bus_addr_t), mfi_addr_cb, &sc->verbuf_h_busaddr, 0); /* End: LSIP200113393 */ /* * Get information needed for sizing the contiguous memory for the * frame pool. Size down the sgl parameter since we know that * we will never need more than what's required for MAXPHYS. * It would be nice if these constants were available at runtime * instead of compile time. 
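 */

/*
 * A sketch of the decode performed right after this comment: the
 * firmware status register packs the command-slot count in its low bits
 * and the S/G limit above bit 16.  The EX_* masks are placeholders for
 * the MFI_FWSTATE_MAXCMD_MASK and MFI_FWSTATE_MAXSGL_MASK values in the
 * real headers.
 */
#include <stdint.h>

#define EX_MAXCMD_MASK	0x0000ffffu	/* placeholder */
#define EX_MAXSGL_MASK	0x00ff0000u	/* placeholder */

static void
ex_decode_fw_status(uint32_t status, int *max_cmds, int *max_sge)
{
	*max_cmds = (int)(status & EX_MAXCMD_MASK);
	*max_sge = (int)((status & EX_MAXSGL_MASK) >> 16);
}

/*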
*/ status = sc->mfi_read_fw_status(sc); max_fw_cmds = status & MFI_FWSTATE_MAXCMD_MASK; if (mfi_max_cmds > 0 && mfi_max_cmds < max_fw_cmds) { device_printf(sc->mfi_dev, "FW MaxCmds = %d, limiting to %d\n", max_fw_cmds, mfi_max_cmds); sc->mfi_max_fw_cmds = mfi_max_cmds; } else { sc->mfi_max_fw_cmds = max_fw_cmds; } max_fw_sge = (status & MFI_FWSTATE_MAXSGL_MASK) >> 16; sc->mfi_max_sge = min(max_fw_sge, ((MFI_MAXPHYS / PAGE_SIZE) + 1)); /* ThunderBolt Support get the contiguous memory */ if (sc->mfi_flags & MFI_FLAGS_TBOLT) { mfi_tbolt_init_globals(sc); device_printf(sc->mfi_dev, "MaxCmd = %d, Drv MaxCmd = %d, " "MaxSgl = %d, state = %#x\n", max_fw_cmds, sc->mfi_max_fw_cmds, sc->mfi_max_sge, status); tb_mem_size = mfi_tbolt_get_memory_requirement(sc); if (bus_dma_tag_create( sc->mfi_parent_dmat, /* parent */ 1, 0, /* algnmnt, boundary */ BUS_SPACE_MAXADDR_32BIT,/* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ tb_mem_size, /* maxsize */ 1, /* msegments */ tb_mem_size, /* maxsegsize */ 0, /* flags */ NULL, NULL, /* lockfunc, lockarg */ &sc->mfi_tb_dmat)) { device_printf(sc->mfi_dev, "Cannot allocate comms DMA tag\n"); return (ENOMEM); } if (bus_dmamem_alloc(sc->mfi_tb_dmat, (void **)&sc->request_message_pool, BUS_DMA_NOWAIT, &sc->mfi_tb_dmamap)) { device_printf(sc->mfi_dev, "Cannot allocate comms memory\n"); return (ENOMEM); } bzero(sc->request_message_pool, tb_mem_size); bus_dmamap_load(sc->mfi_tb_dmat, sc->mfi_tb_dmamap, sc->request_message_pool, tb_mem_size, mfi_addr_cb, &sc->mfi_tb_busaddr, 0); /* For ThunderBolt memory init */ if (bus_dma_tag_create( sc->mfi_parent_dmat, /* parent */ 0x100, 0, /* alignmnt, boundary */ BUS_SPACE_MAXADDR_32BIT,/* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ MFI_FRAME_SIZE, /* maxsize */ 1, /* msegments */ MFI_FRAME_SIZE, /* maxsegsize */ 0, /* flags */ NULL, NULL, /* lockfunc, lockarg */ &sc->mfi_tb_init_dmat)) { device_printf(sc->mfi_dev, "Cannot allocate init DMA tag\n"); return (ENOMEM); } if (bus_dmamem_alloc(sc->mfi_tb_init_dmat, (void **)&sc->mfi_tb_init, BUS_DMA_NOWAIT, &sc->mfi_tb_init_dmamap)) { device_printf(sc->mfi_dev, "Cannot allocate init memory\n"); return (ENOMEM); } bzero(sc->mfi_tb_init, MFI_FRAME_SIZE); bus_dmamap_load(sc->mfi_tb_init_dmat, sc->mfi_tb_init_dmamap, sc->mfi_tb_init, MFI_FRAME_SIZE, mfi_addr_cb, &sc->mfi_tb_init_busaddr, 0); if (mfi_tbolt_init_desc_pool(sc, sc->request_message_pool, tb_mem_size)) { device_printf(sc->mfi_dev, "Thunderbolt pool preparation error\n"); return 0; } /* Allocate DMA memory mapping for MPI2 IOC Init descriptor, we are taking it different from what we have allocated for Request and reply descriptors to avoid confusion later */ tb_mem_size = sizeof(struct MPI2_IOC_INIT_REQUEST); if (bus_dma_tag_create( sc->mfi_parent_dmat, /* parent */ 1, 0, /* algnmnt, boundary */ BUS_SPACE_MAXADDR_32BIT,/* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ tb_mem_size, /* maxsize */ 1, /* msegments */ tb_mem_size, /* maxsegsize */ 0, /* flags */ NULL, NULL, /* lockfunc, lockarg */ &sc->mfi_tb_ioc_init_dmat)) { device_printf(sc->mfi_dev, "Cannot allocate comms DMA tag\n"); return (ENOMEM); } if (bus_dmamem_alloc(sc->mfi_tb_ioc_init_dmat, (void **)&sc->mfi_tb_ioc_init_desc, BUS_DMA_NOWAIT, &sc->mfi_tb_ioc_init_dmamap)) { device_printf(sc->mfi_dev, "Cannot allocate comms memory\n"); return (ENOMEM); } bzero(sc->mfi_tb_ioc_init_desc, tb_mem_size); bus_dmamap_load(sc->mfi_tb_ioc_init_dmat, sc->mfi_tb_ioc_init_dmamap, 
sc->mfi_tb_ioc_init_desc, tb_mem_size, mfi_addr_cb, &sc->mfi_tb_ioc_init_busaddr, 0); } /* * Create the dma tag for data buffers. Used both for block I/O * and for various internal data queries. */ if (bus_dma_tag_create( sc->mfi_parent_dmat, /* parent */ 1, 0, /* algnmnt, boundary */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ BUS_SPACE_MAXSIZE_32BIT,/* maxsize */ sc->mfi_max_sge, /* nsegments */ BUS_SPACE_MAXSIZE_32BIT,/* maxsegsize */ BUS_DMA_ALLOCNOW, /* flags */ busdma_lock_mutex, /* lockfunc */ &sc->mfi_io_lock, /* lockfuncarg */ &sc->mfi_buffer_dmat)) { device_printf(sc->mfi_dev, "Cannot allocate buffer DMA tag\n"); return (ENOMEM); } /* * Allocate DMA memory for the comms queues. Keep it under 4GB for * efficiency. The mfi_hwcomms struct includes space for 1 reply queue * entry, so the calculated size here will be will be 1 more than * mfi_max_fw_cmds. This is apparently a requirement of the hardware. */ commsz = (sizeof(uint32_t) * sc->mfi_max_fw_cmds) + sizeof(struct mfi_hwcomms); if (bus_dma_tag_create( sc->mfi_parent_dmat, /* parent */ 1, 0, /* algnmnt, boundary */ BUS_SPACE_MAXADDR_32BIT,/* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ commsz, /* maxsize */ 1, /* msegments */ commsz, /* maxsegsize */ 0, /* flags */ NULL, NULL, /* lockfunc, lockarg */ &sc->mfi_comms_dmat)) { device_printf(sc->mfi_dev, "Cannot allocate comms DMA tag\n"); return (ENOMEM); } if (bus_dmamem_alloc(sc->mfi_comms_dmat, (void **)&sc->mfi_comms, BUS_DMA_NOWAIT, &sc->mfi_comms_dmamap)) { device_printf(sc->mfi_dev, "Cannot allocate comms memory\n"); return (ENOMEM); } bzero(sc->mfi_comms, commsz); bus_dmamap_load(sc->mfi_comms_dmat, sc->mfi_comms_dmamap, sc->mfi_comms, commsz, mfi_addr_cb, &sc->mfi_comms_busaddr, 0); /* * Allocate DMA memory for the command frames. Keep them in the * lower 4GB for efficiency. Calculate the size of the commands at * the same time; each command is one 64 byte frame plus a set of * additional frames for holding sg lists or other data. * The assumption here is that the SG list will start at the second * frame and not use the unused bytes in the first frame. While this * isn't technically correct, it simplifies the calculation and allows * for command frames that might be larger than an mfi_io_frame. 
*/ if (sizeof(bus_addr_t) == 8) { sc->mfi_sge_size = sizeof(struct mfi_sg64); sc->mfi_flags |= MFI_FLAGS_SG64; } else { sc->mfi_sge_size = sizeof(struct mfi_sg32); } if (sc->mfi_flags & MFI_FLAGS_SKINNY) sc->mfi_sge_size = sizeof(struct mfi_sg_skinny); frames = (sc->mfi_sge_size * sc->mfi_max_sge - 1) / MFI_FRAME_SIZE + 2; sc->mfi_cmd_size = frames * MFI_FRAME_SIZE; framessz = sc->mfi_cmd_size * sc->mfi_max_fw_cmds; if (bus_dma_tag_create( sc->mfi_parent_dmat, /* parent */ 64, 0, /* algnmnt, boundary */ BUS_SPACE_MAXADDR_32BIT,/* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ framessz, /* maxsize */ 1, /* nsegments */ framessz, /* maxsegsize */ 0, /* flags */ NULL, NULL, /* lockfunc, lockarg */ &sc->mfi_frames_dmat)) { device_printf(sc->mfi_dev, "Cannot allocate frame DMA tag\n"); return (ENOMEM); } if (bus_dmamem_alloc(sc->mfi_frames_dmat, (void **)&sc->mfi_frames, BUS_DMA_NOWAIT, &sc->mfi_frames_dmamap)) { device_printf(sc->mfi_dev, "Cannot allocate frames memory\n"); return (ENOMEM); } bzero(sc->mfi_frames, framessz); bus_dmamap_load(sc->mfi_frames_dmat, sc->mfi_frames_dmamap, sc->mfi_frames, framessz, mfi_addr_cb, &sc->mfi_frames_busaddr,0); /* * Allocate DMA memory for the frame sense data. Keep them in the * lower 4GB for efficiency */ sensesz = sc->mfi_max_fw_cmds * MFI_SENSE_LEN; if (bus_dma_tag_create( sc->mfi_parent_dmat, /* parent */ 4, 0, /* algnmnt, boundary */ BUS_SPACE_MAXADDR_32BIT,/* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ sensesz, /* maxsize */ 1, /* nsegments */ sensesz, /* maxsegsize */ 0, /* flags */ NULL, NULL, /* lockfunc, lockarg */ &sc->mfi_sense_dmat)) { device_printf(sc->mfi_dev, "Cannot allocate sense DMA tag\n"); return (ENOMEM); } if (bus_dmamem_alloc(sc->mfi_sense_dmat, (void **)&sc->mfi_sense, BUS_DMA_NOWAIT, &sc->mfi_sense_dmamap)) { device_printf(sc->mfi_dev, "Cannot allocate sense memory\n"); return (ENOMEM); } bus_dmamap_load(sc->mfi_sense_dmat, sc->mfi_sense_dmamap, sc->mfi_sense, sensesz, mfi_addr_cb, &sc->mfi_sense_busaddr, 0); if ((error = mfi_alloc_commands(sc)) != 0) return (error); /* Before moving the FW to operational state, check whether * hostmemory is required by the FW or not */ /* ThunderBolt MFI_IOC2 INIT */ if (sc->mfi_flags & MFI_FLAGS_TBOLT) { sc->mfi_disable_intr(sc); mtx_lock(&sc->mfi_io_lock); if ((error = mfi_tbolt_init_MFI_queue(sc)) != 0) { device_printf(sc->mfi_dev, "TB Init has failed with error %d\n",error); mtx_unlock(&sc->mfi_io_lock); return error; } mtx_unlock(&sc->mfi_io_lock); if ((error = mfi_tbolt_alloc_cmd(sc)) != 0) return error; if (bus_setup_intr(sc->mfi_dev, sc->mfi_irq, INTR_MPSAFE|INTR_TYPE_BIO, NULL, mfi_intr_tbolt, sc, &sc->mfi_intr)) { device_printf(sc->mfi_dev, "Cannot set up interrupt\n"); return (EINVAL); } sc->mfi_intr_ptr = mfi_intr_tbolt; sc->mfi_enable_intr(sc); } else { if ((error = mfi_comms_init(sc)) != 0) return (error); if (bus_setup_intr(sc->mfi_dev, sc->mfi_irq, INTR_MPSAFE|INTR_TYPE_BIO, NULL, mfi_intr, sc, &sc->mfi_intr)) { device_printf(sc->mfi_dev, "Cannot set up interrupt\n"); return (EINVAL); } sc->mfi_intr_ptr = mfi_intr; sc->mfi_enable_intr(sc); } if ((error = mfi_get_controller_info(sc)) != 0) return (error); sc->disableOnlineCtrlReset = 0; /* Register a config hook to probe the bus for arrays */ sc->mfi_ich.ich_func = mfi_startup; sc->mfi_ich.ich_arg = sc; if (config_intrhook_establish(&sc->mfi_ich) != 0) { device_printf(sc->mfi_dev, "Cannot establish configuration " "hook\n"); return (EINVAL); } 
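/*
 * Illustrative sketch, excluded from the build: the per-command frame
 * count computed earlier in mfi_attach().  The S/G list is assumed to
 * begin at the second 64-byte frame, so its bytes are rounded up to
 * whole frames and one extra frame is added for the command header.
 */
#if 0
static int
ex_frames_per_command(int sge_size, int max_sge, int frame_size)
{
	/* ceil(sgl bytes / frame size) + 1 header frame */
	return ((sge_size * max_sge - 1) / frame_size + 2);
}
#endif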
mtx_lock(&sc->mfi_io_lock); if ((error = mfi_aen_setup(sc, 0), 0) != 0) { mtx_unlock(&sc->mfi_io_lock); return (error); } mtx_unlock(&sc->mfi_io_lock); /* * Register a shutdown handler. */ if ((sc->mfi_eh = EVENTHANDLER_REGISTER(shutdown_final, mfi_shutdown, sc, SHUTDOWN_PRI_DEFAULT)) == NULL) { device_printf(sc->mfi_dev, "Warning: shutdown event " "registration failed\n"); } /* * Create the control device for doing management */ unit = device_get_unit(sc->mfi_dev); sc->mfi_cdev = make_dev(&mfi_cdevsw, unit, UID_ROOT, GID_OPERATOR, 0640, "mfi%d", unit); if (unit == 0) make_dev_alias_p(MAKEDEV_CHECKNAME | MAKEDEV_WAITOK, &dev_t, sc->mfi_cdev, "%s", "megaraid_sas_ioctl_node"); if (sc->mfi_cdev != NULL) sc->mfi_cdev->si_drv1 = sc; SYSCTL_ADD_INT(device_get_sysctl_ctx(sc->mfi_dev), SYSCTL_CHILDREN(device_get_sysctl_tree(sc->mfi_dev)), OID_AUTO, "delete_busy_volumes", CTLFLAG_RW, &sc->mfi_delete_busy_volumes, 0, "Allow removal of busy volumes"); SYSCTL_ADD_INT(device_get_sysctl_ctx(sc->mfi_dev), SYSCTL_CHILDREN(device_get_sysctl_tree(sc->mfi_dev)), OID_AUTO, "keep_deleted_volumes", CTLFLAG_RW, &sc->mfi_keep_deleted_volumes, 0, "Don't detach the mfid device for a busy volume that is deleted"); device_add_child(sc->mfi_dev, "mfip", -1); bus_generic_attach(sc->mfi_dev); /* Start the timeout watchdog */ callout_init(&sc->mfi_watchdog_callout, 1); callout_reset(&sc->mfi_watchdog_callout, mfi_cmd_timeout * hz, mfi_timeout, sc); if (sc->mfi_flags & MFI_FLAGS_TBOLT) { mtx_lock(&sc->mfi_io_lock); mfi_tbolt_sync_map_info(sc); mtx_unlock(&sc->mfi_io_lock); } return (0); } static int mfi_alloc_commands(struct mfi_softc *sc) { struct mfi_command *cm; int i, j; /* * XXX Should we allocate all the commands up front, or allocate on * demand later like 'aac' does? */ sc->mfi_commands = malloc(sizeof(sc->mfi_commands[0]) * sc->mfi_max_fw_cmds, M_MFIBUF, M_WAITOK | M_ZERO); for (i = 0; i < sc->mfi_max_fw_cmds; i++) { cm = &sc->mfi_commands[i]; cm->cm_frame = (union mfi_frame *)((uintptr_t)sc->mfi_frames + sc->mfi_cmd_size * i); cm->cm_frame_busaddr = sc->mfi_frames_busaddr + sc->mfi_cmd_size * i; cm->cm_frame->header.context = i; cm->cm_sense = &sc->mfi_sense[i]; cm->cm_sense_busaddr= sc->mfi_sense_busaddr + MFI_SENSE_LEN * i; cm->cm_sc = sc; cm->cm_index = i; if (bus_dmamap_create(sc->mfi_buffer_dmat, 0, &cm->cm_dmamap) == 0) { mtx_lock(&sc->mfi_io_lock); mfi_release_command(cm); mtx_unlock(&sc->mfi_io_lock); } else { device_printf(sc->mfi_dev, "Failed to allocate %d " "command blocks, only allocated %d\n", sc->mfi_max_fw_cmds, i - 1); for (j = 0; j < i; j++) { cm = &sc->mfi_commands[i]; bus_dmamap_destroy(sc->mfi_buffer_dmat, cm->cm_dmamap); } free(sc->mfi_commands, M_MFIBUF); sc->mfi_commands = NULL; return (ENOMEM); } } return (0); } void mfi_release_command(struct mfi_command *cm) { struct mfi_frame_header *hdr; uint32_t *hdr_data; mtx_assert(&cm->cm_sc->mfi_io_lock, MA_OWNED); /* * Zero out the important fields of the frame, but make sure the * context field is preserved. For efficiency, handle the fields * as 32 bit words. Clear out the first S/G entry too for safety. */ hdr = &cm->cm_frame->header; if (cm->cm_data != NULL && hdr->sg_count) { cm->cm_sg->sg32[0].len = 0; cm->cm_sg->sg32[0].addr = 0; } /* * Command may be on other queues e.g. 
busy queue depending on the * flow of a previous call to mfi_mapcmd, so ensure its dequeued * properly */ if ((cm->cm_flags & MFI_ON_MFIQ_BUSY) != 0) mfi_remove_busy(cm); if ((cm->cm_flags & MFI_ON_MFIQ_READY) != 0) mfi_remove_ready(cm); /* We're not expecting it to be on any other queue but check */ if ((cm->cm_flags & MFI_ON_MFIQ_MASK) != 0) { panic("Command %p is still on another queue, flags = %#x", cm, cm->cm_flags); } /* tbolt cleanup */ if ((cm->cm_flags & MFI_CMD_TBOLT) != 0) { mfi_tbolt_return_cmd(cm->cm_sc, cm->cm_sc->mfi_cmd_pool_tbolt[cm->cm_extra_frames - 1], cm); } hdr_data = (uint32_t *)cm->cm_frame; hdr_data[0] = 0; /* cmd, sense_len, cmd_status, scsi_status */ hdr_data[1] = 0; /* target_id, lun_id, cdb_len, sg_count */ hdr_data[4] = 0; /* flags, timeout */ hdr_data[5] = 0; /* data_len */ cm->cm_extra_frames = 0; cm->cm_flags = 0; cm->cm_complete = NULL; cm->cm_private = NULL; cm->cm_data = NULL; cm->cm_sg = 0; cm->cm_total_frame_size = 0; cm->retry_for_fw_reset = 0; mfi_enqueue_free(cm); } int mfi_dcmd_command(struct mfi_softc *sc, struct mfi_command **cmp, uint32_t opcode, void **bufp, size_t bufsize) { struct mfi_command *cm; struct mfi_dcmd_frame *dcmd; void *buf = NULL; uint32_t context = 0; mtx_assert(&sc->mfi_io_lock, MA_OWNED); cm = mfi_dequeue_free(sc); if (cm == NULL) return (EBUSY); /* Zero out the MFI frame */ context = cm->cm_frame->header.context; bzero(cm->cm_frame, sizeof(union mfi_frame)); cm->cm_frame->header.context = context; if ((bufsize > 0) && (bufp != NULL)) { if (*bufp == NULL) { buf = malloc(bufsize, M_MFIBUF, M_NOWAIT|M_ZERO); if (buf == NULL) { mfi_release_command(cm); return (ENOMEM); } *bufp = buf; } else { buf = *bufp; } } dcmd = &cm->cm_frame->dcmd; bzero(dcmd->mbox, MFI_MBOX_SIZE); dcmd->header.cmd = MFI_CMD_DCMD; dcmd->header.timeout = 0; dcmd->header.flags = 0; dcmd->header.data_len = bufsize; dcmd->header.scsi_status = 0; dcmd->opcode = opcode; cm->cm_sg = &dcmd->sgl; cm->cm_total_frame_size = MFI_DCMD_FRAME_SIZE; cm->cm_flags = 0; cm->cm_data = buf; cm->cm_private = buf; cm->cm_len = bufsize; *cmp = cm; if ((bufp != NULL) && (*bufp == NULL) && (buf != NULL)) *bufp = buf; return (0); } static int mfi_comms_init(struct mfi_softc *sc) { struct mfi_command *cm; struct mfi_init_frame *init; struct mfi_init_qinfo *qinfo; int error; uint32_t context = 0; mtx_lock(&sc->mfi_io_lock); if ((cm = mfi_dequeue_free(sc)) == NULL) { mtx_unlock(&sc->mfi_io_lock); return (EBUSY); } /* Zero out the MFI frame */ context = cm->cm_frame->header.context; bzero(cm->cm_frame, sizeof(union mfi_frame)); cm->cm_frame->header.context = context; /* * Abuse the SG list area of the frame to hold the init_qinfo * object; */ init = &cm->cm_frame->init; qinfo = (struct mfi_init_qinfo *)((uintptr_t)init + MFI_FRAME_SIZE); bzero(qinfo, sizeof(struct mfi_init_qinfo)); qinfo->rq_entries = sc->mfi_max_fw_cmds + 1; qinfo->rq_addr_lo = sc->mfi_comms_busaddr + offsetof(struct mfi_hwcomms, hw_reply_q); qinfo->pi_addr_lo = sc->mfi_comms_busaddr + offsetof(struct mfi_hwcomms, hw_pi); qinfo->ci_addr_lo = sc->mfi_comms_busaddr + offsetof(struct mfi_hwcomms, hw_ci); init->header.cmd = MFI_CMD_INIT; init->header.data_len = sizeof(struct mfi_init_qinfo); init->qinfo_new_addr_lo = cm->cm_frame_busaddr + MFI_FRAME_SIZE; cm->cm_data = NULL; cm->cm_flags = MFI_CMD_POLLED; if ((error = mfi_mapcmd(sc, cm)) != 0) device_printf(sc->mfi_dev, "failed to send init command\n"); mfi_release_command(cm); mtx_unlock(&sc->mfi_io_lock); return (error); } static int mfi_get_controller_info(struct 
mfi_softc *sc) { struct mfi_command *cm = NULL; struct mfi_ctrl_info *ci = NULL; uint32_t max_sectors_1, max_sectors_2; int error; mtx_lock(&sc->mfi_io_lock); error = mfi_dcmd_command(sc, &cm, MFI_DCMD_CTRL_GETINFO, (void **)&ci, sizeof(*ci)); if (error) goto out; cm->cm_flags = MFI_CMD_DATAIN | MFI_CMD_POLLED; if ((error = mfi_mapcmd(sc, cm)) != 0) { device_printf(sc->mfi_dev, "Failed to get controller info\n"); sc->mfi_max_io = (sc->mfi_max_sge - 1) * PAGE_SIZE / MFI_SECTOR_LEN; error = 0; goto out; } bus_dmamap_sync(sc->mfi_buffer_dmat, cm->cm_dmamap, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(sc->mfi_buffer_dmat, cm->cm_dmamap); max_sectors_1 = (1 << ci->stripe_sz_ops.max) * ci->max_strips_per_io; max_sectors_2 = ci->max_request_size; sc->mfi_max_io = min(max_sectors_1, max_sectors_2); sc->disableOnlineCtrlReset = ci->properties.OnOffProperties.disableOnlineCtrlReset; out: if (ci) free(ci, M_MFIBUF); if (cm) mfi_release_command(cm); mtx_unlock(&sc->mfi_io_lock); return (error); } static int mfi_get_log_state(struct mfi_softc *sc, struct mfi_evt_log_state **log_state) { struct mfi_command *cm = NULL; int error; mtx_assert(&sc->mfi_io_lock, MA_OWNED); error = mfi_dcmd_command(sc, &cm, MFI_DCMD_CTRL_EVENT_GETINFO, (void **)log_state, sizeof(**log_state)); if (error) goto out; cm->cm_flags = MFI_CMD_DATAIN | MFI_CMD_POLLED; if ((error = mfi_mapcmd(sc, cm)) != 0) { device_printf(sc->mfi_dev, "Failed to get log state\n"); goto out; } bus_dmamap_sync(sc->mfi_buffer_dmat, cm->cm_dmamap, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(sc->mfi_buffer_dmat, cm->cm_dmamap); out: if (cm) mfi_release_command(cm); return (error); } int mfi_aen_setup(struct mfi_softc *sc, uint32_t seq_start) { struct mfi_evt_log_state *log_state = NULL; union mfi_evt class_locale; int error = 0; uint32_t seq; mtx_assert(&sc->mfi_io_lock, MA_OWNED); class_locale.members.reserved = 0; class_locale.members.locale = mfi_event_locale; class_locale.members.evt_class = mfi_event_class; if (seq_start == 0) { if ((error = mfi_get_log_state(sc, &log_state)) != 0) goto out; sc->mfi_boot_seq_num = log_state->boot_seq_num; /* * Walk through any events that fired since the last * shutdown. */ if ((error = mfi_parse_entries(sc, log_state->shutdown_seq_num, log_state->newest_seq_num)) != 0) goto out; seq = log_state->newest_seq_num; } else seq = seq_start; error = mfi_aen_register(sc, seq, class_locale.word); out: free(log_state, M_MFIBUF); return (error); } int mfi_wait_command(struct mfi_softc *sc, struct mfi_command *cm) { mtx_assert(&sc->mfi_io_lock, MA_OWNED); cm->cm_complete = NULL; /* * MegaCli can issue a DCMD of 0. 
In this case do nothing * and return 0 to it as status */ if (cm->cm_frame->dcmd.opcode == 0) { cm->cm_frame->header.cmd_status = MFI_STAT_OK; cm->cm_error = 0; return (cm->cm_error); } mfi_enqueue_ready(cm); mfi_startio(sc); if ((cm->cm_flags & MFI_CMD_COMPLETED) == 0) msleep(cm, &sc->mfi_io_lock, PRIBIO, "mfiwait", 0); return (cm->cm_error); } void mfi_free(struct mfi_softc *sc) { struct mfi_command *cm; int i; callout_drain(&sc->mfi_watchdog_callout); if (sc->mfi_cdev != NULL) destroy_dev(sc->mfi_cdev); if (sc->mfi_commands != NULL) { for (i = 0; i < sc->mfi_max_fw_cmds; i++) { cm = &sc->mfi_commands[i]; bus_dmamap_destroy(sc->mfi_buffer_dmat, cm->cm_dmamap); } free(sc->mfi_commands, M_MFIBUF); sc->mfi_commands = NULL; } if (sc->mfi_intr) bus_teardown_intr(sc->mfi_dev, sc->mfi_irq, sc->mfi_intr); if (sc->mfi_irq != NULL) bus_release_resource(sc->mfi_dev, SYS_RES_IRQ, sc->mfi_irq_rid, sc->mfi_irq); if (sc->mfi_sense_busaddr != 0) bus_dmamap_unload(sc->mfi_sense_dmat, sc->mfi_sense_dmamap); if (sc->mfi_sense != NULL) bus_dmamem_free(sc->mfi_sense_dmat, sc->mfi_sense, sc->mfi_sense_dmamap); if (sc->mfi_sense_dmat != NULL) bus_dma_tag_destroy(sc->mfi_sense_dmat); if (sc->mfi_frames_busaddr != 0) bus_dmamap_unload(sc->mfi_frames_dmat, sc->mfi_frames_dmamap); if (sc->mfi_frames != NULL) bus_dmamem_free(sc->mfi_frames_dmat, sc->mfi_frames, sc->mfi_frames_dmamap); if (sc->mfi_frames_dmat != NULL) bus_dma_tag_destroy(sc->mfi_frames_dmat); if (sc->mfi_comms_busaddr != 0) bus_dmamap_unload(sc->mfi_comms_dmat, sc->mfi_comms_dmamap); if (sc->mfi_comms != NULL) bus_dmamem_free(sc->mfi_comms_dmat, sc->mfi_comms, sc->mfi_comms_dmamap); if (sc->mfi_comms_dmat != NULL) bus_dma_tag_destroy(sc->mfi_comms_dmat); /* ThunderBolt contiguous memory free here */ if (sc->mfi_flags & MFI_FLAGS_TBOLT) { if (sc->mfi_tb_busaddr != 0) bus_dmamap_unload(sc->mfi_tb_dmat, sc->mfi_tb_dmamap); if (sc->request_message_pool != NULL) bus_dmamem_free(sc->mfi_tb_dmat, sc->request_message_pool, sc->mfi_tb_dmamap); if (sc->mfi_tb_dmat != NULL) bus_dma_tag_destroy(sc->mfi_tb_dmat); /* Version buffer memory free */ /* Start LSIP200113393 */ if (sc->verbuf_h_busaddr != 0) bus_dmamap_unload(sc->verbuf_h_dmat, sc->verbuf_h_dmamap); if (sc->verbuf != NULL) bus_dmamem_free(sc->verbuf_h_dmat, sc->verbuf, sc->verbuf_h_dmamap); if (sc->verbuf_h_dmat != NULL) bus_dma_tag_destroy(sc->verbuf_h_dmat); /* End LSIP200113393 */ /* ThunderBolt INIT packet memory Free */ if (sc->mfi_tb_init_busaddr != 0) bus_dmamap_unload(sc->mfi_tb_init_dmat, sc->mfi_tb_init_dmamap); if (sc->mfi_tb_init != NULL) bus_dmamem_free(sc->mfi_tb_init_dmat, sc->mfi_tb_init, sc->mfi_tb_init_dmamap); if (sc->mfi_tb_init_dmat != NULL) bus_dma_tag_destroy(sc->mfi_tb_init_dmat); /* ThunderBolt IOC Init Desc memory free here */ if (sc->mfi_tb_ioc_init_busaddr != 0) bus_dmamap_unload(sc->mfi_tb_ioc_init_dmat, sc->mfi_tb_ioc_init_dmamap); if (sc->mfi_tb_ioc_init_desc != NULL) bus_dmamem_free(sc->mfi_tb_ioc_init_dmat, sc->mfi_tb_ioc_init_desc, sc->mfi_tb_ioc_init_dmamap); if (sc->mfi_tb_ioc_init_dmat != NULL) bus_dma_tag_destroy(sc->mfi_tb_ioc_init_dmat); if (sc->mfi_cmd_pool_tbolt != NULL) { for (int i = 0; i < sc->mfi_max_fw_cmds; i++) { if (sc->mfi_cmd_pool_tbolt[i] != NULL) { free(sc->mfi_cmd_pool_tbolt[i], M_MFIBUF); sc->mfi_cmd_pool_tbolt[i] = NULL; } } free(sc->mfi_cmd_pool_tbolt, M_MFIBUF); sc->mfi_cmd_pool_tbolt = NULL; } if (sc->request_desc_pool != NULL) { free(sc->request_desc_pool, M_MFIBUF); sc->request_desc_pool = NULL; } } if (sc->mfi_buffer_dmat != NULL) 
bus_dma_tag_destroy(sc->mfi_buffer_dmat); if (sc->mfi_parent_dmat != NULL) bus_dma_tag_destroy(sc->mfi_parent_dmat); if (mtx_initialized(&sc->mfi_io_lock)) { mtx_destroy(&sc->mfi_io_lock); sx_destroy(&sc->mfi_config_lock); } return; } static void mfi_startup(void *arg) { struct mfi_softc *sc; sc = (struct mfi_softc *)arg; config_intrhook_disestablish(&sc->mfi_ich); sc->mfi_enable_intr(sc); sx_xlock(&sc->mfi_config_lock); mtx_lock(&sc->mfi_io_lock); mfi_ldprobe(sc); if (sc->mfi_flags & MFI_FLAGS_SKINNY) mfi_syspdprobe(sc); mtx_unlock(&sc->mfi_io_lock); sx_xunlock(&sc->mfi_config_lock); } static void mfi_intr(void *arg) { struct mfi_softc *sc; struct mfi_command *cm; uint32_t pi, ci, context; sc = (struct mfi_softc *)arg; if (sc->mfi_check_clear_intr(sc)) return; restart: pi = sc->mfi_comms->hw_pi; ci = sc->mfi_comms->hw_ci; mtx_lock(&sc->mfi_io_lock); while (ci != pi) { context = sc->mfi_comms->hw_reply_q[ci]; if (context < sc->mfi_max_fw_cmds) { cm = &sc->mfi_commands[context]; mfi_remove_busy(cm); cm->cm_error = 0; mfi_complete(sc, cm); } if (++ci == (sc->mfi_max_fw_cmds + 1)) ci = 0; } sc->mfi_comms->hw_ci = ci; /* Give deferred I/O a chance to run */ sc->mfi_flags &= ~MFI_FLAGS_QFRZN; mfi_startio(sc); mtx_unlock(&sc->mfi_io_lock); /* * Dummy read to flush the bus; this ensures that the indexes are up * to date. Restart processing if more commands have come in. */ (void)sc->mfi_read_fw_status(sc); if (pi != sc->mfi_comms->hw_pi) goto restart; return; } int mfi_shutdown(struct mfi_softc *sc) { struct mfi_dcmd_frame *dcmd; struct mfi_command *cm; int error; if (sc->mfi_aen_cm != NULL) { sc->cm_aen_abort = 1; mfi_abort(sc, &sc->mfi_aen_cm); } if (sc->mfi_map_sync_cm != NULL) { sc->cm_map_abort = 1; mfi_abort(sc, &sc->mfi_map_sync_cm); } mtx_lock(&sc->mfi_io_lock); error = mfi_dcmd_command(sc, &cm, MFI_DCMD_CTRL_SHUTDOWN, NULL, 0); if (error) { mtx_unlock(&sc->mfi_io_lock); return (error); } dcmd = &cm->cm_frame->dcmd; dcmd->header.flags = MFI_FRAME_DIR_NONE; cm->cm_flags = MFI_CMD_POLLED; cm->cm_data = NULL; if ((error = mfi_mapcmd(sc, cm)) != 0) device_printf(sc->mfi_dev, "Failed to shutdown controller\n"); mfi_release_command(cm); mtx_unlock(&sc->mfi_io_lock); return (error); } static void mfi_syspdprobe(struct mfi_softc *sc) { struct mfi_frame_header *hdr; struct mfi_command *cm = NULL; struct mfi_pd_list *pdlist = NULL; struct mfi_system_pd *syspd, *tmp; struct mfi_system_pending *syspd_pend; int error, i, found; sx_assert(&sc->mfi_config_lock, SA_XLOCKED); mtx_assert(&sc->mfi_io_lock, MA_OWNED); /* Add SYSTEM PD's */ error = mfi_dcmd_command(sc, &cm, MFI_DCMD_PD_LIST_QUERY, (void **)&pdlist, sizeof(*pdlist)); if (error) { device_printf(sc->mfi_dev, "Error while forming SYSTEM PD list\n"); goto out; } cm->cm_flags = MFI_CMD_DATAIN | MFI_CMD_POLLED; cm->cm_frame->dcmd.mbox[0] = MR_PD_QUERY_TYPE_EXPOSED_TO_HOST; cm->cm_frame->dcmd.mbox[1] = 0; if (mfi_mapcmd(sc, cm) != 0) { device_printf(sc->mfi_dev, "Failed to get syspd device listing\n"); goto out; } bus_dmamap_sync(sc->mfi_buffer_dmat, cm->cm_dmamap, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(sc->mfi_buffer_dmat, cm->cm_dmamap); hdr = &cm->cm_frame->header; if (hdr->cmd_status != MFI_STAT_OK) { device_printf(sc->mfi_dev, "MFI_DCMD_PD_LIST_QUERY failed %x\n", hdr->cmd_status); goto out; } /* Get each PD and add it to the system */ for (i = 0; i < pdlist->count; i++) { if (pdlist->addr[i].device_id == pdlist->addr[i].encl_device_id) continue; found = 0; TAILQ_FOREACH(syspd, &sc->mfi_syspd_tqh, pd_link) { if (syspd->pd_id == 
pdlist->addr[i].device_id) found = 1; } TAILQ_FOREACH(syspd_pend, &sc->mfi_syspd_pend_tqh, pd_link) { if (syspd_pend->pd_id == pdlist->addr[i].device_id) found = 1; } if (found == 0) mfi_add_sys_pd(sc, pdlist->addr[i].device_id); } /* Delete SYSPD's whose state has been changed */ TAILQ_FOREACH_SAFE(syspd, &sc->mfi_syspd_tqh, pd_link, tmp) { found = 0; for (i = 0; i < pdlist->count; i++) { if (syspd->pd_id == pdlist->addr[i].device_id) { found = 1; break; } } if (found == 0) { printf("DELETE\n"); mtx_unlock(&sc->mfi_io_lock); mtx_lock(&Giant); device_delete_child(sc->mfi_dev, syspd->pd_dev); mtx_unlock(&Giant); mtx_lock(&sc->mfi_io_lock); } } out: if (pdlist) free(pdlist, M_MFIBUF); if (cm) mfi_release_command(cm); return; } static void mfi_ldprobe(struct mfi_softc *sc) { struct mfi_frame_header *hdr; struct mfi_command *cm = NULL; struct mfi_ld_list *list = NULL; struct mfi_disk *ld; struct mfi_disk_pending *ld_pend; int error, i; sx_assert(&sc->mfi_config_lock, SA_XLOCKED); mtx_assert(&sc->mfi_io_lock, MA_OWNED); error = mfi_dcmd_command(sc, &cm, MFI_DCMD_LD_GET_LIST, (void **)&list, sizeof(*list)); if (error) goto out; cm->cm_flags = MFI_CMD_DATAIN; if (mfi_wait_command(sc, cm) != 0) { device_printf(sc->mfi_dev, "Failed to get device listing\n"); goto out; } hdr = &cm->cm_frame->header; if (hdr->cmd_status != MFI_STAT_OK) { device_printf(sc->mfi_dev, "MFI_DCMD_LD_GET_LIST failed %x\n", hdr->cmd_status); goto out; } for (i = 0; i < list->ld_count; i++) { TAILQ_FOREACH(ld, &sc->mfi_ld_tqh, ld_link) { if (ld->ld_id == list->ld_list[i].ld.v.target_id) goto skip_add; } TAILQ_FOREACH(ld_pend, &sc->mfi_ld_pend_tqh, ld_link) { if (ld_pend->ld_id == list->ld_list[i].ld.v.target_id) goto skip_add; } mfi_add_ld(sc, list->ld_list[i].ld.v.target_id); skip_add:; } out: if (list) free(list, M_MFIBUF); if (cm) mfi_release_command(cm); return; } /* * The timestamp is the number of seconds since 00:00 Jan 1, 2000. If * the bits in 24-31 are all set, then it is the number of seconds since * boot. 
*/ static const char * format_timestamp(uint32_t timestamp) { static char buffer[32]; if ((timestamp & 0xff000000) == 0xff000000) snprintf(buffer, sizeof(buffer), "boot + %us", timestamp & 0x00ffffff); else snprintf(buffer, sizeof(buffer), "%us", timestamp); return (buffer); } static const char * format_class(int8_t class) { static char buffer[6]; switch (class) { case MFI_EVT_CLASS_DEBUG: return ("debug"); case MFI_EVT_CLASS_PROGRESS: return ("progress"); case MFI_EVT_CLASS_INFO: return ("info"); case MFI_EVT_CLASS_WARNING: return ("WARN"); case MFI_EVT_CLASS_CRITICAL: return ("CRIT"); case MFI_EVT_CLASS_FATAL: return ("FATAL"); case MFI_EVT_CLASS_DEAD: return ("DEAD"); default: snprintf(buffer, sizeof(buffer), "%d", class); return (buffer); } } static void mfi_decode_evt(struct mfi_softc *sc, struct mfi_evt_detail *detail) { struct mfi_system_pd *syspd = NULL; device_printf(sc->mfi_dev, "%d (%s/0x%04x/%s) - %s\n", detail->seq, format_timestamp(detail->time), detail->evt_class.members.locale, format_class(detail->evt_class.members.evt_class), detail->description); /* Don't act on old AEN's or while shutting down */ if (detail->seq < sc->mfi_boot_seq_num || sc->mfi_detaching) return; switch (detail->arg_type) { case MR_EVT_ARGS_NONE: if (detail->code == MR_EVT_CTRL_HOST_BUS_SCAN_REQUESTED) { device_printf(sc->mfi_dev, "HostBus scan raised\n"); if (mfi_detect_jbod_change) { /* * Probe for new SYSPD's and delete * invalid SYSPD's */ sx_xlock(&sc->mfi_config_lock); mtx_lock(&sc->mfi_io_lock); mfi_syspdprobe(sc); mtx_unlock(&sc->mfi_io_lock); sx_xunlock(&sc->mfi_config_lock); } } break; case MR_EVT_ARGS_LD_STATE: /* At load time the driver reads all the events starting * from the one that was logged after shutdown. Avoid * acting on these old events. */ if (detail->args.ld_state.new_state == MFI_LD_STATE_OFFLINE) { /* Remove the LD */ struct mfi_disk *ld; TAILQ_FOREACH(ld, &sc->mfi_ld_tqh, ld_link) { if (ld->ld_id == detail->args.ld_state.ld.target_id) break; } /* Fix for kernel panics when SSCD is removed: KASSERT(ld != NULL, ("volume disappeared")); */ if (ld != NULL) { mtx_lock(&Giant); device_delete_child(sc->mfi_dev, ld->ld_dev); mtx_unlock(&Giant); } } break; case MR_EVT_ARGS_PD: if (detail->code == MR_EVT_PD_REMOVED) { if (mfi_detect_jbod_change) { /* * If the removed device is a SYSPD then * delete it */ TAILQ_FOREACH(syspd, &sc->mfi_syspd_tqh, pd_link) { if (syspd->pd_id == detail->args.pd.device_id) { mtx_lock(&Giant); device_delete_child( sc->mfi_dev, syspd->pd_dev); mtx_unlock(&Giant); break; } } } } if (detail->code == MR_EVT_PD_INSERTED) { if (mfi_detect_jbod_change) { /* Probe for new SYSPD's */ sx_xlock(&sc->mfi_config_lock); mtx_lock(&sc->mfi_io_lock); mfi_syspdprobe(sc); mtx_unlock(&sc->mfi_io_lock); sx_xunlock(&sc->mfi_config_lock); } } if (sc->mfi_cam_rescan_cb != NULL && (detail->code == MR_EVT_PD_INSERTED || detail->code == MR_EVT_PD_REMOVED)) { sc->mfi_cam_rescan_cb(sc, detail->args.pd.device_id); } break; } } static void mfi_queue_evt(struct mfi_softc *sc, struct mfi_evt_detail *detail) { struct mfi_evt_queue_elm *elm; mtx_assert(&sc->mfi_io_lock, MA_OWNED); elm = malloc(sizeof(*elm), M_MFIBUF, M_NOWAIT|M_ZERO); if (elm == NULL) return; memcpy(&elm->detail, detail, sizeof(*detail)); TAILQ_INSERT_TAIL(&sc->mfi_evt_queue, elm, link); taskqueue_enqueue(taskqueue_swi, &sc->mfi_evt_task); } static void mfi_handle_evt(void *context, int pending) { TAILQ_HEAD(,mfi_evt_queue_elm) queue; struct mfi_softc *sc; struct mfi_evt_queue_elm *elm; sc = context; TAILQ_INIT(&queue); 
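/* Steal the entire pending event list while holding the I/O lock, then decode the events after dropping it; mfi_decode_evt() may need to acquire Giant to delete child devices. */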
mtx_lock(&sc->mfi_io_lock); TAILQ_CONCAT(&queue, &sc->mfi_evt_queue, link); mtx_unlock(&sc->mfi_io_lock); while ((elm = TAILQ_FIRST(&queue)) != NULL) { TAILQ_REMOVE(&queue, elm, link); mfi_decode_evt(sc, &elm->detail); free(elm, M_MFIBUF); } } static int mfi_aen_register(struct mfi_softc *sc, int seq, int locale) { struct mfi_command *cm; struct mfi_dcmd_frame *dcmd; union mfi_evt current_aen, prior_aen; struct mfi_evt_detail *ed = NULL; int error = 0; mtx_assert(&sc->mfi_io_lock, MA_OWNED); current_aen.word = locale; if (sc->mfi_aen_cm != NULL) { prior_aen.word = ((uint32_t *)&sc->mfi_aen_cm->cm_frame->dcmd.mbox)[1]; if (prior_aen.members.evt_class <= current_aen.members.evt_class && !((prior_aen.members.locale & current_aen.members.locale) ^current_aen.members.locale)) { return (0); } else { prior_aen.members.locale |= current_aen.members.locale; if (prior_aen.members.evt_class < current_aen.members.evt_class) current_aen.members.evt_class = prior_aen.members.evt_class; mfi_abort(sc, &sc->mfi_aen_cm); } } error = mfi_dcmd_command(sc, &cm, MFI_DCMD_CTRL_EVENT_WAIT, (void **)&ed, sizeof(*ed)); if (error) goto out; dcmd = &cm->cm_frame->dcmd; ((uint32_t *)&dcmd->mbox)[0] = seq; ((uint32_t *)&dcmd->mbox)[1] = locale; cm->cm_flags = MFI_CMD_DATAIN; cm->cm_complete = mfi_aen_complete; sc->last_seq_num = seq; sc->mfi_aen_cm = cm; mfi_enqueue_ready(cm); mfi_startio(sc); out: return (error); } static void mfi_aen_complete(struct mfi_command *cm) { struct mfi_frame_header *hdr; struct mfi_softc *sc; struct mfi_evt_detail *detail; struct mfi_aen *mfi_aen_entry, *tmp; int seq = 0, aborted = 0; sc = cm->cm_sc; mtx_assert(&sc->mfi_io_lock, MA_OWNED); if (sc->mfi_aen_cm == NULL) return; hdr = &cm->cm_frame->header; if (sc->cm_aen_abort || hdr->cmd_status == MFI_STAT_INVALID_STATUS) { sc->cm_aen_abort = 0; aborted = 1; } else { sc->mfi_aen_triggered = 1; if (sc->mfi_poll_waiting) { sc->mfi_poll_waiting = 0; selwakeup(&sc->mfi_select); } detail = cm->cm_data; mfi_queue_evt(sc, detail); seq = detail->seq + 1; TAILQ_FOREACH_SAFE(mfi_aen_entry, &sc->mfi_aen_pids, aen_link, tmp) { TAILQ_REMOVE(&sc->mfi_aen_pids, mfi_aen_entry, aen_link); PROC_LOCK(mfi_aen_entry->p); kern_psignal(mfi_aen_entry->p, SIGIO); PROC_UNLOCK(mfi_aen_entry->p); free(mfi_aen_entry, M_MFIBUF); } } free(cm->cm_data, M_MFIBUF); wakeup(&sc->mfi_aen_cm); sc->mfi_aen_cm = NULL; mfi_release_command(cm); /* set it up again so the driver can catch more events */ if (!aborted) mfi_aen_setup(sc, seq); } #define MAX_EVENTS 15 static int mfi_parse_entries(struct mfi_softc *sc, int start_seq, int stop_seq) { struct mfi_command *cm; struct mfi_dcmd_frame *dcmd; struct mfi_evt_list *el; union mfi_evt class_locale; int error, i, seq, size; mtx_assert(&sc->mfi_io_lock, MA_OWNED); class_locale.members.reserved = 0; class_locale.members.locale = mfi_event_locale; class_locale.members.evt_class = mfi_event_class; size = sizeof(struct mfi_evt_list) + sizeof(struct mfi_evt_detail) * (MAX_EVENTS - 1); el = malloc(size, M_MFIBUF, M_NOWAIT | M_ZERO); if (el == NULL) return (ENOMEM); for (seq = start_seq;;) { if ((cm = mfi_dequeue_free(sc)) == NULL) { free(el, M_MFIBUF); return (EBUSY); } dcmd = &cm->cm_frame->dcmd; bzero(dcmd->mbox, MFI_MBOX_SIZE); dcmd->header.cmd = MFI_CMD_DCMD; dcmd->header.timeout = 0; dcmd->header.data_len = size; dcmd->opcode = MFI_DCMD_CTRL_EVENT_GET; ((uint32_t *)&dcmd->mbox)[0] = seq; ((uint32_t *)&dcmd->mbox)[1] = class_locale.word; cm->cm_sg = &dcmd->sgl; cm->cm_total_frame_size = MFI_DCMD_FRAME_SIZE; cm->cm_flags = MFI_CMD_DATAIN | 
MFI_CMD_POLLED; cm->cm_data = el; cm->cm_len = size; if ((error = mfi_mapcmd(sc, cm)) != 0) { device_printf(sc->mfi_dev, "Failed to get controller entries\n"); mfi_release_command(cm); break; } bus_dmamap_sync(sc->mfi_buffer_dmat, cm->cm_dmamap, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(sc->mfi_buffer_dmat, cm->cm_dmamap); if (dcmd->header.cmd_status == MFI_STAT_NOT_FOUND) { mfi_release_command(cm); break; } if (dcmd->header.cmd_status != MFI_STAT_OK) { device_printf(sc->mfi_dev, "Error %d fetching controller entries\n", dcmd->header.cmd_status); mfi_release_command(cm); error = EIO; break; } mfi_release_command(cm); for (i = 0; i < el->count; i++) { /* * If this event is newer than 'stop_seq' then * break out of the loop. Note that the log * is a circular buffer so we have to handle * the case that our stop point is earlier in * the buffer than our start point. */ if (el->event[i].seq >= stop_seq) { if (start_seq <= stop_seq) break; else if (el->event[i].seq < start_seq) break; } mfi_queue_evt(sc, &el->event[i]); } seq = el->event[el->count - 1].seq + 1; } free(el, M_MFIBUF); return (error); } static int mfi_add_ld(struct mfi_softc *sc, int id) { struct mfi_command *cm; struct mfi_dcmd_frame *dcmd = NULL; struct mfi_ld_info *ld_info = NULL; struct mfi_disk_pending *ld_pend; int error; mtx_assert(&sc->mfi_io_lock, MA_OWNED); ld_pend = malloc(sizeof(*ld_pend), M_MFIBUF, M_NOWAIT | M_ZERO); if (ld_pend != NULL) { ld_pend->ld_id = id; TAILQ_INSERT_TAIL(&sc->mfi_ld_pend_tqh, ld_pend, ld_link); } error = mfi_dcmd_command(sc, &cm, MFI_DCMD_LD_GET_INFO, (void **)&ld_info, sizeof(*ld_info)); if (error) { device_printf(sc->mfi_dev, "Failed to allocate for MFI_DCMD_LD_GET_INFO %d\n", error); if (ld_info) free(ld_info, M_MFIBUF); return (error); } cm->cm_flags = MFI_CMD_DATAIN; dcmd = &cm->cm_frame->dcmd; dcmd->mbox[0] = id; if (mfi_wait_command(sc, cm) != 0) { device_printf(sc->mfi_dev, "Failed to get logical drive: %d\n", id); free(ld_info, M_MFIBUF); return (0); } if (ld_info->ld_config.params.isSSCD != 1) mfi_add_ld_complete(cm); else { mfi_release_command(cm); if (ld_info) /* SSCD drives ld_info free here */ free(ld_info, M_MFIBUF); } return (0); } static void mfi_add_ld_complete(struct mfi_command *cm) { struct mfi_frame_header *hdr; struct mfi_ld_info *ld_info; struct mfi_softc *sc; device_t child; sc = cm->cm_sc; hdr = &cm->cm_frame->header; ld_info = cm->cm_private; if (sc->cm_map_abort || hdr->cmd_status != MFI_STAT_OK) { free(ld_info, M_MFIBUF); wakeup(&sc->mfi_map_sync_cm); mfi_release_command(cm); return; } wakeup(&sc->mfi_map_sync_cm); mfi_release_command(cm); mtx_unlock(&sc->mfi_io_lock); mtx_lock(&Giant); if ((child = device_add_child(sc->mfi_dev, "mfid", -1)) == NULL) { device_printf(sc->mfi_dev, "Failed to add logical disk\n"); free(ld_info, M_MFIBUF); mtx_unlock(&Giant); mtx_lock(&sc->mfi_io_lock); return; } device_set_ivars(child, ld_info); device_set_desc(child, "MFI Logical Disk"); bus_generic_attach(sc->mfi_dev); mtx_unlock(&Giant); mtx_lock(&sc->mfi_io_lock); } static int mfi_add_sys_pd(struct mfi_softc *sc, int id) { struct mfi_command *cm; struct mfi_dcmd_frame *dcmd = NULL; struct mfi_pd_info *pd_info = NULL; struct mfi_system_pending *syspd_pend; int error; mtx_assert(&sc->mfi_io_lock, MA_OWNED); syspd_pend = malloc(sizeof(*syspd_pend), M_MFIBUF, M_NOWAIT | M_ZERO); if (syspd_pend != NULL) { syspd_pend->pd_id = id; TAILQ_INSERT_TAIL(&sc->mfi_syspd_pend_tqh, syspd_pend, pd_link); } error = mfi_dcmd_command(sc, &cm, MFI_DCMD_PD_GET_INFO, (void **)&pd_info, sizeof(*pd_info)); 
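/* The DCMD is issued below as a polled command; on success, mfi_add_sys_pd_complete() attaches the mfisyspd child device. */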
if (error) { device_printf(sc->mfi_dev, "Failed to allocate for MFI_DCMD_PD_GET_INFO %d\n", error); if (pd_info) free(pd_info, M_MFIBUF); return (error); } cm->cm_flags = MFI_CMD_DATAIN | MFI_CMD_POLLED; dcmd = &cm->cm_frame->dcmd; dcmd->mbox[0] = id; dcmd->header.scsi_status = 0; dcmd->header.pad0 = 0; if ((error = mfi_mapcmd(sc, cm)) != 0) { device_printf(sc->mfi_dev, "Failed to get physical drive info %d\n", id); free(pd_info, M_MFIBUF); mfi_release_command(cm); return (error); } bus_dmamap_sync(sc->mfi_buffer_dmat, cm->cm_dmamap, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(sc->mfi_buffer_dmat, cm->cm_dmamap); mfi_add_sys_pd_complete(cm); return (0); } static void mfi_add_sys_pd_complete(struct mfi_command *cm) { struct mfi_frame_header *hdr; struct mfi_pd_info *pd_info; struct mfi_softc *sc; device_t child; sc = cm->cm_sc; hdr = &cm->cm_frame->header; pd_info = cm->cm_private; if (hdr->cmd_status != MFI_STAT_OK) { free(pd_info, M_MFIBUF); mfi_release_command(cm); return; } if (pd_info->fw_state != MFI_PD_STATE_SYSTEM) { device_printf(sc->mfi_dev, "PD=%x is not SYSTEM PD\n", pd_info->ref.v.device_id); free(pd_info, M_MFIBUF); mfi_release_command(cm); return; } mfi_release_command(cm); mtx_unlock(&sc->mfi_io_lock); mtx_lock(&Giant); if ((child = device_add_child(sc->mfi_dev, "mfisyspd", -1)) == NULL) { device_printf(sc->mfi_dev, "Failed to add system pd\n"); free(pd_info, M_MFIBUF); mtx_unlock(&Giant); mtx_lock(&sc->mfi_io_lock); return; } device_set_ivars(child, pd_info); device_set_desc(child, "MFI System PD"); bus_generic_attach(sc->mfi_dev); mtx_unlock(&Giant); mtx_lock(&sc->mfi_io_lock); } static struct mfi_command * mfi_bio_command(struct mfi_softc *sc) { struct bio *bio; struct mfi_command *cm = NULL; /* Reserve two commands to avoid starvation for IOCTLs */ if (sc->mfi_qstat[MFIQ_FREE].q_length < 2) { return (NULL); } if ((bio = mfi_dequeue_bio(sc)) == NULL) { return (NULL); } if ((uintptr_t)bio->bio_driver2 == MFI_LD_IO) { cm = mfi_build_ldio(sc, bio); } else if ((uintptr_t)bio->bio_driver2 == MFI_SYS_PD_IO) { cm = mfi_build_syspdio(sc, bio); } if (!cm) mfi_enqueue_bio(sc, bio); return (cm); } /* * mostly copied from cam/scsi/scsi_all.c:scsi_read_write */ int mfi_build_cdb(int readop, uint8_t byte2, u_int64_t lba, u_int32_t block_count, uint8_t *cdb) { int cdb_len; if (((lba & 0x1fffff) == lba) && ((block_count & 0xff) == block_count) && (byte2 == 0)) { /* We can fit in a 6 byte cdb */ struct scsi_rw_6 *scsi_cmd; scsi_cmd = (struct scsi_rw_6 *)cdb; scsi_cmd->opcode = readop ? READ_6 : WRITE_6; scsi_ulto3b(lba, scsi_cmd->addr); scsi_cmd->length = block_count & 0xff; scsi_cmd->control = 0; cdb_len = sizeof(*scsi_cmd); } else if (((block_count & 0xffff) == block_count) && ((lba & 0xffffffff) == lba)) { /* Need a 10 byte CDB */ struct scsi_rw_10 *scsi_cmd; scsi_cmd = (struct scsi_rw_10 *)cdb; scsi_cmd->opcode = readop ? READ_10 : WRITE_10; scsi_cmd->byte2 = byte2; scsi_ulto4b(lba, scsi_cmd->addr); scsi_cmd->reserved = 0; scsi_ulto2b(block_count, scsi_cmd->length); scsi_cmd->control = 0; cdb_len = sizeof(*scsi_cmd); } else if (((block_count & 0xffffffff) == block_count) && ((lba & 0xffffffff) == lba)) { /* Block count is too big for a 10 byte CDB; use a 12 byte CDB */ struct scsi_rw_12 *scsi_cmd; scsi_cmd = (struct scsi_rw_12 *)cdb; scsi_cmd->opcode = readop ? READ_12 : WRITE_12; scsi_cmd->byte2 = byte2; scsi_ulto4b(lba, scsi_cmd->addr); scsi_cmd->reserved = 0; scsi_ulto4b(block_count, scsi_cmd->length); scsi_cmd->control = 0; cdb_len = sizeof(*scsi_cmd); } else { /* * 16 byte CDB. 
We'll only get here if the LBA is larger * than 2^32 */ struct scsi_rw_16 *scsi_cmd; scsi_cmd = (struct scsi_rw_16 *)cdb; scsi_cmd->opcode = readop ? READ_16 : WRITE_16; scsi_cmd->byte2 = byte2; scsi_u64to8b(lba, scsi_cmd->addr); scsi_cmd->reserved = 0; scsi_ulto4b(block_count, scsi_cmd->length); scsi_cmd->control = 0; cdb_len = sizeof(*scsi_cmd); } return cdb_len; } extern char *unmapped_buf; static struct mfi_command * mfi_build_syspdio(struct mfi_softc *sc, struct bio *bio) { struct mfi_command *cm; struct mfi_pass_frame *pass; uint32_t context = 0; int flags = 0, blkcount = 0, readop; uint8_t cdb_len; mtx_assert(&sc->mfi_io_lock, MA_OWNED); if ((cm = mfi_dequeue_free(sc)) == NULL) return (NULL); /* Zero out the MFI frame */ context = cm->cm_frame->header.context; bzero(cm->cm_frame, sizeof(union mfi_frame)); cm->cm_frame->header.context = context; pass = &cm->cm_frame->pass; bzero(pass->cdb, 16); pass->header.cmd = MFI_CMD_PD_SCSI_IO; switch (bio->bio_cmd) { case BIO_READ: flags = MFI_CMD_DATAIN | MFI_CMD_BIO; readop = 1; break; case BIO_WRITE: flags = MFI_CMD_DATAOUT | MFI_CMD_BIO; readop = 0; break; default: /* TODO: what about BIO_DELETE??? */ panic("Unsupported bio command %x\n", bio->bio_cmd); } /* Cheat with the sector length to avoid a non-constant division */ blkcount = howmany(bio->bio_bcount, MFI_SECTOR_LEN); /* Fill the LBA and Transfer length in CDB */ cdb_len = mfi_build_cdb(readop, 0, bio->bio_pblkno, blkcount, pass->cdb); pass->header.target_id = (uintptr_t)bio->bio_driver1; pass->header.lun_id = 0; pass->header.timeout = 0; pass->header.flags = 0; pass->header.scsi_status = 0; pass->header.sense_len = MFI_SENSE_LEN; pass->header.data_len = bio->bio_bcount; pass->header.cdb_len = cdb_len; pass->sense_addr_lo = (uint32_t)cm->cm_sense_busaddr; pass->sense_addr_hi = (uint32_t)((uint64_t)cm->cm_sense_busaddr >> 32); cm->cm_complete = mfi_bio_complete; cm->cm_private = bio; cm->cm_data = unmapped_buf; cm->cm_len = bio->bio_bcount; cm->cm_sg = &pass->sgl; cm->cm_total_frame_size = MFI_PASS_FRAME_SIZE; cm->cm_flags = flags; return (cm); } static struct mfi_command * mfi_build_ldio(struct mfi_softc *sc, struct bio *bio) { struct mfi_io_frame *io; struct mfi_command *cm; int flags; uint32_t blkcount; uint32_t context = 0; mtx_assert(&sc->mfi_io_lock, MA_OWNED); if ((cm = mfi_dequeue_free(sc)) == NULL) return (NULL); /* Zero out the MFI frame */ context = cm->cm_frame->header.context; bzero(cm->cm_frame, sizeof(union mfi_frame)); cm->cm_frame->header.context = context; io = &cm->cm_frame->io; switch (bio->bio_cmd) { case BIO_READ: io->header.cmd = MFI_CMD_LD_READ; flags = MFI_CMD_DATAIN | MFI_CMD_BIO; break; case BIO_WRITE: io->header.cmd = MFI_CMD_LD_WRITE; flags = MFI_CMD_DATAOUT | MFI_CMD_BIO; break; default: /* TODO: what about BIO_DELETE??? 
*/ panic("Unsupported bio command %x\n", bio->bio_cmd); } /* Cheat with the sector length to avoid a non-constant division */ blkcount = howmany(bio->bio_bcount, MFI_SECTOR_LEN); io->header.target_id = (uintptr_t)bio->bio_driver1; io->header.timeout = 0; io->header.flags = 0; io->header.scsi_status = 0; io->header.sense_len = MFI_SENSE_LEN; io->header.data_len = blkcount; io->sense_addr_lo = (uint32_t)cm->cm_sense_busaddr; io->sense_addr_hi = (uint32_t)((uint64_t)cm->cm_sense_busaddr >> 32); io->lba_hi = (bio->bio_pblkno & 0xffffffff00000000) >> 32; io->lba_lo = bio->bio_pblkno & 0xffffffff; cm->cm_complete = mfi_bio_complete; cm->cm_private = bio; cm->cm_data = unmapped_buf; cm->cm_len = bio->bio_bcount; cm->cm_sg = &io->sgl; cm->cm_total_frame_size = MFI_IO_FRAME_SIZE; cm->cm_flags = flags; return (cm); } static void mfi_bio_complete(struct mfi_command *cm) { struct bio *bio; struct mfi_frame_header *hdr; struct mfi_softc *sc; bio = cm->cm_private; hdr = &cm->cm_frame->header; sc = cm->cm_sc; if ((hdr->cmd_status != MFI_STAT_OK) || (hdr->scsi_status != 0)) { bio->bio_flags |= BIO_ERROR; bio->bio_error = EIO; device_printf(sc->mfi_dev, "I/O error, cmd=%p, status=%#x, " "scsi_status=%#x\n", cm, hdr->cmd_status, hdr->scsi_status); mfi_print_sense(cm->cm_sc, cm->cm_sense); } else if (cm->cm_error != 0) { bio->bio_flags |= BIO_ERROR; bio->bio_error = cm->cm_error; device_printf(sc->mfi_dev, "I/O error, cmd=%p, error=%#x\n", cm, cm->cm_error); } mfi_release_command(cm); mfi_disk_complete(bio); } void mfi_startio(struct mfi_softc *sc) { struct mfi_command *cm; struct ccb_hdr *ccbh; for (;;) { /* Don't bother if we're short on resources */ if (sc->mfi_flags & MFI_FLAGS_QFRZN) break; /* Try a command that has already been prepared */ cm = mfi_dequeue_ready(sc); if (cm == NULL) { if ((ccbh = TAILQ_FIRST(&sc->mfi_cam_ccbq)) != NULL) cm = sc->mfi_cam_start(ccbh); } /* Nope, so look for work on the bioq */ if (cm == NULL) cm = mfi_bio_command(sc); /* No work available, so exit */ if (cm == NULL) break; /* Send the command to the controller */ if (mfi_mapcmd(sc, cm) != 0) { device_printf(sc->mfi_dev, "Failed to startio\n"); mfi_requeue_ready(cm); break; } } } int mfi_mapcmd(struct mfi_softc *sc, struct mfi_command *cm) { int error, polled; mtx_assert(&sc->mfi_io_lock, MA_OWNED); if ((cm->cm_data != NULL) && (cm->cm_frame->header.cmd != MFI_CMD_STP)) { polled = (cm->cm_flags & MFI_CMD_POLLED) ? BUS_DMA_NOWAIT : 0; if (cm->cm_flags & MFI_CMD_CCB) error = bus_dmamap_load_ccb(sc->mfi_buffer_dmat, cm->cm_dmamap, cm->cm_data, mfi_data_cb, cm, polled); else if (cm->cm_flags & MFI_CMD_BIO) error = bus_dmamap_load_bio(sc->mfi_buffer_dmat, cm->cm_dmamap, cm->cm_private, mfi_data_cb, cm, polled); else error = bus_dmamap_load(sc->mfi_buffer_dmat, cm->cm_dmamap, cm->cm_data, cm->cm_len, mfi_data_cb, cm, polled); if (error == EINPROGRESS) { sc->mfi_flags |= MFI_FLAGS_QFRZN; return (0); } } else { error = mfi_send_frame(sc, cm); } return (error); } static void mfi_data_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error) { struct mfi_frame_header *hdr; struct mfi_command *cm; union mfi_sgl *sgl; struct mfi_softc *sc; int i, j, first, dir; int sge_size, locked; cm = (struct mfi_command *)arg; sc = cm->cm_sc; hdr = &cm->cm_frame->header; sgl = cm->cm_sg; /* * We need to check if we have the lock as this is an async * callback so even though our caller mfi_mapcmd asserts * it has the lock, there is no guarantee that it hasn't been * dropped if bus_dmamap_load returned prior to our * completion. 
*/ if ((locked = mtx_owned(&sc->mfi_io_lock)) == 0) mtx_lock(&sc->mfi_io_lock); if (error) { printf("error %d in callback\n", error); cm->cm_error = error; mfi_complete(sc, cm); goto out; } /* Use IEEE sgl only for IO's on a SKINNY controller * For other commands on a SKINNY controller use either * sg32 or sg64 based on the sizeof(bus_addr_t). * Also calculate the total frame size based on the type * of SGL used. */ if (((cm->cm_frame->header.cmd == MFI_CMD_PD_SCSI_IO) || (cm->cm_frame->header.cmd == MFI_CMD_LD_READ) || (cm->cm_frame->header.cmd == MFI_CMD_LD_WRITE)) && (sc->mfi_flags & MFI_FLAGS_SKINNY)) { for (i = 0; i < nsegs; i++) { sgl->sg_skinny[i].addr = segs[i].ds_addr; sgl->sg_skinny[i].len = segs[i].ds_len; sgl->sg_skinny[i].flag = 0; } hdr->flags |= MFI_FRAME_IEEE_SGL | MFI_FRAME_SGL64; sge_size = sizeof(struct mfi_sg_skinny); hdr->sg_count = nsegs; } else { j = 0; if (cm->cm_frame->header.cmd == MFI_CMD_STP) { first = cm->cm_stp_len; if ((sc->mfi_flags & MFI_FLAGS_SG64) == 0) { sgl->sg32[j].addr = segs[0].ds_addr; sgl->sg32[j++].len = first; } else { sgl->sg64[j].addr = segs[0].ds_addr; sgl->sg64[j++].len = first; } } else first = 0; if ((sc->mfi_flags & MFI_FLAGS_SG64) == 0) { for (i = 0; i < nsegs; i++) { sgl->sg32[j].addr = segs[i].ds_addr + first; sgl->sg32[j++].len = segs[i].ds_len - first; first = 0; } } else { for (i = 0; i < nsegs; i++) { sgl->sg64[j].addr = segs[i].ds_addr + first; sgl->sg64[j++].len = segs[i].ds_len - first; first = 0; } hdr->flags |= MFI_FRAME_SGL64; } hdr->sg_count = j; sge_size = sc->mfi_sge_size; } dir = 0; if (cm->cm_flags & MFI_CMD_DATAIN) { dir |= BUS_DMASYNC_PREREAD; hdr->flags |= MFI_FRAME_DIR_READ; } if (cm->cm_flags & MFI_CMD_DATAOUT) { dir |= BUS_DMASYNC_PREWRITE; hdr->flags |= MFI_FRAME_DIR_WRITE; } bus_dmamap_sync(sc->mfi_buffer_dmat, cm->cm_dmamap, dir); cm->cm_flags |= MFI_CMD_MAPPED; /* * Instead of calculating the total number of frames in the * compound frame, it's already assumed that there will be at * least 1 frame, so don't compensate for the modulo of the * following division. */ cm->cm_total_frame_size += (sc->mfi_sge_size * nsegs); cm->cm_extra_frames = (cm->cm_total_frame_size - 1) / MFI_FRAME_SIZE; if ((error = mfi_send_frame(sc, cm)) != 0) { printf("error %d in callback from mfi_send_frame\n", error); cm->cm_error = error; mfi_complete(sc, cm); goto out; } out: /* leave the lock in the state we found it */ if (locked == 0) mtx_unlock(&sc->mfi_io_lock); return; } static int mfi_send_frame(struct mfi_softc *sc, struct mfi_command *cm) { int error; mtx_assert(&sc->mfi_io_lock, MA_OWNED); if (sc->MFA_enabled) error = mfi_tbolt_send_frame(sc, cm); else error = mfi_std_send_frame(sc, cm); if (error != 0 && (cm->cm_flags & MFI_ON_MFIQ_BUSY) != 0) mfi_remove_busy(cm); return (error); } static int mfi_std_send_frame(struct mfi_softc *sc, struct mfi_command *cm) { struct mfi_frame_header *hdr; int tm = mfi_polled_cmd_timeout * 1000; hdr = &cm->cm_frame->header; if ((cm->cm_flags & MFI_CMD_POLLED) == 0) { cm->cm_timestamp = time_uptime; mfi_enqueue_busy(cm); } else { hdr->cmd_status = MFI_STAT_INVALID_STATUS; hdr->flags |= MFI_FRAME_DONT_POST_IN_REPLY_QUEUE; } /* * The bus address of the command is aligned on a 64 byte boundary, * leaving the least 6 bits as zero. For whatever reason, the * hardware wants the address shifted right by three, leaving just * 3 zero bits. These three bits are then used as a prefetching * hint for the hardware to predict how many frames need to be * fetched across the bus. 
If a command has more than 8 frames * then the 3 bits are set to 0x7 and the firmware uses other * information in the command to determine the total amount to fetch. * However, FreeBSD doesn't support I/O larger than 128K, so 8 frames * is enough for both 32bit and 64bit systems. */ if (cm->cm_extra_frames > 7) cm->cm_extra_frames = 7; sc->mfi_issue_cmd(sc, cm->cm_frame_busaddr, cm->cm_extra_frames); if ((cm->cm_flags & MFI_CMD_POLLED) == 0) return (0); /* This is a polled command, so busy-wait for it to complete. */ while (hdr->cmd_status == MFI_STAT_INVALID_STATUS) { DELAY(1000); tm -= 1; if (tm <= 0) break; } if (hdr->cmd_status == MFI_STAT_INVALID_STATUS) { device_printf(sc->mfi_dev, "Frame %p timed out " "command 0x%X\n", hdr, cm->cm_frame->dcmd.opcode); return (ETIMEDOUT); } return (0); } void mfi_complete(struct mfi_softc *sc, struct mfi_command *cm) { int dir; mtx_assert(&sc->mfi_io_lock, MA_OWNED); if ((cm->cm_flags & MFI_CMD_MAPPED) != 0) { dir = 0; if ((cm->cm_flags & MFI_CMD_DATAIN) || (cm->cm_frame->header.cmd == MFI_CMD_STP)) dir |= BUS_DMASYNC_POSTREAD; if (cm->cm_flags & MFI_CMD_DATAOUT) dir |= BUS_DMASYNC_POSTWRITE; bus_dmamap_sync(sc->mfi_buffer_dmat, cm->cm_dmamap, dir); bus_dmamap_unload(sc->mfi_buffer_dmat, cm->cm_dmamap); cm->cm_flags &= ~MFI_CMD_MAPPED; } cm->cm_flags |= MFI_CMD_COMPLETED; if (cm->cm_complete != NULL) cm->cm_complete(cm); else wakeup(cm); } static int mfi_abort(struct mfi_softc *sc, struct mfi_command **cm_abort) { struct mfi_command *cm; struct mfi_abort_frame *abort; int i = 0, error; uint32_t context = 0; mtx_lock(&sc->mfi_io_lock); if ((cm = mfi_dequeue_free(sc)) == NULL) { mtx_unlock(&sc->mfi_io_lock); return (EBUSY); } /* Zero out the MFI frame */ context = cm->cm_frame->header.context; bzero(cm->cm_frame, sizeof(union mfi_frame)); cm->cm_frame->header.context = context; abort = &cm->cm_frame->abort; abort->header.cmd = MFI_CMD_ABORT; abort->header.flags = 0; abort->header.scsi_status = 0; abort->abort_context = (*cm_abort)->cm_frame->header.context; abort->abort_mfi_addr_lo = (uint32_t)(*cm_abort)->cm_frame_busaddr; abort->abort_mfi_addr_hi = (uint32_t)((uint64_t)(*cm_abort)->cm_frame_busaddr >> 32); cm->cm_data = NULL; cm->cm_flags = MFI_CMD_POLLED; if ((error = mfi_mapcmd(sc, cm)) != 0) device_printf(sc->mfi_dev, "failed to abort command\n"); mfi_release_command(cm); mtx_unlock(&sc->mfi_io_lock); while (i < 5 && *cm_abort != NULL) { tsleep(cm_abort, 0, "mfiabort", 5 * hz); i++; } if (*cm_abort != NULL) { /* Force a complete if command didn't abort */ mtx_lock(&sc->mfi_io_lock); (*cm_abort)->cm_complete(*cm_abort); mtx_unlock(&sc->mfi_io_lock); } return (error); } int mfi_dump_blocks(struct mfi_softc *sc, int id, uint64_t lba, void *virt, int len) { struct mfi_command *cm; struct mfi_io_frame *io; int error; uint32_t context = 0; if ((cm = mfi_dequeue_free(sc)) == NULL) return (EBUSY); /* Zero out the MFI frame */ context = cm->cm_frame->header.context; bzero(cm->cm_frame, sizeof(union mfi_frame)); cm->cm_frame->header.context = context; io = &cm->cm_frame->io; io->header.cmd = MFI_CMD_LD_WRITE; io->header.target_id = id; io->header.timeout = 0; io->header.flags = 0; io->header.scsi_status = 0; io->header.sense_len = MFI_SENSE_LEN; io->header.data_len = howmany(len, MFI_SECTOR_LEN); io->sense_addr_lo = (uint32_t)cm->cm_sense_busaddr; io->sense_addr_hi = (uint32_t)((uint64_t)cm->cm_sense_busaddr >> 32); io->lba_hi = (lba & 0xffffffff00000000) >> 32; io->lba_lo = lba & 0xffffffff; cm->cm_data = virt; cm->cm_len = len; cm->cm_sg = &io->sgl; 
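/* This path is used while the kernel is dumping and cannot sleep, so the write is issued as a polled, data-out command. */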
cm->cm_total_frame_size = MFI_IO_FRAME_SIZE; cm->cm_flags = MFI_CMD_POLLED | MFI_CMD_DATAOUT; if ((error = mfi_mapcmd(sc, cm)) != 0) device_printf(sc->mfi_dev, "failed dump blocks\n"); bus_dmamap_sync(sc->mfi_buffer_dmat, cm->cm_dmamap, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(sc->mfi_buffer_dmat, cm->cm_dmamap); mfi_release_command(cm); return (error); } int mfi_dump_syspd_blocks(struct mfi_softc *sc, int id, uint64_t lba, void *virt, int len) { struct mfi_command *cm; struct mfi_pass_frame *pass; int error, readop, cdb_len; uint32_t blkcount; if ((cm = mfi_dequeue_free(sc)) == NULL) return (EBUSY); pass = &cm->cm_frame->pass; bzero(pass->cdb, 16); pass->header.cmd = MFI_CMD_PD_SCSI_IO; readop = 0; blkcount = howmany(len, MFI_SECTOR_LEN); cdb_len = mfi_build_cdb(readop, 0, lba, blkcount, pass->cdb); pass->header.target_id = id; pass->header.timeout = 0; pass->header.flags = 0; pass->header.scsi_status = 0; pass->header.sense_len = MFI_SENSE_LEN; pass->header.data_len = len; pass->header.cdb_len = cdb_len; pass->sense_addr_lo = (uint32_t)cm->cm_sense_busaddr; pass->sense_addr_hi = (uint32_t)((uint64_t)cm->cm_sense_busaddr >> 32); cm->cm_data = virt; cm->cm_len = len; cm->cm_sg = &pass->sgl; cm->cm_total_frame_size = MFI_PASS_FRAME_SIZE; cm->cm_flags = MFI_CMD_POLLED | MFI_CMD_DATAOUT | MFI_CMD_SCSI; if ((error = mfi_mapcmd(sc, cm)) != 0) device_printf(sc->mfi_dev, "failed dump blocks\n"); bus_dmamap_sync(sc->mfi_buffer_dmat, cm->cm_dmamap, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(sc->mfi_buffer_dmat, cm->cm_dmamap); mfi_release_command(cm); return (error); } static int mfi_open(struct cdev *dev, int flags, int fmt, struct thread *td) { struct mfi_softc *sc; int error; sc = dev->si_drv1; mtx_lock(&sc->mfi_io_lock); if (sc->mfi_detaching) error = ENXIO; else { sc->mfi_flags |= MFI_FLAGS_OPEN; error = 0; } mtx_unlock(&sc->mfi_io_lock); return (error); } static int mfi_close(struct cdev *dev, int flags, int fmt, struct thread *td) { struct mfi_softc *sc; struct mfi_aen *mfi_aen_entry, *tmp; sc = dev->si_drv1; mtx_lock(&sc->mfi_io_lock); sc->mfi_flags &= ~MFI_FLAGS_OPEN; TAILQ_FOREACH_SAFE(mfi_aen_entry, &sc->mfi_aen_pids, aen_link, tmp) { if (mfi_aen_entry->p == curproc) { TAILQ_REMOVE(&sc->mfi_aen_pids, mfi_aen_entry, aen_link); free(mfi_aen_entry, M_MFIBUF); } } mtx_unlock(&sc->mfi_io_lock); return (0); } static int mfi_config_lock(struct mfi_softc *sc, uint32_t opcode) { switch (opcode) { case MFI_DCMD_LD_DELETE: case MFI_DCMD_CFG_ADD: case MFI_DCMD_CFG_CLEAR: case MFI_DCMD_CFG_FOREIGN_IMPORT: sx_xlock(&sc->mfi_config_lock); return (1); default: return (0); } } static void mfi_config_unlock(struct mfi_softc *sc, int locked) { if (locked) sx_xunlock(&sc->mfi_config_lock); } /* * Perform pre-issue checks on commands from userland and possibly veto * them. 
static int mfi_check_command_pre(struct mfi_softc *sc, struct mfi_command *cm) { struct mfi_disk *ld, *ld2; int error; struct mfi_system_pd *syspd = NULL; uint16_t syspd_id; uint16_t *mbox; mtx_assert(&sc->mfi_io_lock, MA_OWNED); error = 0; switch (cm->cm_frame->dcmd.opcode) { case MFI_DCMD_LD_DELETE: TAILQ_FOREACH(ld, &sc->mfi_ld_tqh, ld_link) { if (ld->ld_id == cm->cm_frame->dcmd.mbox[0]) break; } if (ld == NULL) error = ENOENT; else error = mfi_disk_disable(ld); break; case MFI_DCMD_CFG_CLEAR: TAILQ_FOREACH(ld, &sc->mfi_ld_tqh, ld_link) { error = mfi_disk_disable(ld); if (error) break; } if (error) { TAILQ_FOREACH(ld2, &sc->mfi_ld_tqh, ld_link) { if (ld2 == ld) break; mfi_disk_enable(ld2); } } break; case MFI_DCMD_PD_STATE_SET: mbox = (uint16_t *) cm->cm_frame->dcmd.mbox; syspd_id = mbox[0]; if (mbox[2] == MFI_PD_STATE_UNCONFIGURED_GOOD) { TAILQ_FOREACH(syspd, &sc->mfi_syspd_tqh, pd_link) { if (syspd->pd_id == syspd_id) break; } } else break; if (syspd) error = mfi_syspd_disable(syspd); break; default: break; } return (error); } /* Perform post-issue checks on commands from userland. */ static void mfi_check_command_post(struct mfi_softc *sc, struct mfi_command *cm) { struct mfi_disk *ld, *ldn; struct mfi_system_pd *syspd = NULL; uint16_t syspd_id; uint16_t *mbox; switch (cm->cm_frame->dcmd.opcode) { case MFI_DCMD_LD_DELETE: TAILQ_FOREACH(ld, &sc->mfi_ld_tqh, ld_link) { if (ld->ld_id == cm->cm_frame->dcmd.mbox[0]) break; } KASSERT(ld != NULL, ("volume disappeared")); if (cm->cm_frame->header.cmd_status == MFI_STAT_OK) { mtx_unlock(&sc->mfi_io_lock); mtx_lock(&Giant); device_delete_child(sc->mfi_dev, ld->ld_dev); mtx_unlock(&Giant); mtx_lock(&sc->mfi_io_lock); } else mfi_disk_enable(ld); break; case MFI_DCMD_CFG_CLEAR: if (cm->cm_frame->header.cmd_status == MFI_STAT_OK) { mtx_unlock(&sc->mfi_io_lock); mtx_lock(&Giant); TAILQ_FOREACH_SAFE(ld, &sc->mfi_ld_tqh, ld_link, ldn) { device_delete_child(sc->mfi_dev, ld->ld_dev); } mtx_unlock(&Giant); mtx_lock(&sc->mfi_io_lock); } else { TAILQ_FOREACH(ld, &sc->mfi_ld_tqh, ld_link) mfi_disk_enable(ld); } break; case MFI_DCMD_CFG_ADD: mfi_ldprobe(sc); break; case MFI_DCMD_CFG_FOREIGN_IMPORT: mfi_ldprobe(sc); break; case MFI_DCMD_PD_STATE_SET: mbox = (uint16_t *) cm->cm_frame->dcmd.mbox; syspd_id = mbox[0]; if (mbox[2] == MFI_PD_STATE_UNCONFIGURED_GOOD) { TAILQ_FOREACH(syspd, &sc->mfi_syspd_tqh, pd_link) { if (syspd->pd_id == syspd_id) break; } } else break; /* If the transition fails then enable the syspd again */ if (syspd && cm->cm_frame->header.cmd_status != MFI_STAT_OK) mfi_syspd_enable(syspd); break; } } static int mfi_check_for_sscd(struct mfi_softc *sc, struct mfi_command *cm) { struct mfi_config_data *conf_data; struct mfi_command *ld_cm = NULL; struct mfi_ld_info *ld_info = NULL; struct mfi_ld_config *ld; char *p; int error = 0; conf_data = (struct mfi_config_data *)cm->cm_data; if (cm->cm_frame->dcmd.opcode == MFI_DCMD_CFG_ADD) { p = (char *)conf_data->array; p += conf_data->array_size * conf_data->array_count; ld = (struct mfi_ld_config *)p; if (ld->params.isSSCD == 1) error = 1; } else if (cm->cm_frame->dcmd.opcode == MFI_DCMD_LD_DELETE) { error = mfi_dcmd_command(sc, &ld_cm, MFI_DCMD_LD_GET_INFO, (void **)&ld_info, sizeof(*ld_info)); if (error) { device_printf(sc->mfi_dev, "Failed to allocate " "MFI_DCMD_LD_GET_INFO %d\n", error); if (ld_info) free(ld_info, M_MFIBUF); return 0; } ld_cm->cm_flags = MFI_CMD_DATAIN; ld_cm->cm_frame->dcmd.mbox[0] = cm->cm_frame->dcmd.mbox[0]; ld_cm->cm_frame->header.target_id = cm->cm_frame->dcmd.mbox[0]; if 
(mfi_wait_command(sc, ld_cm) != 0) { device_printf(sc->mfi_dev, "failed to get log drv\n"); mfi_release_command(ld_cm); free(ld_info, M_MFIBUF); return 0; } if (ld_cm->cm_frame->header.cmd_status != MFI_STAT_OK) { free(ld_info, M_MFIBUF); mfi_release_command(ld_cm); return 0; } else ld_info = (struct mfi_ld_info *)ld_cm->cm_private; if (ld_info->ld_config.params.isSSCD == 1) error = 1; mfi_release_command(ld_cm); free(ld_info, M_MFIBUF); } return error; } static int mfi_stp_cmd(struct mfi_softc *sc, struct mfi_command *cm,caddr_t arg) { uint8_t i; struct mfi_ioc_packet *ioc; ioc = (struct mfi_ioc_packet *)arg; int sge_size, error; struct megasas_sge *kern_sge; memset(sc->kbuff_arr, 0, sizeof(sc->kbuff_arr)); kern_sge =(struct megasas_sge *) ((uintptr_t)cm->cm_frame + ioc->mfi_sgl_off); cm->cm_frame->header.sg_count = ioc->mfi_sge_count; if (sizeof(bus_addr_t) == 8) { cm->cm_frame->header.flags |= MFI_FRAME_SGL64; cm->cm_extra_frames = 2; sge_size = sizeof(struct mfi_sg64); } else { cm->cm_extra_frames = (cm->cm_total_frame_size - 1) / MFI_FRAME_SIZE; sge_size = sizeof(struct mfi_sg32); } cm->cm_total_frame_size += (sge_size * ioc->mfi_sge_count); for (i = 0; i < ioc->mfi_sge_count; i++) { if (bus_dma_tag_create( sc->mfi_parent_dmat, /* parent */ 1, 0, /* algnmnt, boundary */ BUS_SPACE_MAXADDR_32BIT,/* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ ioc->mfi_sgl[i].iov_len,/* maxsize */ 2, /* nsegments */ ioc->mfi_sgl[i].iov_len,/* maxsegsize */ BUS_DMA_ALLOCNOW, /* flags */ NULL, NULL, /* lockfunc, lockarg */ &sc->mfi_kbuff_arr_dmat[i])) { device_printf(sc->mfi_dev, "Cannot allocate mfi_kbuff_arr_dmat tag\n"); return (ENOMEM); } if (bus_dmamem_alloc(sc->mfi_kbuff_arr_dmat[i], (void **)&sc->kbuff_arr[i], BUS_DMA_NOWAIT, &sc->mfi_kbuff_arr_dmamap[i])) { device_printf(sc->mfi_dev, "Cannot allocate mfi_kbuff_arr_dmamap memory\n"); return (ENOMEM); } bus_dmamap_load(sc->mfi_kbuff_arr_dmat[i], sc->mfi_kbuff_arr_dmamap[i], sc->kbuff_arr[i], ioc->mfi_sgl[i].iov_len, mfi_addr_cb, &sc->mfi_kbuff_arr_busaddr[i], 0); if (!sc->kbuff_arr[i]) { device_printf(sc->mfi_dev, "Could not allocate memory for kbuff_arr info\n"); return -1; } kern_sge[i].phys_addr = sc->mfi_kbuff_arr_busaddr[i]; kern_sge[i].length = ioc->mfi_sgl[i].iov_len; if (sizeof(bus_addr_t) == 8) { cm->cm_frame->stp.sgl.sg64[i].addr = kern_sge[i].phys_addr; cm->cm_frame->stp.sgl.sg64[i].len = ioc->mfi_sgl[i].iov_len; } else { cm->cm_frame->stp.sgl.sg32[i].addr = kern_sge[i].phys_addr; cm->cm_frame->stp.sgl.sg32[i].len = ioc->mfi_sgl[i].iov_len; } error = copyin(ioc->mfi_sgl[i].iov_base, sc->kbuff_arr[i], ioc->mfi_sgl[i].iov_len); if (error != 0) { device_printf(sc->mfi_dev, "Copy in failed\n"); return error; } } cm->cm_flags |=MFI_CMD_MAPPED; return 0; } static int mfi_user_command(struct mfi_softc *sc, struct mfi_ioc_passthru *ioc) { struct mfi_command *cm; struct mfi_dcmd_frame *dcmd; void *ioc_buf = NULL; uint32_t context; int error = 0, locked; if (ioc->buf_size > 0) { if (ioc->buf_size > 1024 * 1024) return (ENOMEM); ioc_buf = malloc(ioc->buf_size, M_MFIBUF, M_WAITOK); error = copyin(ioc->buf, ioc_buf, ioc->buf_size); if (error) { device_printf(sc->mfi_dev, "failed to copyin\n"); free(ioc_buf, M_MFIBUF); return (error); } } locked = mfi_config_lock(sc, ioc->ioc_frame.opcode); mtx_lock(&sc->mfi_io_lock); while ((cm = mfi_dequeue_free(sc)) == NULL) msleep(mfi_user_command, &sc->mfi_io_lock, 0, "mfiioc", hz); /* Save context for later */ context = cm->cm_frame->header.context; dcmd = &cm->cm_frame->dcmd; 
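/* Copy the user-supplied DCMD frame over the driver's frame; the context saved above is restored below because the bcopy clobbers it. */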
bcopy(&ioc->ioc_frame, dcmd, sizeof(struct mfi_dcmd_frame)); cm->cm_sg = &dcmd->sgl; cm->cm_total_frame_size = MFI_DCMD_FRAME_SIZE; cm->cm_data = ioc_buf; cm->cm_len = ioc->buf_size; /* restore context */ cm->cm_frame->header.context = context; /* Cheat since we don't know if we're writing or reading */ cm->cm_flags = MFI_CMD_DATAIN | MFI_CMD_DATAOUT; error = mfi_check_command_pre(sc, cm); if (error) goto out; error = mfi_wait_command(sc, cm); if (error) { device_printf(sc->mfi_dev, "ioctl failed %d\n", error); goto out; } bcopy(dcmd, &ioc->ioc_frame, sizeof(struct mfi_dcmd_frame)); mfi_check_command_post(sc, cm); out: mfi_release_command(cm); mtx_unlock(&sc->mfi_io_lock); mfi_config_unlock(sc, locked); if (ioc->buf_size > 0) error = copyout(ioc_buf, ioc->buf, ioc->buf_size); if (ioc_buf) free(ioc_buf, M_MFIBUF); return (error); } #define PTRIN(p) ((void *)(uintptr_t)(p)) static int mfi_ioctl(struct cdev *dev, u_long cmd, caddr_t arg, int flag, struct thread *td) { struct mfi_softc *sc; union mfi_statrequest *ms; struct mfi_ioc_packet *ioc; #ifdef COMPAT_FREEBSD32 struct mfi_ioc_packet32 *ioc32; #endif struct mfi_ioc_aen *aen; struct mfi_command *cm = NULL; uint32_t context = 0; union mfi_sense_ptr sense_ptr; uint8_t *data = NULL, *temp, *addr, skip_pre_post = 0; size_t len; int i, res; struct mfi_ioc_passthru *iop = (struct mfi_ioc_passthru *)arg; #ifdef COMPAT_FREEBSD32 struct mfi_ioc_passthru32 *iop32 = (struct mfi_ioc_passthru32 *)arg; struct mfi_ioc_passthru iop_swab; #endif int error, locked; union mfi_sgl *sgl; sc = dev->si_drv1; error = 0; if (sc->adpreset) return EBUSY; if (sc->hw_crit_error) return EBUSY; if (sc->issuepend_done == 0) return EBUSY; switch (cmd) { case MFIIO_STATS: ms = (union mfi_statrequest *)arg; switch (ms->ms_item) { case MFIQ_FREE: case MFIQ_BIO: case MFIQ_READY: case MFIQ_BUSY: bcopy(&sc->mfi_qstat[ms->ms_item], &ms->ms_qstat, sizeof(struct mfi_qstat)); break; default: error = ENOIOCTL; break; } break; case MFIIO_QUERY_DISK: { struct mfi_query_disk *qd; struct mfi_disk *ld; qd = (struct mfi_query_disk *)arg; mtx_lock(&sc->mfi_io_lock); TAILQ_FOREACH(ld, &sc->mfi_ld_tqh, ld_link) { if (ld->ld_id == qd->array_id) break; } if (ld == NULL) { qd->present = 0; mtx_unlock(&sc->mfi_io_lock); return (0); } qd->present = 1; if (ld->ld_flags & MFI_DISK_FLAGS_OPEN) qd->open = 1; bzero(qd->devname, SPECNAMELEN + 1); snprintf(qd->devname, SPECNAMELEN, "mfid%d", ld->ld_unit); mtx_unlock(&sc->mfi_io_lock); break; } case MFI_CMD: #ifdef COMPAT_FREEBSD32 case MFI_CMD32: #endif { devclass_t devclass; ioc = (struct mfi_ioc_packet *)arg; int adapter; adapter = ioc->mfi_adapter_no; if (device_get_unit(sc->mfi_dev) == 0 && adapter != 0) { devclass = devclass_find("mfi"); sc = devclass_get_softc(devclass, adapter); } mtx_lock(&sc->mfi_io_lock); if ((cm = mfi_dequeue_free(sc)) == NULL) { mtx_unlock(&sc->mfi_io_lock); return (EBUSY); } mtx_unlock(&sc->mfi_io_lock); locked = 0; /* * save off original context since copying from user * will clobber some data */ context = cm->cm_frame->header.context; cm->cm_frame->header.context = cm->cm_index; bcopy(ioc->mfi_frame.raw, cm->cm_frame, 2 * MEGAMFI_FRAME_SIZE); cm->cm_total_frame_size = (sizeof(union mfi_sgl) * ioc->mfi_sge_count) + ioc->mfi_sgl_off; cm->cm_frame->header.scsi_status = 0; cm->cm_frame->header.pad0 = 0; if (ioc->mfi_sge_count) { cm->cm_sg = (union mfi_sgl *)&cm->cm_frame->bytes[ioc->mfi_sgl_off]; } sgl = cm->cm_sg; cm->cm_flags = 0; if (cm->cm_frame->header.flags & MFI_FRAME_DATAIN) cm->cm_flags |= MFI_CMD_DATAIN; if 
(cm->cm_frame->header.flags & MFI_FRAME_DATAOUT) cm->cm_flags |= MFI_CMD_DATAOUT; /* Legacy app shim */ if (cm->cm_flags == 0) cm->cm_flags |= MFI_CMD_DATAIN | MFI_CMD_DATAOUT; cm->cm_len = cm->cm_frame->header.data_len; if (cm->cm_frame->header.cmd == MFI_CMD_STP) { #ifdef COMPAT_FREEBSD32 if (cmd == MFI_CMD) { #endif /* Native */ cm->cm_stp_len = ioc->mfi_sgl[0].iov_len; #ifdef COMPAT_FREEBSD32 } else { /* 32bit on 64bit */ ioc32 = (struct mfi_ioc_packet32 *)ioc; cm->cm_stp_len = ioc32->mfi_sgl[0].iov_len; } #endif cm->cm_len += cm->cm_stp_len; } if (cm->cm_len && (cm->cm_flags & (MFI_CMD_DATAIN | MFI_CMD_DATAOUT))) { cm->cm_data = data = malloc(cm->cm_len, M_MFIBUF, M_WAITOK | M_ZERO); } else { cm->cm_data = 0; } /* restore header context */ cm->cm_frame->header.context = context; if (cm->cm_frame->header.cmd == MFI_CMD_STP) { res = mfi_stp_cmd(sc, cm, arg); if (res != 0) goto out; } else { temp = data; if ((cm->cm_flags & MFI_CMD_DATAOUT) || (cm->cm_frame->header.cmd == MFI_CMD_STP)) { for (i = 0; i < ioc->mfi_sge_count; i++) { #ifdef COMPAT_FREEBSD32 if (cmd == MFI_CMD) { #endif /* Native */ addr = ioc->mfi_sgl[i].iov_base; len = ioc->mfi_sgl[i].iov_len; #ifdef COMPAT_FREEBSD32 } else { /* 32bit on 64bit */ ioc32 = (struct mfi_ioc_packet32 *)ioc; addr = PTRIN(ioc32->mfi_sgl[i].iov_base); len = ioc32->mfi_sgl[i].iov_len; } #endif error = copyin(addr, temp, len); if (error != 0) { device_printf(sc->mfi_dev, "Copy in failed\n"); goto out; } temp = &temp[len]; } } } if (cm->cm_frame->header.cmd == MFI_CMD_DCMD) locked = mfi_config_lock(sc, cm->cm_frame->dcmd.opcode); if (cm->cm_frame->header.cmd == MFI_CMD_PD_SCSI_IO) { cm->cm_frame->pass.sense_addr_lo = (uint32_t)cm->cm_sense_busaddr; cm->cm_frame->pass.sense_addr_hi = (uint32_t)((uint64_t)cm->cm_sense_busaddr >> 32); } mtx_lock(&sc->mfi_io_lock); skip_pre_post = mfi_check_for_sscd (sc, cm); if (!skip_pre_post) { error = mfi_check_command_pre(sc, cm); if (error) { mtx_unlock(&sc->mfi_io_lock); goto out; } } if ((error = mfi_wait_command(sc, cm)) != 0) { device_printf(sc->mfi_dev, "Controller polled failed\n"); mtx_unlock(&sc->mfi_io_lock); goto out; } if (!skip_pre_post) { mfi_check_command_post(sc, cm); } mtx_unlock(&sc->mfi_io_lock); if (cm->cm_frame->header.cmd != MFI_CMD_STP) { temp = data; if ((cm->cm_flags & MFI_CMD_DATAIN) || (cm->cm_frame->header.cmd == MFI_CMD_STP)) { for (i = 0; i < ioc->mfi_sge_count; i++) { #ifdef COMPAT_FREEBSD32 if (cmd == MFI_CMD) { #endif /* Native */ addr = ioc->mfi_sgl[i].iov_base; len = ioc->mfi_sgl[i].iov_len; #ifdef COMPAT_FREEBSD32 } else { /* 32bit on 64bit */ ioc32 = (struct mfi_ioc_packet32 *)ioc; addr = PTRIN(ioc32->mfi_sgl[i].iov_base); len = ioc32->mfi_sgl[i].iov_len; } #endif error = copyout(temp, addr, len); if (error != 0) { device_printf(sc->mfi_dev, "Copy out failed\n"); goto out; } temp = &temp[len]; } } } if (ioc->mfi_sense_len) { /* get user-space sense ptr then copy out sense */ bcopy(&ioc->mfi_frame.raw[ioc->mfi_sense_off], &sense_ptr.sense_ptr_data[0], sizeof(sense_ptr.sense_ptr_data)); #ifdef COMPAT_FREEBSD32 if (cmd != MFI_CMD) { /* * not 64bit native so zero out any address * over 32bit */ sense_ptr.addr.high = 0; } #endif error = copyout(cm->cm_sense, sense_ptr.user_space, ioc->mfi_sense_len); if (error != 0) { device_printf(sc->mfi_dev, "Copy out failed\n"); goto out; } } ioc->mfi_frame.hdr.cmd_status = cm->cm_frame->header.cmd_status; out: mfi_config_unlock(sc, locked); if (data) free(data, M_MFIBUF); if (cm->cm_frame->header.cmd == MFI_CMD_STP) { for (i = 0; i < 2; i++) { if 
(sc->kbuff_arr[i]) { - if (sc->mfi_kbuff_arr_busaddr != 0) + if (sc->mfi_kbuff_arr_busaddr[i] != 0) bus_dmamap_unload( sc->mfi_kbuff_arr_dmat[i], sc->mfi_kbuff_arr_dmamap[i] ); if (sc->kbuff_arr[i] != NULL) bus_dmamem_free( sc->mfi_kbuff_arr_dmat[i], sc->kbuff_arr[i], sc->mfi_kbuff_arr_dmamap[i] ); if (sc->mfi_kbuff_arr_dmat[i] != NULL) bus_dma_tag_destroy( sc->mfi_kbuff_arr_dmat[i]); } } } if (cm) { mtx_lock(&sc->mfi_io_lock); mfi_release_command(cm); mtx_unlock(&sc->mfi_io_lock); } break; } case MFI_SET_AEN: aen = (struct mfi_ioc_aen *)arg; mtx_lock(&sc->mfi_io_lock); error = mfi_aen_register(sc, aen->aen_seq_num, aen->aen_class_locale); mtx_unlock(&sc->mfi_io_lock); break; case MFI_LINUX_CMD_2: /* Firmware Linux ioctl shim */ { devclass_t devclass; struct mfi_linux_ioc_packet l_ioc; int adapter; devclass = devclass_find("mfi"); if (devclass == NULL) return (ENOENT); error = copyin(arg, &l_ioc, sizeof(l_ioc)); if (error) return (error); adapter = l_ioc.lioc_adapter_no; sc = devclass_get_softc(devclass, adapter); if (sc == NULL) return (ENOENT); return (mfi_linux_ioctl_int(sc->mfi_cdev, cmd, arg, flag, td)); break; } case MFI_LINUX_SET_AEN_2: /* AEN Linux ioctl shim */ { devclass_t devclass; struct mfi_linux_ioc_aen l_aen; int adapter; devclass = devclass_find("mfi"); if (devclass == NULL) return (ENOENT); error = copyin(arg, &l_aen, sizeof(l_aen)); if (error) return (error); adapter = l_aen.laen_adapter_no; sc = devclass_get_softc(devclass, adapter); if (sc == NULL) return (ENOENT); return (mfi_linux_ioctl_int(sc->mfi_cdev, cmd, arg, flag, td)); break; } #ifdef COMPAT_FREEBSD32 case MFIIO_PASSTHRU32: if (!SV_CURPROC_FLAG(SV_ILP32)) { error = ENOTTY; break; } iop_swab.ioc_frame = iop32->ioc_frame; iop_swab.buf_size = iop32->buf_size; iop_swab.buf = PTRIN(iop32->buf); iop = &iop_swab; /* FALLTHROUGH */ #endif case MFIIO_PASSTHRU: error = mfi_user_command(sc, iop); #ifdef COMPAT_FREEBSD32 if (cmd == MFIIO_PASSTHRU32) iop32->ioc_frame = iop_swab.ioc_frame; #endif break; default: device_printf(sc->mfi_dev, "IOCTL 0x%lx not handled\n", cmd); error = ENOTTY; break; } return (error); } static int mfi_linux_ioctl_int(struct cdev *dev, u_long cmd, caddr_t arg, int flag, struct thread *td) { struct mfi_softc *sc; struct mfi_linux_ioc_packet l_ioc; struct mfi_linux_ioc_aen l_aen; struct mfi_command *cm = NULL; struct mfi_aen *mfi_aen_entry; union mfi_sense_ptr sense_ptr; uint32_t context = 0; uint8_t *data = NULL, *temp; int i; int error, locked; sc = dev->si_drv1; error = 0; switch (cmd) { case MFI_LINUX_CMD_2: /* Firmware Linux ioctl shim */ error = copyin(arg, &l_ioc, sizeof(l_ioc)); if (error != 0) return (error); if (l_ioc.lioc_sge_count > MAX_LINUX_IOCTL_SGE) { return (EINVAL); } mtx_lock(&sc->mfi_io_lock); if ((cm = mfi_dequeue_free(sc)) == NULL) { mtx_unlock(&sc->mfi_io_lock); return (EBUSY); } mtx_unlock(&sc->mfi_io_lock); locked = 0; /* * save off original context since copying from user * will clobber some data */ context = cm->cm_frame->header.context; bcopy(l_ioc.lioc_frame.raw, cm->cm_frame, 2 * MFI_DCMD_FRAME_SIZE); /* this isn't quite right */ cm->cm_total_frame_size = (sizeof(union mfi_sgl) * l_ioc.lioc_sge_count) + l_ioc.lioc_sgl_off; cm->cm_frame->header.scsi_status = 0; cm->cm_frame->header.pad0 = 0; if (l_ioc.lioc_sge_count) cm->cm_sg = (union mfi_sgl *)&cm->cm_frame->bytes[l_ioc.lioc_sgl_off]; cm->cm_flags = 0; if (cm->cm_frame->header.flags & MFI_FRAME_DATAIN) cm->cm_flags |= MFI_CMD_DATAIN; if (cm->cm_frame->header.flags & MFI_FRAME_DATAOUT) cm->cm_flags |= MFI_CMD_DATAOUT; 
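/* Size the kernel bounce buffer from the frame's data_len; the SG entries are copied in and out around the command according to the direction flags. */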
cm->cm_len = cm->cm_frame->header.data_len; if (cm->cm_len && (cm->cm_flags & (MFI_CMD_DATAIN | MFI_CMD_DATAOUT))) { cm->cm_data = data = malloc(cm->cm_len, M_MFIBUF, M_WAITOK | M_ZERO); } else { cm->cm_data = 0; } /* restore header context */ cm->cm_frame->header.context = context; temp = data; if (cm->cm_flags & MFI_CMD_DATAOUT) { for (i = 0; i < l_ioc.lioc_sge_count; i++) { error = copyin(PTRIN(l_ioc.lioc_sgl[i].iov_base), temp, l_ioc.lioc_sgl[i].iov_len); if (error != 0) { device_printf(sc->mfi_dev, "Copy in failed\n"); goto out; } temp = &temp[l_ioc.lioc_sgl[i].iov_len]; } } if (cm->cm_frame->header.cmd == MFI_CMD_DCMD) locked = mfi_config_lock(sc, cm->cm_frame->dcmd.opcode); if (cm->cm_frame->header.cmd == MFI_CMD_PD_SCSI_IO) { cm->cm_frame->pass.sense_addr_lo = (uint32_t)cm->cm_sense_busaddr; cm->cm_frame->pass.sense_addr_hi = (uint32_t)((uint64_t)cm->cm_sense_busaddr >> 32); } mtx_lock(&sc->mfi_io_lock); error = mfi_check_command_pre(sc, cm); if (error) { mtx_unlock(&sc->mfi_io_lock); goto out; } if ((error = mfi_wait_command(sc, cm)) != 0) { device_printf(sc->mfi_dev, "Controller polled failed\n"); mtx_unlock(&sc->mfi_io_lock); goto out; } mfi_check_command_post(sc, cm); mtx_unlock(&sc->mfi_io_lock); temp = data; if (cm->cm_flags & MFI_CMD_DATAIN) { for (i = 0; i < l_ioc.lioc_sge_count; i++) { error = copyout(temp, PTRIN(l_ioc.lioc_sgl[i].iov_base), l_ioc.lioc_sgl[i].iov_len); if (error != 0) { device_printf(sc->mfi_dev, "Copy out failed\n"); goto out; } temp = &temp[l_ioc.lioc_sgl[i].iov_len]; } } if (l_ioc.lioc_sense_len) { /* get user-space sense ptr then copy out sense */ bcopy(&((struct mfi_linux_ioc_packet*)arg) ->lioc_frame.raw[l_ioc.lioc_sense_off], &sense_ptr.sense_ptr_data[0], sizeof(sense_ptr.sense_ptr_data)); #ifdef __amd64__ /* * only 32bit Linux support so zero out any * address over 32bit */ sense_ptr.addr.high = 0; #endif error = copyout(cm->cm_sense, sense_ptr.user_space, l_ioc.lioc_sense_len); if (error != 0) { device_printf(sc->mfi_dev, "Copy out failed\n"); goto out; } } error = copyout(&cm->cm_frame->header.cmd_status, &((struct mfi_linux_ioc_packet*)arg) ->lioc_frame.hdr.cmd_status, 1); if (error != 0) { device_printf(sc->mfi_dev, "Copy out failed\n"); goto out; } out: mfi_config_unlock(sc, locked); if (data) free(data, M_MFIBUF); if (cm) { mtx_lock(&sc->mfi_io_lock); mfi_release_command(cm); mtx_unlock(&sc->mfi_io_lock); } return (error); case MFI_LINUX_SET_AEN_2: /* AEN Linux ioctl shim */ error = copyin(arg, &l_aen, sizeof(l_aen)); if (error != 0) return (error); printf("AEN IMPLEMENTED for pid %d\n", curproc->p_pid); mfi_aen_entry = malloc(sizeof(struct mfi_aen), M_MFIBUF, M_WAITOK); mtx_lock(&sc->mfi_io_lock); if (mfi_aen_entry != NULL) { mfi_aen_entry->p = curproc; TAILQ_INSERT_TAIL(&sc->mfi_aen_pids, mfi_aen_entry, aen_link); } error = mfi_aen_register(sc, l_aen.laen_seq_num, l_aen.laen_class_locale); if (error != 0) { TAILQ_REMOVE(&sc->mfi_aen_pids, mfi_aen_entry, aen_link); free(mfi_aen_entry, M_MFIBUF); } mtx_unlock(&sc->mfi_io_lock); return (error); default: device_printf(sc->mfi_dev, "IOCTL 0x%lx not handled\n", cmd); error = ENOENT; break; } return (error); } static int mfi_poll(struct cdev *dev, int poll_events, struct thread *td) { struct mfi_softc *sc; int revents = 0; sc = dev->si_drv1; if (poll_events & (POLLIN | POLLRDNORM)) { if (sc->mfi_aen_triggered != 0) { revents |= poll_events & (POLLIN | POLLRDNORM); sc->mfi_aen_triggered = 0; } if (sc->mfi_aen_triggered == 0 && sc->mfi_aen_cm == NULL) { revents |= POLLERR; } } if (revents == 0) { if 
(poll_events & (POLLIN | POLLRDNORM)) { sc->mfi_poll_waiting = 1; selrecord(td, &sc->mfi_select); } } return revents; } static void mfi_dump_all(void) { struct mfi_softc *sc; struct mfi_command *cm; devclass_t dc; time_t deadline; int timedout; int i; dc = devclass_find("mfi"); if (dc == NULL) { printf("No mfi dev class\n"); return; } for (i = 0; ; i++) { sc = devclass_get_softc(dc, i); if (sc == NULL) break; device_printf(sc->mfi_dev, "Dumping\n\n"); timedout = 0; deadline = time_uptime - mfi_cmd_timeout; mtx_lock(&sc->mfi_io_lock); TAILQ_FOREACH(cm, &sc->mfi_busy, cm_link) { if (cm->cm_timestamp <= deadline) { device_printf(sc->mfi_dev, "COMMAND %p TIMEOUT AFTER %d SECONDS\n", cm, (int)(time_uptime - cm->cm_timestamp)); MFI_PRINT_CMD(cm); timedout++; } } #if 0 if (timedout) MFI_DUMP_CMDS(sc); #endif mtx_unlock(&sc->mfi_io_lock); } return; } static void mfi_timeout(void *data) { struct mfi_softc *sc = (struct mfi_softc *)data; struct mfi_command *cm, *tmp; time_t deadline; int timedout = 0; deadline = time_uptime - mfi_cmd_timeout; if (sc->adpreset == 0) { if (!mfi_tbolt_reset(sc)) { callout_reset(&sc->mfi_watchdog_callout, mfi_cmd_timeout * hz, mfi_timeout, sc); return; } } mtx_lock(&sc->mfi_io_lock); TAILQ_FOREACH_SAFE(cm, &sc->mfi_busy, cm_link, tmp) { if (sc->mfi_aen_cm == cm || sc->mfi_map_sync_cm == cm) continue; if (cm->cm_timestamp <= deadline) { if (sc->adpreset != 0 && sc->issuepend_done == 0) { cm->cm_timestamp = time_uptime; } else { device_printf(sc->mfi_dev, "COMMAND %p TIMEOUT AFTER %d SECONDS\n", cm, (int)(time_uptime - cm->cm_timestamp) ); MFI_PRINT_CMD(cm); MFI_VALIDATE_CMD(sc, cm); /* * While commands can get stuck forever we do * not fail them as there is no way to tell if * the controller has actually processed them * or not. * * In addition its very likely that force * failing a command here would cause a panic * e.g. in UFS. */ timedout++; } } } #if 0 if (timedout) MFI_DUMP_CMDS(sc); #endif mtx_unlock(&sc->mfi_io_lock); callout_reset(&sc->mfi_watchdog_callout, mfi_cmd_timeout * hz, mfi_timeout, sc); if (0) mfi_dump_all(); return; } Index: projects/clang390-import/sys/dev/ofw/ofw_bus_subr.c =================================================================== --- projects/clang390-import/sys/dev/ofw/ofw_bus_subr.c (revision 305016) +++ projects/clang390-import/sys/dev/ofw/ofw_bus_subr.c (revision 305017) @@ -1,929 +1,951 @@ /*- * Copyright (c) 2001 - 2003 by Thomas Moestl . * Copyright (c) 2005 Marius Strobl * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions, and the following disclaimer, * without modification, immediately at the beginning of the file. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_platform.h" #include #include #include #include #include #include #include #include #include #include "ofw_bus_if.h" +#define OFW_COMPAT_LEN 255 + int ofw_bus_gen_setup_devinfo(struct ofw_bus_devinfo *obd, phandle_t node) { if (obd == NULL) return (ENOMEM); /* The 'name' property is considered mandatory. */ if ((OF_getprop_alloc(node, "name", 1, (void **)&obd->obd_name)) == -1) return (EINVAL); OF_getprop_alloc(node, "compatible", 1, (void **)&obd->obd_compat); OF_getprop_alloc(node, "device_type", 1, (void **)&obd->obd_type); OF_getprop_alloc(node, "model", 1, (void **)&obd->obd_model); OF_getprop_alloc(node, "status", 1, (void **)&obd->obd_status); obd->obd_node = node; return (0); } void ofw_bus_gen_destroy_devinfo(struct ofw_bus_devinfo *obd) { if (obd == NULL) return; if (obd->obd_compat != NULL) free(obd->obd_compat, M_OFWPROP); if (obd->obd_model != NULL) free(obd->obd_model, M_OFWPROP); if (obd->obd_name != NULL) free(obd->obd_name, M_OFWPROP); if (obd->obd_type != NULL) free(obd->obd_type, M_OFWPROP); if (obd->obd_status != NULL) free(obd->obd_status, M_OFWPROP); } int ofw_bus_gen_child_pnpinfo_str(device_t cbdev, device_t child, char *buf, size_t buflen) { if (ofw_bus_get_name(child) != NULL) { strlcat(buf, "name=", buflen); strlcat(buf, ofw_bus_get_name(child), buflen); } if (ofw_bus_get_compat(child) != NULL) { strlcat(buf, " compat=", buflen); strlcat(buf, ofw_bus_get_compat(child), buflen); } return (0); }; const char * ofw_bus_gen_get_compat(device_t bus, device_t dev) { const struct ofw_bus_devinfo *obd; obd = OFW_BUS_GET_DEVINFO(bus, dev); if (obd == NULL) return (NULL); return (obd->obd_compat); } const char * ofw_bus_gen_get_model(device_t bus, device_t dev) { const struct ofw_bus_devinfo *obd; obd = OFW_BUS_GET_DEVINFO(bus, dev); if (obd == NULL) return (NULL); return (obd->obd_model); } const char * ofw_bus_gen_get_name(device_t bus, device_t dev) { const struct ofw_bus_devinfo *obd; obd = OFW_BUS_GET_DEVINFO(bus, dev); if (obd == NULL) return (NULL); return (obd->obd_name); } phandle_t ofw_bus_gen_get_node(device_t bus, device_t dev) { const struct ofw_bus_devinfo *obd; obd = OFW_BUS_GET_DEVINFO(bus, dev); if (obd == NULL) return (0); return (obd->obd_node); } const char * ofw_bus_gen_get_type(device_t bus, device_t dev) { const struct ofw_bus_devinfo *obd; obd = OFW_BUS_GET_DEVINFO(bus, dev); if (obd == NULL) return (NULL); return (obd->obd_type); } const char * ofw_bus_get_status(device_t dev) { const struct ofw_bus_devinfo *obd; obd = OFW_BUS_GET_DEVINFO(device_get_parent(dev), dev); if (obd == NULL) return (NULL); return (obd->obd_status); } int ofw_bus_status_okay(device_t dev) { const char *status; status = ofw_bus_get_status(dev); if (status == NULL || strcmp(status, "okay") == 0 || strcmp(status, "ok") == 0) return (1); return (0); } static int -ofw_bus_node_is_compatible(const char *compat, int len, const char *onecompat) +ofw_bus_node_is_compatible_int(const char *compat, int len, + const char 
*onecompat) { int onelen, l, ret; onelen = strlen(onecompat); ret = 0; while (len > 0) { if (strlen(compat) == onelen && strncasecmp(compat, onecompat, onelen) == 0) { /* Found it. */ ret = 1; break; } /* Slide to the next sub-string. */ l = strlen(compat) + 1; compat += l; len -= l; } return (ret); } int +ofw_bus_node_is_compatible(phandle_t node, const char *compatstr) +{ + char compat[OFW_COMPAT_LEN]; + int len, rv; + + if ((len = OF_getproplen(node, "compatible")) <= 0) + return (0); + + bzero(compat, OFW_COMPAT_LEN); + + if (OF_getprop(node, "compatible", compat, OFW_COMPAT_LEN) < 0) + return (0); + + rv = ofw_bus_node_is_compatible_int(compat, len, compatstr); + + return (rv); +} + +int ofw_bus_is_compatible(device_t dev, const char *onecompat) { phandle_t node; const char *compat; int len; if ((compat = ofw_bus_get_compat(dev)) == NULL) return (0); if ((node = ofw_bus_get_node(dev)) == -1) return (0); /* Get total 'compatible' prop len */ if ((len = OF_getproplen(node, "compatible")) <= 0) return (0); - return (ofw_bus_node_is_compatible(compat, len, onecompat)); + return (ofw_bus_node_is_compatible_int(compat, len, onecompat)); } int ofw_bus_is_compatible_strict(device_t dev, const char *compatible) { const char *compat; size_t len; if ((compat = ofw_bus_get_compat(dev)) == NULL) return (0); len = strlen(compatible); if (strlen(compat) == len && strncasecmp(compat, compatible, len) == 0) return (1); return (0); } const struct ofw_compat_data * ofw_bus_search_compatible(device_t dev, const struct ofw_compat_data *compat) { if (compat == NULL) return NULL; for (; compat->ocd_str != NULL; ++compat) { if (ofw_bus_is_compatible(dev, compat->ocd_str)) break; } return (compat); } int ofw_bus_has_prop(device_t dev, const char *propname) { phandle_t node; if ((node = ofw_bus_get_node(dev)) == -1) return (0); return (OF_hasprop(node, propname)); } void ofw_bus_setup_iinfo(phandle_t node, struct ofw_bus_iinfo *ii, int intrsz) { pcell_t addrc; int msksz; if (OF_getencprop(node, "#address-cells", &addrc, sizeof(addrc)) == -1) addrc = 2; ii->opi_addrc = addrc * sizeof(pcell_t); ii->opi_imapsz = OF_getencprop_alloc(node, "interrupt-map", 1, (void **)&ii->opi_imap); if (ii->opi_imapsz > 0) { msksz = OF_getencprop_alloc(node, "interrupt-map-mask", 1, (void **)&ii->opi_imapmsk); /* * Failure to get the mask is ignored; a full mask is used * then. We barf on bad mask sizes, however. */ if (msksz != -1 && msksz != ii->opi_addrc + intrsz) panic("ofw_bus_setup_iinfo: bad interrupt-map-mask " "property!"); } } int ofw_bus_lookup_imap(phandle_t node, struct ofw_bus_iinfo *ii, void *reg, int regsz, void *pintr, int pintrsz, void *mintr, int mintrsz, phandle_t *iparent) { uint8_t maskbuf[regsz + pintrsz]; int rv; if (ii->opi_imapsz <= 0) return (0); KASSERT(regsz >= ii->opi_addrc, ("ofw_bus_lookup_imap: register size too small: %d < %d", regsz, ii->opi_addrc)); if (node != -1) { rv = OF_getencprop(node, "reg", reg, regsz); if (rv < regsz) panic("ofw_bus_lookup_imap: cannot get reg property"); } return (ofw_bus_search_intrmap(pintr, pintrsz, reg, ii->opi_addrc, ii->opi_imap, ii->opi_imapsz, ii->opi_imapmsk, maskbuf, mintr, mintrsz, iparent)); } /* * Map an interrupt using the firmware reg, interrupt-map and * interrupt-map-mask properties. * The interrupt property to be mapped must be of size intrsz, and pointed to * by intr. The regs property of the node for which the mapping is done must * be passed as regs. 
This property is an array of register specifications;
 * the size of the address part of such a specification must be passed as
 * physsz. Only the first element of the property is used.
 * imap and imapsz hold the interrupt map and its size.
 * imapmsk is a pointer to the interrupt-map-mask property, which must have
 * a size of physsz + intrsz; it may be NULL, in which case a full mask is
 * assumed.
 * maskbuf must point to a buffer of length physsz + intrsz.
 * The interrupt is returned in result, which must point to a buffer of length
 * rintrsz (which gives the expected size of the mapped interrupt).
 * Returns the number of cells in the interrupt if a mapping was found,
 * 0 otherwise.
 */
int
ofw_bus_search_intrmap(void *intr, int intrsz, void *regs, int physsz,
    void *imap, int imapsz, void *imapmsk, void *maskbuf, void *result,
    int rintrsz, phandle_t *iparent)
{
	phandle_t parent;
	uint8_t *ref = maskbuf;
	uint8_t *uiintr = intr;
	uint8_t *uiregs = regs;
	uint8_t *uiimapmsk = imapmsk;
	uint8_t *mptr;
	pcell_t paddrsz;
	pcell_t pintrsz;
	int i, rsz, tsz;

	rsz = -1;
	if (imapmsk != NULL) {
		for (i = 0; i < physsz; i++)
			ref[i] = uiregs[i] & uiimapmsk[i];
		for (i = 0; i < intrsz; i++)
			ref[physsz + i] = uiintr[i] & uiimapmsk[physsz + i];
	} else {
		bcopy(regs, ref, physsz);
		bcopy(intr, ref + physsz, intrsz);
	}

	mptr = imap;
	i = imapsz;
	paddrsz = 0;
	while (i > 0) {
		bcopy(mptr + physsz + intrsz, &parent, sizeof(parent));
#ifndef OFW_IMAP_NO_IPARENT_ADDR_CELLS
		/*
		 * Find if we need to read the parent address data.
		 * CHRP-derived OF bindings, including ePAPR-compliant FDTs,
		 * use this as an optional part of the specifier.
		 */
		if (OF_getencprop(OF_node_from_xref(parent),
		    "#address-cells", &paddrsz, sizeof(paddrsz)) == -1)
			paddrsz = 0;	/* default */
		paddrsz *= sizeof(pcell_t);
#endif
		if (OF_searchencprop(OF_node_from_xref(parent),
		    "#interrupt-cells", &pintrsz, sizeof(pintrsz)) == -1)
			pintrsz = 1;	/* default */
		pintrsz *= sizeof(pcell_t);

		/* Compute the map stride size.
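		 * One map entry consists of: the child unit address
		 * (physsz bytes), the child interrupt specifier (intrsz
		 * bytes), the parent phandle, an optional parent unit
		 * address (paddrsz bytes) and the parent interrupt
		 * specifier (pintrsz bytes).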
*/ tsz = physsz + intrsz + sizeof(phandle_t) + paddrsz + pintrsz; KASSERT(i >= tsz, ("ofw_bus_search_intrmap: truncated map")); if (bcmp(ref, mptr, physsz + intrsz) == 0) { bcopy(mptr + physsz + intrsz + sizeof(parent) + paddrsz, result, MIN(rintrsz, pintrsz)); if (iparent != NULL) *iparent = parent; return (pintrsz/sizeof(pcell_t)); } mptr += tsz; i -= tsz; } return (0); } int ofw_bus_msimap(phandle_t node, uint16_t pci_rid, phandle_t *msi_parent, uint32_t *msi_rid) { pcell_t *map, mask, msi_base, rid_base, rid_length; ssize_t len; uint32_t masked_rid, rid; int err, i; /* TODO: This should be OF_searchprop_alloc if we had it */ len = OF_getencprop_alloc(node, "msi-map", sizeof(*map), (void **)&map); if (len < 0) { if (msi_parent != NULL) { *msi_parent = 0; OF_getencprop(node, "msi-parent", msi_parent, sizeof(*msi_parent)); } if (msi_rid != NULL) *msi_rid = pci_rid; return (0); } err = ENOENT; rid = 0; mask = 0xffffffff; OF_getencprop(node, "msi-map-mask", &mask, sizeof(mask)); masked_rid = pci_rid & mask; for (i = 0; i < len; i += 4) { rid_base = map[i + 0]; rid_length = map[i + 3]; if (masked_rid < rid_base || masked_rid >= (rid_base + rid_length)) continue; msi_base = map[i + 2]; if (msi_parent != NULL) *msi_parent = map[i + 1]; if (msi_rid != NULL) *msi_rid = masked_rid - rid_base + msi_base; err = 0; break; } free(map, M_OFWPROP); return (err); } int ofw_bus_reg_to_rl(device_t dev, phandle_t node, pcell_t acells, pcell_t scells, struct resource_list *rl) { uint64_t phys, size; ssize_t i, j, rid, nreg, ret; uint32_t *reg; char *name; /* * This may be just redundant when having ofw_bus_devinfo * but makes this routine independent of it. */ ret = OF_getprop_alloc(node, "name", sizeof(*name), (void **)&name); if (ret == -1) name = NULL; ret = OF_getencprop_alloc(node, "reg", sizeof(*reg), (void **)®); nreg = (ret == -1) ? 0 : ret; if (nreg % (acells + scells) != 0) { if (bootverbose) device_printf(dev, "Malformed reg property on <%s>\n", (name == NULL) ? "unknown" : name); nreg = 0; } for (i = 0, rid = 0; i < nreg; i += acells + scells, rid++) { phys = size = 0; for (j = 0; j < acells; j++) { phys <<= 32; phys |= reg[i + j]; } for (j = 0; j < scells; j++) { size <<= 32; size |= reg[i + acells + j]; } /* Skip the dummy reg property of glue devices like ssm(4). */ if (size != 0) resource_list_add(rl, SYS_RES_MEMORY, rid, phys, phys + size - 1, size); } free(name, M_OFWPROP); free(reg, M_OFWPROP); return (0); } /* * Get interrupt parent for given node. * Returns 0 if interrupt parent doesn't exist. 
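 * The explicit "interrupt-parent" property is honored first; failing
 * that, the node's ancestors are walked until one carrying an
 * "interrupt-controller" property is found, whose xref handle is
 * then returned.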
*/ phandle_t ofw_bus_find_iparent(phandle_t node) { phandle_t iparent; if (OF_searchencprop(node, "interrupt-parent", &iparent, sizeof(iparent)) == -1) { for (iparent = node; iparent != 0; iparent = OF_parent(iparent)) { if (OF_hasprop(iparent, "interrupt-controller")) break; } iparent = OF_xref_from_node(iparent); } return (iparent); } int ofw_bus_intr_to_rl(device_t dev, phandle_t node, struct resource_list *rl, int *rlen) { phandle_t iparent; uint32_t icells, *intr; int err, i, irqnum, nintr, rid; boolean_t extended; nintr = OF_getencprop_alloc(node, "interrupts", sizeof(*intr), (void **)&intr); if (nintr > 0) { iparent = ofw_bus_find_iparent(node); if (iparent == 0) { device_printf(dev, "No interrupt-parent found, " "assuming direct parent\n"); iparent = OF_parent(node); iparent = OF_xref_from_node(iparent); } if (OF_searchencprop(OF_node_from_xref(iparent), "#interrupt-cells", &icells, sizeof(icells)) == -1) { device_printf(dev, "Missing #interrupt-cells " "property, assuming <1>\n"); icells = 1; } if (icells < 1 || icells > nintr) { device_printf(dev, "Invalid #interrupt-cells property " "value <%d>, assuming <1>\n", icells); icells = 1; } extended = false; } else { nintr = OF_getencprop_alloc(node, "interrupts-extended", sizeof(*intr), (void **)&intr); if (nintr <= 0) return (0); extended = true; } err = 0; rid = 0; for (i = 0; i < nintr; i += icells) { if (extended) { iparent = intr[i++]; if (OF_searchencprop(OF_node_from_xref(iparent), "#interrupt-cells", &icells, sizeof(icells)) == -1) { device_printf(dev, "Missing #interrupt-cells " "property\n"); err = ENOENT; break; } if (icells < 1 || (i + icells) > nintr) { device_printf(dev, "Invalid #interrupt-cells " "property value <%d>\n", icells); err = ERANGE; break; } } irqnum = ofw_bus_map_intr(dev, iparent, icells, &intr[i]); resource_list_add(rl, SYS_RES_IRQ, rid++, irqnum, irqnum, 1); } if (rlen != NULL) *rlen = rid; free(intr, M_OFWPROP); return (err); } int ofw_bus_intr_by_rid(device_t dev, phandle_t node, int wanted_rid, phandle_t *producer, int *ncells, pcell_t **cells) { phandle_t iparent; uint32_t icells, *intr; int err, i, nintr, rid; boolean_t extended; nintr = OF_getencprop_alloc(node, "interrupts", sizeof(*intr), (void **)&intr); if (nintr > 0) { iparent = ofw_bus_find_iparent(node); if (iparent == 0) { device_printf(dev, "No interrupt-parent found, " "assuming direct parent\n"); iparent = OF_parent(node); iparent = OF_xref_from_node(iparent); } if (OF_searchencprop(OF_node_from_xref(iparent), "#interrupt-cells", &icells, sizeof(icells)) == -1) { device_printf(dev, "Missing #interrupt-cells " "property, assuming <1>\n"); icells = 1; } if (icells < 1 || icells > nintr) { device_printf(dev, "Invalid #interrupt-cells property " "value <%d>, assuming <1>\n", icells); icells = 1; } extended = false; } else { nintr = OF_getencprop_alloc(node, "interrupts-extended", sizeof(*intr), (void **)&intr); if (nintr <= 0) return (ESRCH); extended = true; } err = ESRCH; rid = 0; for (i = 0; i < nintr; i += icells, rid++) { if (extended) { iparent = intr[i++]; if (OF_searchencprop(OF_node_from_xref(iparent), "#interrupt-cells", &icells, sizeof(icells)) == -1) { device_printf(dev, "Missing #interrupt-cells " "property\n"); err = ENOENT; break; } if (icells < 1 || (i + icells) > nintr) { device_printf(dev, "Invalid #interrupt-cells " "property value <%d>\n", icells); err = ERANGE; break; } } if (rid == wanted_rid) { *cells = malloc(icells * sizeof(**cells), M_OFWPROP, M_WAITOK); *producer = iparent; *ncells= icells; memcpy(*cells, intr + 
i, icells * sizeof(**cells));
			err = 0;
			break;
		}
	}
	free(intr, M_OFWPROP);
	return (err);
}

phandle_t
ofw_bus_find_child(phandle_t start, const char *child_name)
{
	char *name;
	int ret;
	phandle_t child;

	for (child = OF_child(start); child != 0; child = OF_peer(child)) {
		ret = OF_getprop_alloc(child, "name", sizeof(*name),
		    (void **)&name);
		if (ret == -1)
			continue;
		if (strcmp(name, child_name) == 0) {
			free(name, M_OFWPROP);
			return (child);
		}
		free(name, M_OFWPROP);
	}

	return (0);
}

phandle_t
ofw_bus_find_compatible(phandle_t node, const char *onecompat)
{
	phandle_t child, ret;
	void *compat;
	int len;

	/*
	 * Traverse all children of 'node', and find the first one with
	 * a matching 'compatible' property.
	 */
	for (child = OF_child(node); child != 0; child = OF_peer(child)) {
		len = OF_getprop_alloc(child, "compatible", 1, &compat);
		if (len >= 0) {
-			ret = ofw_bus_node_is_compatible(compat, len,
+			ret = ofw_bus_node_is_compatible_int(compat, len,
			    onecompat);
			free(compat, M_OFWPROP);
			if (ret != 0)
				return (child);
		}
		ret = ofw_bus_find_compatible(child, onecompat);
		if (ret != 0)
			return (ret);
	}
	return (0);
}

/**
 * @brief Return child of bus whose phandle is node
 *
 * A direct child of @p bus will be returned if its phandle in the
 * OFW tree is @p node. Otherwise, NULL is returned.
 *
 * @param bus The bus to examine
 * @param node The phandle_t to look for.
 */
device_t
ofw_bus_find_child_device_by_phandle(device_t bus, phandle_t node)
{
	device_t *children, retval, child;
	int nkid, i;

	/*
	 * Nothing can match the flag value for no node.
	 */
	if (node == -1)
		return (NULL);

	/*
	 * Search the children for a match. We microoptimize
	 * a bit by not using ofw_bus_get since we already know
	 * the parent. We do not recurse.
	 */
	if (device_get_children(bus, &children, &nkid) != 0)
		return (NULL);
	retval = NULL;
	for (i = 0; i < nkid; i++) {
		child = children[i];
		if (OFW_BUS_GET_NODE(bus, child) == node) {
			retval = child;
			break;
		}
	}
	free(children, M_TEMP);

	return (retval);
}

/*
 * Parse a property that contains a list of xrefs and values
 * (like the standard "clocks" and "resets" properties)
 * Input arguments:
 * node - consumer's device node
 * list_name - name of parsed list - "clocks"
 * cells_name - name of size property - "#clock-cells"
 * idx - the index of the requested list entry, or, if -1, an indication
 *       to return the number of entries in the parsed list.
 * Output arguments:
 * producer - handle of producer
 * ncells - number of cells in result or the number of items in the list when
 *          idx == -1.
 * cells - array of decoded cells
 */
static int
ofw_bus_parse_xref_list_internal(phandle_t node, const char *list_name,
    const char *cells_name, int idx, phandle_t *producer, int *ncells,
    pcell_t **cells)
{
	phandle_t pnode;
	phandle_t *elems;
	uint32_t pcells;
	int rv, i, j, nelems, cnt;

	elems = NULL;
	nelems = OF_getencprop_alloc(node, list_name, sizeof(*elems),
	    (void **)&elems);
	if (nelems <= 0)
		return (ENOENT);
	rv = (idx == -1) ? 0 : ENOENT;
	for (i = 0, cnt = 0; i < nelems; i += pcells, cnt++) {
		pnode = elems[i++];
		if (OF_getencprop(OF_node_from_xref(pnode),
		    cells_name, &pcells, sizeof(pcells)) == -1) {
			printf("Missing %s property\n", cells_name);
			rv = ENOENT;
			break;
		}
		if ((i + pcells) > nelems) {
			printf("Invalid %s property value <%d>\n", cells_name,
			    pcells);
			rv = ERANGE;
			break;
		}
		if (cnt == idx) {
			*cells = malloc(pcells * sizeof(**cells), M_OFWPROP,
			    M_WAITOK);
			*producer = pnode;
			*ncells = pcells;
			for (j = 0; j < pcells; j++)
				(*cells)[j] = elems[i + j];
			rv = 0;
			break;
		}
	}
	if (elems != NULL)
		free(elems, M_OFWPROP);
	if (idx == -1 && rv == 0)
		*ncells = cnt;
	return (rv);
}

/*
 * Parse a property that contains a list of xrefs and values
 * (like the standard "clocks" and "resets" properties)
 * Input arguments:
 * node - consumer's device node
 * list_name - name of parsed list - "clocks"
 * cells_name - name of size property - "#clock-cells"
 * idx - the index of the requested list entry (>= 0)
 * Output arguments:
 * producer - handle of producer
 * ncells - number of cells in result
 * cells - array of decoded cells
 */
int
ofw_bus_parse_xref_list_alloc(phandle_t node, const char *list_name,
    const char *cells_name, int idx, phandle_t *producer, int *ncells,
    pcell_t **cells)
{

	KASSERT(idx >= 0,
	    ("ofw_bus_parse_xref_list_alloc: negative index supplied"));

	return (ofw_bus_parse_xref_list_internal(node, list_name, cells_name,
	    idx, producer, ncells, cells));
}

/*
 * Parse a property that contains a list of xrefs and values
 * (like the standard "clocks" and "resets" properties)
 * and determine the number of items in the list
 * Input arguments:
 * node - consumer's device node
 * list_name - name of parsed list - "clocks"
 * cells_name - name of size property - "#clock-cells"
 * Output arguments:
 * count - number of items in list
 */
int
ofw_bus_parse_xref_list_get_length(phandle_t node, const char *list_name,
    const char *cells_name, int *count)
{

	return (ofw_bus_parse_xref_list_internal(node, list_name, cells_name,
	    -1, NULL, count, NULL));
}

/*
 * Find the index of a string in a string list property (case sensitive).
 */
int
ofw_bus_find_string_index(phandle_t node, const char *list_name,
    const char *name, int *idx)
{
	char *elems;
	int rv, i, cnt, nelems;

	elems = NULL;
	nelems = OF_getprop_alloc(node, list_name, 1, (void **)&elems);
	if (nelems <= 0)
		return (ENOENT);

	rv = ENOENT;
	for (i = 0, cnt = 0; i < nelems; cnt++) {
		if (strcmp(elems + i, name) == 0) {
			*idx = cnt;
			rv = 0;
			break;
		}
		i += strlen(elems + i) + 1;
	}

	if (elems != NULL)
		free(elems, M_OFWPROP);
	return (rv);
}

/*
 * Create a zero-terminated array of strings from a string list property.
 */
int
ofw_bus_string_list_to_array(phandle_t node, const char *list_name,
    const char ***out_array)
{
	char *elems, *tptr;
	const char **array;
	int i, cnt, nelems, len;

	elems = NULL;
	nelems = OF_getprop_alloc(node, list_name, 1, (void **)&elems);
	if (nelems <= 0)
		return (nelems);

	/* Count number of strings. */
	for (i = 0, cnt = 0; i < nelems; cnt++)
		i += strlen(elems + i) + 1;

	/* Allocate space for arrays and all strings. */
	array = malloc((cnt + 1) * sizeof(char *) + nelems, M_OFWPROP,
	    M_WAITOK);

	/* Get address of first string. */
	tptr = (char *)(array + cnt + 1);

	/* Copy strings. */
	memcpy(tptr, elems, nelems);
	free(elems, M_OFWPROP);

	/* Fill string pointers.
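	 * Each of the cnt used slots points into the string block that was
	 * copied just past the pointer table; the extra slot allocated
	 * above becomes the terminating NULL entry.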
*/ for (i = 0, cnt = 0; i < nelems; cnt++) { len = strlen(tptr) + 1; array[cnt] = tptr; i += len; tptr += len; } array[cnt] = 0; *out_array = array; return (cnt); } Index: projects/clang390-import/sys/dev/ofw/ofw_bus_subr.h =================================================================== --- projects/clang390-import/sys/dev/ofw/ofw_bus_subr.h (revision 305016) +++ projects/clang390-import/sys/dev/ofw/ofw_bus_subr.h (revision 305017) @@ -1,144 +1,145 @@ /*- * Copyright (c) 2005 Marius Strobl * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions, and the following disclaimer, * without modification, immediately at the beginning of the file. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * $FreeBSD$ */ #ifndef _DEV_OFW_OFW_BUS_SUBR_H_ #define _DEV_OFW_OFW_BUS_SUBR_H_ #include #ifdef INTRNG #include #endif #include #include "ofw_bus_if.h" #define ORIP_NOINT -1 #define ORIR_NOTFOUND 0xffffffff struct ofw_bus_iinfo { uint8_t *opi_imap; uint8_t *opi_imapmsk; int opi_imapsz; pcell_t opi_addrc; }; struct ofw_compat_data { const char *ocd_str; uintptr_t ocd_data; }; #ifdef INTRNG struct intr_map_data_fdt { struct intr_map_data hdr; phandle_t iparent; u_int ncells; pcell_t cells[]; }; #endif #define SIMPLEBUS_PNP_DESCR "Z:compat;P:private;" #define SIMPLEBUS_PNP_INFO(t) \ MODULE_PNP_INFO(SIMPLEBUS_PNP_DESCR, simplebus, t, t, sizeof(t[0]), sizeof(t) / sizeof(t[0])); /* Generic implementation of ofw_bus_if.m methods and helper routines */ int ofw_bus_gen_setup_devinfo(struct ofw_bus_devinfo *, phandle_t); void ofw_bus_gen_destroy_devinfo(struct ofw_bus_devinfo *); ofw_bus_get_compat_t ofw_bus_gen_get_compat; ofw_bus_get_model_t ofw_bus_gen_get_model; ofw_bus_get_name_t ofw_bus_gen_get_name; ofw_bus_get_node_t ofw_bus_gen_get_node; ofw_bus_get_type_t ofw_bus_gen_get_type; /* Helper method to report interesting OF properties in pnpinfo */ bus_child_pnpinfo_str_t ofw_bus_gen_child_pnpinfo_str; /* Routines for processing firmware interrupt maps */ void ofw_bus_setup_iinfo(phandle_t, struct ofw_bus_iinfo *, int); int ofw_bus_lookup_imap(phandle_t, struct ofw_bus_iinfo *, void *, int, void *, int, void *, int, phandle_t *); int ofw_bus_search_intrmap(void *, int, void *, int, void *, int, void *, void *, void *, int, phandle_t *); /* Routines for processing msi maps */ int ofw_bus_msimap(phandle_t, uint16_t, phandle_t *, uint32_t *); /* Routines for parsing device-tree data into resource lists. */ int ofw_bus_reg_to_rl(device_t, phandle_t, pcell_t, pcell_t, struct resource_list *); int ofw_bus_intr_to_rl(device_t, phandle_t, struct resource_list *, int *); int ofw_bus_intr_by_rid(device_t, phandle_t, int, phandle_t *, int *, pcell_t **); /* Helper to get device status property */ const char *ofw_bus_get_status(device_t dev); int ofw_bus_status_okay(device_t dev); /* Helper to get node's interrupt parent */ phandle_t ofw_bus_find_iparent(phandle_t); /* Helper routine for checking compat prop */ int ofw_bus_is_compatible(device_t, const char *); int ofw_bus_is_compatible_strict(device_t, const char *); +int ofw_bus_node_is_compatible(phandle_t, const char *); /* * Helper routine to search a list of compat properties. The table is * terminated by an entry with a NULL compat-string pointer; a pointer to that * table entry is returned if none of the compat strings match for the device, * giving you control over the not-found value. Will not return NULL unless the * provided table pointer is NULL. 
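 *
 * A minimal, hypothetical usage sketch (the device table and names are
 * illustrative only, not part of this change):
 *
 *	static struct ofw_compat_data compat_data[] = {
 *		{ "myvendor,mydev-v1",	1 },
 *		{ "myvendor,mydev-v2",	2 },
 *		{ NULL,			0 }
 *	};
 *	...
 *	hwrev = ofw_bus_search_compatible(dev, compat_data)->ocd_data;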
*/ const struct ofw_compat_data * ofw_bus_search_compatible(device_t, const struct ofw_compat_data *); /* Helper routine for checking existence of a prop */ int ofw_bus_has_prop(device_t, const char *); /* Helper to search for a child with a given compat prop */ phandle_t ofw_bus_find_compatible(phandle_t, const char *); /* Helper to search for a child with a given name */ phandle_t ofw_bus_find_child(phandle_t, const char *); /* Helper routine to find a device_t child matching a given phandle_t */ device_t ofw_bus_find_child_device_by_phandle(device_t bus, phandle_t node); /* Helper routines for parsing lists */ int ofw_bus_parse_xref_list_alloc(phandle_t node, const char *list_name, const char *cells_name, int idx, phandle_t *producer, int *ncells, pcell_t **cells); int ofw_bus_parse_xref_list_get_length(phandle_t node, const char *list_name, const char *cells_name, int *count); int ofw_bus_find_string_index(phandle_t node, const char *list_name, const char *name, int *idx); int ofw_bus_string_list_to_array(phandle_t node, const char *list_name, const char ***array); #endif /* !_DEV_OFW_OFW_BUS_SUBR_H_ */ Index: projects/clang390-import/sys/dev/syscons/syscons.c =================================================================== --- projects/clang390-import/sys/dev/syscons/syscons.c (revision 305016) +++ projects/clang390-import/sys/dev/syscons/syscons.c (revision 305017) @@ -1,3983 +1,4004 @@ /*- * Copyright (c) 1992-1998 Søren Schmidt * All rights reserved. * * This code is derived from software contributed to The DragonFly Project * by Sascha Wildner * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer, * without modification, immediately at the beginning of the file. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include "opt_compat.h" #include "opt_syscons.h" #include "opt_splash.h" #include "opt_ddb.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #if defined(__arm__) || defined(__mips__) || \ defined(__powerpc__) || defined(__sparc64__) #include #else #include #endif #if defined( __i386__) || defined(__amd64__) #include #include #endif #include #include #include #include #include #define COLD 0 #define WARM 1 #define DEFAULT_BLANKTIME (5*60) /* 5 minutes */ #define MAX_BLANKTIME (7*24*60*60) /* 7 days!? */ #define KEYCODE_BS 0x0e /* "<-- Backspace" key, XXX */ /* NULL-safe version of "tty_opened()" */ #define tty_opened_ns(tp) ((tp) != NULL && tty_opened(tp)) typedef struct default_attr { int std_color; /* normal hardware color */ int rev_color; /* reverse hardware color */ } default_attr; static default_attr user_default = { SC_NORM_ATTR, SC_NORM_REV_ATTR, }; static int sc_console_unit = -1; static int sc_saver_keyb_only = 1; static scr_stat *sc_console; static struct consdev *sc_consptr; static scr_stat main_console; static struct tty *main_devs[MAXCONS]; static char init_done = COLD; static int shutdown_in_progress = FALSE; static int suspend_in_progress = FALSE; static char sc_malloc = FALSE; static int saver_mode = CONS_NO_SAVER; /* LKM/user saver */ static int run_scrn_saver = FALSE; /* should run the saver? */ static int enable_bell = TRUE; /* enable beeper */ #ifndef SC_DISABLE_REBOOT static int enable_reboot = TRUE; /* enable keyboard reboot */ #endif #ifndef SC_DISABLE_KDBKEY static int enable_kdbkey = TRUE; /* enable keyboard debug */ #endif static long scrn_blank_time = 0; /* screen saver timeout value */ #ifdef DEV_SPLASH static int scrn_blanked; /* # of blanked screen */ static int sticky_splash = FALSE; static void none_saver(sc_softc_t *sc, int blank) { } static void (*current_saver)(sc_softc_t *, int) = none_saver; #endif #ifdef SC_NO_SUSPEND_VTYSWITCH static int sc_no_suspend_vtswitch = 1; #else static int sc_no_suspend_vtswitch = 0; #endif static int sc_susp_scr; static SYSCTL_NODE(_hw, OID_AUTO, syscons, CTLFLAG_RD, 0, "syscons"); static SYSCTL_NODE(_hw_syscons, OID_AUTO, saver, CTLFLAG_RD, 0, "saver"); SYSCTL_INT(_hw_syscons_saver, OID_AUTO, keybonly, CTLFLAG_RW, &sc_saver_keyb_only, 0, "screen saver interrupted by input only"); SYSCTL_INT(_hw_syscons, OID_AUTO, bell, CTLFLAG_RW, &enable_bell, 0, "enable bell"); #ifndef SC_DISABLE_REBOOT SYSCTL_INT(_hw_syscons, OID_AUTO, kbd_reboot, CTLFLAG_RW|CTLFLAG_SECURE, &enable_reboot, 0, "enable keyboard reboot"); #endif #ifndef SC_DISABLE_KDBKEY SYSCTL_INT(_hw_syscons, OID_AUTO, kbd_debug, CTLFLAG_RW|CTLFLAG_SECURE, &enable_kdbkey, 0, "enable keyboard debug"); #endif SYSCTL_INT(_hw_syscons, OID_AUTO, sc_no_suspend_vtswitch, CTLFLAG_RWTUN, &sc_no_suspend_vtswitch, 0, "Disable VT switch before suspend."); #if !defined(SC_NO_FONT_LOADING) && defined(SC_DFLT_FONT) #include "font.h" #endif tsw_ioctl_t *sc_user_ioctl; static bios_values_t bios_value; static int enable_panic_key; SYSCTL_INT(_machdep, OID_AUTO, enable_panic_key, CTLFLAG_RW, &enable_panic_key, 0, "Enable panic via keypress specified in kbdmap(5)"); #define SC_CONSOLECTL 255 #define VTY_WCHAN(sc, vty) (&SC_DEV(sc, vty)) /* prototypes */ static int sc_allocate_keyboard(sc_softc_t *sc, int unit); static int scvidprobe(int unit, int flags, int cons); static int sckbdprobe(int 
unit, int flags, int cons); static void scmeminit(void *arg); static int scdevtounit(struct tty *tp); static kbd_callback_func_t sckbdevent; static void scinit(int unit, int flags); static scr_stat *sc_get_stat(struct tty *tp); static void scterm(int unit, int flags); static void scshutdown(void *, int); static void scsuspend(void *); static void scresume(void *); static u_int scgetc(sc_softc_t *sc, u_int flags, struct sc_cnstate *sp); static void sc_puts(scr_stat *scp, u_char *buf, int len, int kernel); #define SCGETC_CN 1 #define SCGETC_NONBLOCK 2 static void sccnupdate(scr_stat *scp); static scr_stat *alloc_scp(sc_softc_t *sc, int vty); static void init_scp(sc_softc_t *sc, int vty, scr_stat *scp); static timeout_t scrn_timer; static int and_region(int *s1, int *e1, int s2, int e2); static void scrn_update(scr_stat *scp, int show_cursor); #ifdef DEV_SPLASH static int scsplash_callback(int event, void *arg); static void scsplash_saver(sc_softc_t *sc, int show); static int add_scrn_saver(void (*this_saver)(sc_softc_t *, int)); static int remove_scrn_saver(void (*this_saver)(sc_softc_t *, int)); static int set_scrn_saver_mode(scr_stat *scp, int mode, u_char *pal, int border); static int restore_scrn_saver_mode(scr_stat *scp, int changemode); static void stop_scrn_saver(sc_softc_t *sc, void (*saver)(sc_softc_t *, int)); static int wait_scrn_saver_stop(sc_softc_t *sc); #define scsplash_stick(stick) (sticky_splash = (stick)) #else /* !DEV_SPLASH */ #define scsplash_stick(stick) #endif /* DEV_SPLASH */ static int do_switch_scr(sc_softc_t *sc, int s); static int vt_proc_alive(scr_stat *scp); static int signal_vt_rel(scr_stat *scp); static int signal_vt_acq(scr_stat *scp); static int finish_vt_rel(scr_stat *scp, int release, int *s); static int finish_vt_acq(scr_stat *scp); static void exchange_scr(sc_softc_t *sc); static void update_cursor_image(scr_stat *scp); static void change_cursor_shape(scr_stat *scp, int flags, int base, int height); static void update_font(scr_stat *); static int save_kbd_state(scr_stat *scp); static int update_kbd_state(scr_stat *scp, int state, int mask); static int update_kbd_leds(scr_stat *scp, int which); static timeout_t blink_screen; static struct tty *sc_alloc_tty(int, int); static cn_probe_t sc_cnprobe; static cn_init_t sc_cninit; static cn_term_t sc_cnterm; static cn_getc_t sc_cngetc; static cn_putc_t sc_cnputc; static cn_grab_t sc_cngrab; static cn_ungrab_t sc_cnungrab; CONSOLE_DRIVER(sc); static tsw_open_t sctty_open; static tsw_close_t sctty_close; static tsw_outwakeup_t sctty_outwakeup; static tsw_ioctl_t sctty_ioctl; static tsw_mmap_t sctty_mmap; static struct ttydevsw sc_ttydevsw = { .tsw_open = sctty_open, .tsw_close = sctty_close, .tsw_outwakeup = sctty_outwakeup, .tsw_ioctl = sctty_ioctl, .tsw_mmap = sctty_mmap, }; static d_ioctl_t consolectl_ioctl; static d_close_t consolectl_close; static struct cdevsw consolectl_devsw = { .d_version = D_VERSION, .d_flags = D_NEEDGIANT | D_TRACKCLOSE, .d_ioctl = consolectl_ioctl, .d_close = consolectl_close, .d_name = "consolectl", }; int sc_probe_unit(int unit, int flags) { if (!vty_enabled(VTY_SC)) return ENXIO; if (!scvidprobe(unit, flags, FALSE)) { if (bootverbose) printf("%s%d: no video adapter found.\n", SC_DRIVER_NAME, unit); return ENXIO; } /* syscons will be attached even when there is no keyboard */ sckbdprobe(unit, flags, FALSE); return 0; } /* probe video adapters, return TRUE if found */ static int scvidprobe(int unit, int flags, int cons) { /* * Access the video adapter driver through the back door! 
* Video adapter drivers need to be configured before syscons. * However, when syscons is being probed as the low-level console, * they have not been initialized yet. We force them to initialize * themselves here. XXX */ vid_configure(cons ? VIO_PROBE_ONLY : 0); return (vid_find_adapter("*", unit) >= 0); } /* probe the keyboard, return TRUE if found */ static int sckbdprobe(int unit, int flags, int cons) { /* access the keyboard driver through the backdoor! */ kbd_configure(cons ? KB_CONF_PROBE_ONLY : 0); return (kbd_find_keyboard("*", unit) >= 0); } static char *adapter_name(video_adapter_t *adp) { static struct { int type; char *name[2]; } names[] = { { KD_MONO, { "MDA", "MDA" } }, { KD_HERCULES, { "Hercules", "Hercules" } }, { KD_CGA, { "CGA", "CGA" } }, { KD_EGA, { "EGA", "EGA (mono)" } }, { KD_VGA, { "VGA", "VGA (mono)" } }, { KD_PC98, { "PC-98x1", "PC-98x1" } }, { KD_TGA, { "TGA", "TGA" } }, { -1, { "Unknown", "Unknown" } }, }; int i; for (i = 0; names[i].type != -1; ++i) if (names[i].type == adp->va_type) break; return names[i].name[(adp->va_flags & V_ADP_COLOR) ? 0 : 1]; } static void sctty_outwakeup(struct tty *tp) { size_t len; u_char buf[PCBURST]; scr_stat *scp = sc_get_stat(tp); if (scp->status & SLKED || (scp == scp->sc->cur_scp && scp->sc->blink_in_progress)) return; for (;;) { len = ttydisc_getc(tp, buf, sizeof buf); if (len == 0) break; SC_VIDEO_LOCK(scp->sc); sc_puts(scp, buf, len, 0); SC_VIDEO_UNLOCK(scp->sc); } } static struct tty * sc_alloc_tty(int index, int devnum) { struct sc_ttysoftc *stc; struct tty *tp; /* Allocate TTY object and softc to store unit number. */ stc = malloc(sizeof(struct sc_ttysoftc), M_DEVBUF, M_WAITOK); stc->st_index = index; stc->st_stat = NULL; tp = tty_alloc_mutex(&sc_ttydevsw, stc, &Giant); /* Create device node. */ tty_makedev(tp, NULL, "v%r", devnum); return (tp); } #ifdef SC_PIXEL_MODE static void sc_set_vesa_mode(scr_stat *scp, sc_softc_t *sc, int unit) { video_info_t info; u_char *font; int depth; int fontsize; int i; int vmode; vmode = 0; (void)resource_int_value("sc", unit, "vesa_mode", &vmode); if (vmode < M_VESA_BASE || vmode > M_VESA_MODE_MAX || vidd_get_info(sc->adp, vmode, &info) != 0 || !sc_support_pixel_mode(&info)) vmode = 0; /* * If the mode is unset or unsupported, search for an available * 800x600 graphics mode with the highest color depth. 
*/ if (vmode == 0) { for (depth = 0, i = M_VESA_BASE; i <= M_VESA_MODE_MAX; i++) if (vidd_get_info(sc->adp, i, &info) == 0 && info.vi_width == 800 && info.vi_height == 600 && sc_support_pixel_mode(&info) && info.vi_depth > depth) { vmode = i; depth = info.vi_depth; } if (vmode == 0) return; vidd_get_info(sc->adp, vmode, &info); } #if !defined(SC_NO_FONT_LOADING) && defined(SC_DFLT_FONT) fontsize = info.vi_cheight; #else fontsize = scp->font_size; #endif if (fontsize < 14) fontsize = 8; else if (fontsize >= 16) fontsize = 16; else fontsize = 14; #ifndef SC_NO_FONT_LOADING switch (fontsize) { case 8: if ((sc->fonts_loaded & FONT_8) == 0) return; font = sc->font_8; break; case 14: if ((sc->fonts_loaded & FONT_14) == 0) return; font = sc->font_14; break; case 16: if ((sc->fonts_loaded & FONT_16) == 0) return; font = sc->font_16; break; } #else font = NULL; #endif #ifdef DEV_SPLASH if ((sc->flags & SC_SPLASH_SCRN) != 0) splash_term(sc->adp); #endif #ifndef SC_NO_HISTORY if (scp->history != NULL) { sc_vtb_append(&scp->vtb, 0, scp->history, scp->ypos * scp->xsize + scp->xpos); scp->history_pos = sc_vtb_tail(scp->history); } #endif vidd_set_mode(sc->adp, vmode); scp->status |= (UNKNOWN_MODE | PIXEL_MODE | MOUSE_HIDDEN); scp->status &= ~(GRAPHICS_MODE | MOUSE_VISIBLE); scp->xpixel = info.vi_width; scp->ypixel = info.vi_height; scp->xsize = scp->xpixel / 8; scp->ysize = scp->ypixel / fontsize; scp->xpos = 0; scp->ypos = scp->ysize - 1; scp->xoff = scp->yoff = 0; scp->font = font; scp->font_size = fontsize; scp->font_width = 8; scp->start = scp->xsize * scp->ysize - 1; scp->end = 0; scp->cursor_pos = scp->cursor_oldpos = scp->xsize * scp->xsize; scp->mode = sc->initial_mode = vmode; #ifndef __sparc64__ sc_vtb_init(&scp->scr, VTB_FRAMEBUFFER, scp->xsize, scp->ysize, (void *)sc->adp->va_window, FALSE); #endif sc_alloc_scr_buffer(scp, FALSE, FALSE); sc_init_emulator(scp, NULL); #ifndef SC_NO_CUTPASTE sc_alloc_cut_buffer(scp, FALSE); #endif #ifndef SC_NO_HISTORY sc_alloc_history_buffer(scp, 0, 0, FALSE); #endif sc_set_border(scp, scp->border); sc_set_cursor_image(scp); scp->status &= ~UNKNOWN_MODE; #ifdef DEV_SPLASH if ((sc->flags & SC_SPLASH_SCRN) != 0) splash_init(sc->adp, scsplash_callback, sc); #endif } #endif int sc_attach_unit(int unit, int flags) { sc_softc_t *sc; scr_stat *scp; struct cdev *dev; int vc; if (!vty_enabled(VTY_SC)) return ENXIO; flags &= ~SC_KERNEL_CONSOLE; if (sc_console_unit == unit) { /* * If this unit is being used as the system console, we need to * adjust some variables and buffers before and after scinit(). 
*/ /* assert(sc_console != NULL) */ flags |= SC_KERNEL_CONSOLE; scmeminit(NULL); } scinit(unit, flags); sc = sc_get_softc(unit, flags & SC_KERNEL_CONSOLE); sc->config = flags; callout_init(&sc->ctimeout, 0); callout_init(&sc->cblink, 0); scp = sc_get_stat(sc->dev[0]); if (sc_console == NULL) /* sc_console_unit < 0 */ sc_console = scp; #ifdef SC_PIXEL_MODE if ((sc->config & SC_VESAMODE) != 0) sc_set_vesa_mode(scp, sc, unit); #endif /* SC_PIXEL_MODE */ /* initialize cursor */ if (!ISGRAPHSC(scp)) update_cursor_image(scp); /* get screen update going */ scrn_timer(sc); /* set up the keyboard */ (void)kbdd_ioctl(sc->kbd, KDSKBMODE, (caddr_t)&scp->kbd_mode); update_kbd_state(scp, scp->status, LOCK_MASK); printf("%s%d: %s <%d virtual consoles, flags=0x%x>\n", SC_DRIVER_NAME, unit, adapter_name(sc->adp), sc->vtys, sc->config); if (bootverbose) { printf("%s%d:", SC_DRIVER_NAME, unit); if (sc->adapter >= 0) printf(" fb%d", sc->adapter); if (sc->keyboard >= 0) printf(", kbd%d", sc->keyboard); if (scp->tsw) printf(", terminal emulator: %s (%s)", scp->tsw->te_name, scp->tsw->te_desc); printf("\n"); } /* Register suspend/resume/shutdown callbacks for the kernel console. */ if (sc_console_unit == unit) { EVENTHANDLER_REGISTER(power_suspend_early, scsuspend, NULL, EVENTHANDLER_PRI_ANY); EVENTHANDLER_REGISTER(power_resume, scresume, NULL, EVENTHANDLER_PRI_ANY); EVENTHANDLER_REGISTER(shutdown_pre_sync, scshutdown, NULL, SHUTDOWN_PRI_DEFAULT); } for (vc = 0; vc < sc->vtys; vc++) { if (sc->dev[vc] == NULL) { sc->dev[vc] = sc_alloc_tty(vc, vc + unit * MAXCONS); if (vc == 0 && sc->dev == main_devs) SC_STAT(sc->dev[0]) = &main_console; } /* * The first vty already has struct tty and scr_stat initialized * in scinit(). The other vtys will have these structs when * first opened. */ } dev = make_dev(&consolectl_devsw, 0, UID_ROOT, GID_WHEEL, 0600, "consolectl"); dev->si_drv1 = sc->dev[0]; return 0; } static void scmeminit(void *arg) { if (!vty_enabled(VTY_SC)) return; if (sc_malloc) return; sc_malloc = TRUE; /* * As soon as malloc() becomes functional, we had better allocate * various buffers for the kernel console. */ if (sc_console_unit < 0) /* sc_console == NULL */ return; /* copy the temporary buffer to the final buffer */ sc_alloc_scr_buffer(sc_console, FALSE, FALSE); #ifndef SC_NO_CUTPASTE sc_alloc_cut_buffer(sc_console, FALSE); #endif #ifndef SC_NO_HISTORY /* initialize history buffer & pointers */ sc_alloc_history_buffer(sc_console, 0, 0, FALSE); #endif } /* XXX */ SYSINIT(sc_mem, SI_SUB_KMEM, SI_ORDER_ANY, scmeminit, NULL); static int scdevtounit(struct tty *tp) { int vty = SC_VTY(tp); if (vty == SC_CONSOLECTL) return ((sc_console != NULL) ? sc_console->sc->unit : -1); else if ((vty < 0) || (vty >= MAXCONS*sc_max_unit())) return -1; else return vty/MAXCONS; } static int sctty_open(struct tty *tp) { int unit = scdevtounit(tp); sc_softc_t *sc; scr_stat *scp; #ifndef __sparc64__ keyarg_t key; #endif DPRINTF(5, ("scopen: dev:%s, unit:%d, vty:%d\n", devtoname(tp->t_dev), unit, SC_VTY(tp))); sc = sc_get_softc(unit, (sc_console_unit == unit) ? SC_KERNEL_CONSOLE : 0); if (sc == NULL) return ENXIO; if (!tty_opened(tp)) { /* Use the current setting of the <-- key as default VERASE. 
*/ /* If the Delete key is preferable, an stty is necessary */ #ifndef __sparc64__ if (sc->kbd != NULL) { key.keynum = KEYCODE_BS; (void)kbdd_ioctl(sc->kbd, GIO_KEYMAPENT, (caddr_t)&key); tp->t_termios.c_cc[VERASE] = key.key.map[0]; } #endif } scp = sc_get_stat(tp); if (scp == NULL) { scp = SC_STAT(tp) = alloc_scp(sc, SC_VTY(tp)); if (ISGRAPHSC(scp)) sc_set_pixel_mode(scp, NULL, 0, 0, 16, 8); } if (!tp->t_winsize.ws_col && !tp->t_winsize.ws_row) { tp->t_winsize.ws_col = scp->xsize; tp->t_winsize.ws_row = scp->ysize; } return (0); } static void sctty_close(struct tty *tp) { scr_stat *scp; int s; if (SC_VTY(tp) != SC_CONSOLECTL) { scp = sc_get_stat(tp); /* were we in the middle of the VT switching process? */ DPRINTF(5, ("sc%d: scclose(), ", scp->sc->unit)); s = spltty(); if ((scp == scp->sc->cur_scp) && (scp->sc->unit == sc_console_unit)) cnavailable(sc_consptr, TRUE); if (finish_vt_rel(scp, TRUE, &s) == 0) /* force release */ DPRINTF(5, ("reset WAIT_REL, ")); if (finish_vt_acq(scp) == 0) /* force acknowledge */ DPRINTF(5, ("reset WAIT_ACQ, ")); #ifdef not_yet_done if (scp == &main_console) { scp->pid = 0; scp->proc = NULL; scp->smode.mode = VT_AUTO; } else { sc_vtb_destroy(&scp->vtb); #ifndef __sparc64__ sc_vtb_destroy(&scp->scr); #endif sc_free_history_buffer(scp, scp->ysize); SC_STAT(tp) = NULL; free(scp, M_DEVBUF); } #else scp->pid = 0; scp->proc = NULL; scp->smode.mode = VT_AUTO; #endif scp->kbd_mode = K_XLATE; if (scp == scp->sc->cur_scp) (void)kbdd_ioctl(scp->sc->kbd, KDSKBMODE, (caddr_t)&scp->kbd_mode); DPRINTF(5, ("done.\n")); } } #if 0 /* XXX mpsafetty: fix screensaver. What about outwakeup? */ static int scread(struct cdev *dev, struct uio *uio, int flag) { if (!sc_saver_keyb_only) sc_touch_scrn_saver(); return ttyread(dev, uio, flag); } #endif static int sckbdevent(keyboard_t *thiskbd, int event, void *arg) { sc_softc_t *sc; struct tty *cur_tty; int c, error = 0; size_t len; const u_char *cp; sc = (sc_softc_t *)arg; /* assert(thiskbd == sc->kbd) */ mtx_lock(&Giant); switch (event) { case KBDIO_KEYINPUT: break; case KBDIO_UNLOADING: sc->kbd = NULL; sc->keyboard = -1; kbd_release(thiskbd, (void *)&sc->keyboard); goto done; default: error = EINVAL; goto done; } /* * Loop while there is still input to get from the keyboard. * I don't think this is nessesary, and it doesn't fix * the Xaccel-2.1 keyboard hang, but it can't hurt. 
XXX */ while ((c = scgetc(sc, SCGETC_NONBLOCK, NULL)) != NOKEY) { cur_tty = SC_DEV(sc, sc->cur_scp->index); if (!tty_opened_ns(cur_tty)) continue; if ((*sc->cur_scp->tsw->te_input)(sc->cur_scp, c, cur_tty)) continue; switch (KEYFLAGS(c)) { case 0x0000: /* normal key */ ttydisc_rint(cur_tty, KEYCHAR(c), 0); break; case FKEY: /* function key, return string */ cp = (*sc->cur_scp->tsw->te_fkeystr)(sc->cur_scp, c); if (cp != NULL) { ttydisc_rint_simple(cur_tty, cp, strlen(cp)); break; } cp = kbdd_get_fkeystr(thiskbd, KEYCHAR(c), &len); if (cp != NULL) ttydisc_rint_simple(cur_tty, cp, len); break; case MKEY: /* meta is active, prepend ESC */ ttydisc_rint(cur_tty, 0x1b, 0); ttydisc_rint(cur_tty, KEYCHAR(c), 0); break; case BKEY: /* backtab fixed sequence (esc [ Z) */ ttydisc_rint_simple(cur_tty, "\x1B[Z", 3); break; } ttydisc_rint_done(cur_tty); } sc->cur_scp->status |= MOUSE_HIDDEN; done: mtx_unlock(&Giant); return (error); } static int sctty_ioctl(struct tty *tp, u_long cmd, caddr_t data, struct thread *td) { int error; int i; sc_softc_t *sc; scr_stat *scp; int s; #if defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD5) || \ defined(COMPAT_FREEBSD4) || defined(COMPAT_43) int ival; #endif /* If there is a user_ioctl function call that first */ if (sc_user_ioctl) { error = (*sc_user_ioctl)(tp, cmd, data, td); if (error != ENOIOCTL) return error; } error = sc_vid_ioctl(tp, cmd, data, td); if (error != ENOIOCTL) return error; #ifndef SC_NO_HISTORY error = sc_hist_ioctl(tp, cmd, data, td); if (error != ENOIOCTL) return error; #endif #ifndef SC_NO_SYSMOUSE error = sc_mouse_ioctl(tp, cmd, data, td); if (error != ENOIOCTL) return error; #endif scp = sc_get_stat(tp); /* assert(scp != NULL) */ /* scp is sc_console, if SC_VTY(dev) == SC_CONSOLECTL. */ sc = scp->sc; if (scp->tsw) { error = (*scp->tsw->te_ioctl)(scp, tp, cmd, data, td); if (error != ENOIOCTL) return error; } switch (cmd) { /* process console hardware related ioctl's */ case GIO_ATTR: /* get current attributes */ /* this ioctl is not processed here, but in the terminal emulator */ return ENOTTY; case GIO_COLOR: /* is this a color console ? */ *(int *)data = (sc->adp->va_flags & V_ADP_COLOR) ? 
1 : 0; return 0; case CONS_BLANKTIME: /* set screen saver timeout (0 = no saver) */ if (*(int *)data < 0 || *(int *)data > MAX_BLANKTIME) return EINVAL; s = spltty(); scrn_blank_time = *(int *)data; run_scrn_saver = (scrn_blank_time != 0); splx(s); return 0; case CONS_CURSORTYPE: /* set cursor type (obsolete) */ s = spltty(); *(int *)data &= CONS_CURSOR_ATTRS; sc_change_cursor_shape(scp, *(int *)data, -1, -1); splx(s); return 0; case CONS_GETCURSORSHAPE: /* get cursor shape (new interface) */ if (((int *)data)[0] & CONS_LOCAL_CURSOR) { ((int *)data)[0] = scp->curr_curs_attr.flags; ((int *)data)[1] = scp->curr_curs_attr.base; ((int *)data)[2] = scp->curr_curs_attr.height; } else { ((int *)data)[0] = sc->curs_attr.flags; ((int *)data)[1] = sc->curs_attr.base; ((int *)data)[2] = sc->curs_attr.height; } return 0; case CONS_SETCURSORSHAPE: /* set cursor shape (new interface) */ s = spltty(); sc_change_cursor_shape(scp, ((int *)data)[0], ((int *)data)[1], ((int *)data)[2]); splx(s); return 0; case CONS_BELLTYPE: /* set bell type sound/visual */ if ((*(int *)data) & CONS_VISUAL_BELL) sc->flags |= SC_VISUAL_BELL; else sc->flags &= ~SC_VISUAL_BELL; if ((*(int *)data) & CONS_QUIET_BELL) sc->flags |= SC_QUIET_BELL; else sc->flags &= ~SC_QUIET_BELL; return 0; case CONS_GETINFO: /* get current (virtual) console info */ { vid_info_t *ptr = (vid_info_t*)data; if (ptr->size == sizeof(struct vid_info)) { ptr->m_num = sc->cur_scp->index; ptr->font_size = scp->font_size; ptr->mv_col = scp->xpos; ptr->mv_row = scp->ypos; ptr->mv_csz = scp->xsize; ptr->mv_rsz = scp->ysize; ptr->mv_hsz = (scp->history != NULL) ? scp->history->vtb_rows : 0; /* * The following fields are filled by the terminal emulator. XXX * * ptr->mv_norm.fore * ptr->mv_norm.back * ptr->mv_rev.fore * ptr->mv_rev.back */ ptr->mv_grfc.fore = 0; /* not supported */ ptr->mv_grfc.back = 0; /* not supported */ ptr->mv_ovscan = scp->border; if (scp == sc->cur_scp) save_kbd_state(scp); ptr->mk_keylock = scp->status & LOCK_MASK; return 0; } return EINVAL; } case CONS_GETVERS: /* get version number */ *(int*)data = 0x200; /* version 2.0 */ return 0; case CONS_IDLE: /* see if the screen has been idle */ /* * When the screen is in the GRAPHICS_MODE or UNKNOWN_MODE, * the user process may have been writing something on the * screen and syscons is not aware of it. Declare the screen * is NOT idle if it is in one of these modes. But there is * an exception to it; if a screen saver is running in the * graphics mode in the current screen, we should say that the * screen has been idle. */ *(int *)data = (sc->flags & SC_SCRN_IDLE) && (!ISGRAPHSC(sc->cur_scp) || (sc->cur_scp->status & SAVER_RUNNING)); return 0; case CONS_SAVERMODE: /* set saver mode */ switch(*(int *)data) { case CONS_NO_SAVER: case CONS_USR_SAVER: /* if a LKM screen saver is running, stop it first. */ scsplash_stick(FALSE); saver_mode = *(int *)data; s = spltty(); #ifdef DEV_SPLASH if ((error = wait_scrn_saver_stop(NULL))) { splx(s); return error; } #endif run_scrn_saver = TRUE; if (saver_mode == CONS_USR_SAVER) scp->status |= SAVER_RUNNING; else scp->status &= ~SAVER_RUNNING; scsplash_stick(TRUE); splx(s); break; case CONS_LKM_SAVER: s = spltty(); if ((saver_mode == CONS_USR_SAVER) && (scp->status & SAVER_RUNNING)) scp->status &= ~SAVER_RUNNING; saver_mode = *(int *)data; splx(s); break; default: return EINVAL; } return 0; case CONS_SAVERSTART: /* immediately start/stop the screen saver */ /* * Note that this ioctl does not guarantee the screen saver * actually starts or stops. 
It merely attempts to do so... */ s = spltty(); run_scrn_saver = (*(int *)data != 0); if (run_scrn_saver) sc->scrn_time_stamp -= scrn_blank_time; splx(s); return 0; case CONS_SCRSHOT: /* get a screen shot */ { int retval, hist_rsz; size_t lsize, csize; vm_offset_t frbp, hstp; unsigned lnum; scrshot_t *ptr = (scrshot_t *)data; void *outp = ptr->buf; if (ptr->x < 0 || ptr->y < 0 || ptr->xsize < 0 || ptr->ysize < 0) return EINVAL; s = spltty(); if (ISGRAPHSC(scp)) { splx(s); return EOPNOTSUPP; } hist_rsz = (scp->history != NULL) ? scp->history->vtb_rows : 0; if (((u_int)ptr->x + ptr->xsize) > scp->xsize || ((u_int)ptr->y + ptr->ysize) > (scp->ysize + hist_rsz)) { splx(s); return EINVAL; } lsize = scp->xsize * sizeof(u_int16_t); csize = ptr->xsize * sizeof(u_int16_t); /* Pointer to the last line of framebuffer */ frbp = scp->vtb.vtb_buffer + scp->ysize * lsize + ptr->x * sizeof(u_int16_t); /* Pointer to the last line of target buffer */ outp = (char *)outp + ptr->ysize * csize; /* Pointer to the last line of history buffer */ if (scp->history != NULL) hstp = scp->history->vtb_buffer + sc_vtb_tail(scp->history) * sizeof(u_int16_t) + ptr->x * sizeof(u_int16_t); else hstp = 0; retval = 0; for (lnum = 0; lnum < (ptr->y + ptr->ysize); lnum++) { if (lnum < scp->ysize) { frbp -= lsize; } else { hstp -= lsize; if (hstp < scp->history->vtb_buffer) hstp += scp->history->vtb_rows * lsize; frbp = hstp; } if (lnum < ptr->y) continue; outp = (char *)outp - csize; retval = copyout((void *)frbp, outp, csize); if (retval != 0) break; } splx(s); return retval; } case VT_SETMODE: /* set screen switcher mode */ { struct vt_mode *mode; struct proc *p1; mode = (struct vt_mode *)data; DPRINTF(5, ("%s%d: VT_SETMODE ", SC_DRIVER_NAME, sc->unit)); if (scp->smode.mode == VT_PROCESS) { p1 = pfind(scp->pid); if (scp->proc == p1 && scp->proc != td->td_proc) { if (p1) PROC_UNLOCK(p1); DPRINTF(5, ("error EPERM\n")); return EPERM; } if (p1) PROC_UNLOCK(p1); } s = spltty(); if (mode->mode == VT_AUTO) { scp->smode.mode = VT_AUTO; scp->proc = NULL; scp->pid = 0; DPRINTF(5, ("VT_AUTO, ")); if ((scp == sc->cur_scp) && (sc->unit == sc_console_unit)) cnavailable(sc_consptr, TRUE); /* were we in the middle of the vty switching process? */ if (finish_vt_rel(scp, TRUE, &s) == 0) DPRINTF(5, ("reset WAIT_REL, ")); if (finish_vt_acq(scp) == 0) DPRINTF(5, ("reset WAIT_ACQ, ")); } else { if (!ISSIGVALID(mode->relsig) || !ISSIGVALID(mode->acqsig) || !ISSIGVALID(mode->frsig)) { splx(s); DPRINTF(5, ("error EINVAL\n")); return EINVAL; } DPRINTF(5, ("VT_PROCESS %d, ", td->td_proc->p_pid)); bcopy(data, &scp->smode, sizeof(struct vt_mode)); scp->proc = td->td_proc; scp->pid = scp->proc->p_pid; if ((scp == sc->cur_scp) && (sc->unit == sc_console_unit)) cnavailable(sc_consptr, FALSE); } splx(s); DPRINTF(5, ("\n")); return 0; } case VT_GETMODE: /* get screen switcher mode */ bcopy(&scp->smode, data, sizeof(struct vt_mode)); return 0; #if defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD5) || \ defined(COMPAT_FREEBSD4) || defined(COMPAT_43) case _IO('v', 4): ival = IOCPARM_IVAL(data); data = (caddr_t)&ival; /* FALLTHROUGH */ #endif case VT_RELDISP: /* screen switcher ioctl */ s = spltty(); /* * This must be the current vty which is in the VT_PROCESS * switching mode... */ if ((scp != sc->cur_scp) || (scp->smode.mode != VT_PROCESS)) { splx(s); return EINVAL; } /* ...and this process is controlling it. 
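* I.e., it must be the process that put this vty into VT_PROCESS mode * with VT_SETMODE above, which recorded it in scp->proc.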
*/ if (scp->proc != td->td_proc) { splx(s); return EPERM; } error = EINVAL; switch(*(int *)data) { case VT_FALSE: /* user refuses to release screen, abort */ if ((error = finish_vt_rel(scp, FALSE, &s)) == 0) DPRINTF(5, ("%s%d: VT_FALSE\n", SC_DRIVER_NAME, sc->unit)); break; case VT_TRUE: /* user has released screen, go on */ if ((error = finish_vt_rel(scp, TRUE, &s)) == 0) DPRINTF(5, ("%s%d: VT_TRUE\n", SC_DRIVER_NAME, sc->unit)); break; case VT_ACKACQ: /* acquire acknowledged, switch completed */ if ((error = finish_vt_acq(scp)) == 0) DPRINTF(5, ("%s%d: VT_ACKACQ\n", SC_DRIVER_NAME, sc->unit)); break; default: break; } splx(s); return error; case VT_OPENQRY: /* return free virtual console */ for (i = sc->first_vty; i < sc->first_vty + sc->vtys; i++) { tp = SC_DEV(sc, i); if (!tty_opened_ns(tp)) { *(int *)data = i + 1; return 0; } } return EINVAL; #if defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD5) || \ defined(COMPAT_FREEBSD4) || defined(COMPAT_43) case _IO('v', 5): ival = IOCPARM_IVAL(data); data = (caddr_t)&ival; /* FALLTHROUGH */ #endif case VT_ACTIVATE: /* switch to screen *data */ i = (*(int *)data == 0) ? scp->index : (*(int *)data - 1); s = spltty(); error = sc_clean_up(sc->cur_scp); splx(s); if (error) return error; error = sc_switch_scr(sc, i); return (error); #if defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD5) || \ defined(COMPAT_FREEBSD4) || defined(COMPAT_43) case _IO('v', 6): ival = IOCPARM_IVAL(data); data = (caddr_t)&ival; /* FALLTHROUGH */ #endif case VT_WAITACTIVE: /* wait for switch to occur */ i = (*(int *)data == 0) ? scp->index : (*(int *)data - 1); if ((i < sc->first_vty) || (i >= sc->first_vty + sc->vtys)) return EINVAL; if (i == sc->cur_scp->index) return 0; error = tsleep(VTY_WCHAN(sc, i), (PZERO + 1) | PCATCH, "waitvt", 0); return error; case VT_GETACTIVE: /* get active vty # */ *(int *)data = sc->cur_scp->index + 1; return 0; case VT_GETINDEX: /* get this vty # */ *(int *)data = scp->index + 1; return 0; case VT_LOCKSWITCH: /* prevent vty switching */ if ((*(int *)data) & 0x01) sc->flags |= SC_SCRN_VTYLOCK; else sc->flags &= ~SC_SCRN_VTYLOCK; return 0; case KDENABIO: /* allow io operations */ error = priv_check(td, PRIV_IO); if (error != 0) return error; error = securelevel_gt(td->td_ucred, 0); if (error != 0) return error; #ifdef __i386__ td->td_frame->tf_eflags |= PSL_IOPL; #elif defined(__amd64__) td->td_frame->tf_rflags |= PSL_IOPL; #endif return 0; case KDDISABIO: /* disallow io operations (default) */ #ifdef __i386__ td->td_frame->tf_eflags &= ~PSL_IOPL; #elif defined(__amd64__) td->td_frame->tf_rflags &= ~PSL_IOPL; #endif return 0; #if defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD5) || \ defined(COMPAT_FREEBSD4) || defined(COMPAT_43) case _IO('K', 20): ival = IOCPARM_IVAL(data); data = (caddr_t)&ival; /* FALLTHROUGH */ #endif case KDSKBSTATE: /* set keyboard state (locks) */ if (*(int *)data & ~LOCK_MASK) return EINVAL; scp->status &= ~LOCK_MASK; scp->status |= *(int *)data; if (scp == sc->cur_scp) update_kbd_state(scp, scp->status, LOCK_MASK); return 0; case KDGKBSTATE: /* get keyboard state (locks) */ if (scp == sc->cur_scp) save_kbd_state(scp); *(int *)data = scp->status & LOCK_MASK; return 0; case KDGETREPEAT: /* get keyboard repeat & delay rates */ case KDSETREPEAT: /* set keyboard repeat & delay rates (new) */ error = kbdd_ioctl(sc->kbd, cmd, data); if (error == ENOIOCTL) error = ENODEV; return error; #if defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD5) || \ defined(COMPAT_FREEBSD4) || defined(COMPAT_43) case _IO('K', 67): ival = 
IOCPARM_IVAL(data); data = (caddr_t)&ival; /* FALLTHROUGH */ #endif case KDSETRAD: /* set keyboard repeat & delay rates (old) */ if (*(int *)data & ~0x7f) return EINVAL; error = kbdd_ioctl(sc->kbd, KDSETRAD, data); if (error == ENOIOCTL) error = ENODEV; return error; #if defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD5) || \ defined(COMPAT_FREEBSD4) || defined(COMPAT_43) case _IO('K', 7): ival = IOCPARM_IVAL(data); data = (caddr_t)&ival; /* FALLTHROUGH */ #endif case KDSKBMODE: /* set keyboard mode */ switch (*(int *)data) { case K_XLATE: /* switch to XLT ascii mode */ case K_RAW: /* switch to RAW scancode mode */ case K_CODE: /* switch to CODE mode */ scp->kbd_mode = *(int *)data; if (scp == sc->cur_scp) (void)kbdd_ioctl(sc->kbd, KDSKBMODE, data); return 0; default: return EINVAL; } /* NOT REACHED */ case KDGKBMODE: /* get keyboard mode */ *(int *)data = scp->kbd_mode; return 0; case KDGKBINFO: error = kbdd_ioctl(sc->kbd, cmd, data); if (error == ENOIOCTL) error = ENODEV; return error; #if defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD5) || \ defined(COMPAT_FREEBSD4) || defined(COMPAT_43) case _IO('K', 8): ival = IOCPARM_IVAL(data); data = (caddr_t)&ival; /* FALLTHROUGH */ #endif case KDMKTONE: /* sound the bell */ if (*(int*)data) sc_bell(scp, (*(int*)data)&0xffff, (((*(int*)data)>>16)&0xffff)*hz/1000); else sc_bell(scp, scp->bell_pitch, scp->bell_duration); return 0; #if defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD5) || \ defined(COMPAT_FREEBSD4) || defined(COMPAT_43) case _IO('K', 63): ival = IOCPARM_IVAL(data); data = (caddr_t)&ival; /* FALLTHROUGH */ #endif case KIOCSOUND: /* make tone (*data) hz */ if (scp == sc->cur_scp) { if (*(int *)data) return sc_tone(*(int *)data); else return sc_tone(0); } return 0; case KDGKBTYPE: /* get keyboard type */ error = kbdd_ioctl(sc->kbd, cmd, data); if (error == ENOIOCTL) { /* always return something? XXX */ *(int *)data = 0; } return 0; #if defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD5) || \ defined(COMPAT_FREEBSD4) || defined(COMPAT_43) case _IO('K', 66): ival = IOCPARM_IVAL(data); data = (caddr_t)&ival; /* FALLTHROUGH */ #endif case KDSETLED: /* set keyboard LED status */ if (*(int *)data & ~LED_MASK) /* FIXME: LOCK_MASK? 
*/ return EINVAL; scp->status &= ~LED_MASK; scp->status |= *(int *)data; if (scp == sc->cur_scp) update_kbd_leds(scp, scp->status); return 0; case KDGETLED: /* get keyboard LED status */ if (scp == sc->cur_scp) save_kbd_state(scp); *(int *)data = scp->status & LED_MASK; return 0; case KBADDKBD: /* add/remove keyboard to/from mux */ case KBRELKBD: error = kbdd_ioctl(sc->kbd, cmd, data); if (error == ENOIOCTL) error = ENODEV; return error; #if defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD5) || \ defined(COMPAT_FREEBSD4) || defined(COMPAT_43) case _IO('c', 110): ival = IOCPARM_IVAL(data); data = (caddr_t)&ival; /* FALLTHROUGH */ #endif case CONS_SETKBD: /* set the new keyboard */ { keyboard_t *newkbd; s = spltty(); newkbd = kbd_get_keyboard(*(int *)data); if (newkbd == NULL) { splx(s); return EINVAL; } error = 0; if (sc->kbd != newkbd) { i = kbd_allocate(newkbd->kb_name, newkbd->kb_unit, (void *)&sc->keyboard, sckbdevent, sc); /* i == newkbd->kb_index */ if (i >= 0) { if (sc->kbd != NULL) { save_kbd_state(sc->cur_scp); kbd_release(sc->kbd, (void *)&sc->keyboard); } sc->kbd = kbd_get_keyboard(i); /* sc->kbd == newkbd */ sc->keyboard = i; (void)kbdd_ioctl(sc->kbd, KDSKBMODE, (caddr_t)&sc->cur_scp->kbd_mode); update_kbd_state(sc->cur_scp, sc->cur_scp->status, LOCK_MASK); } else { error = EPERM; /* XXX */ } } splx(s); return error; } case CONS_RELKBD: /* release the current keyboard */ s = spltty(); error = 0; if (sc->kbd != NULL) { save_kbd_state(sc->cur_scp); error = kbd_release(sc->kbd, (void *)&sc->keyboard); if (error == 0) { sc->kbd = NULL; sc->keyboard = -1; } } splx(s); return error; case CONS_GETTERM: /* get the current terminal emulator info */ { sc_term_sw_t *sw; if (((term_info_t *)data)->ti_index == 0) { sw = scp->tsw; } else { sw = sc_term_match_by_number(((term_info_t *)data)->ti_index); } if (sw != NULL) { strncpy(((term_info_t *)data)->ti_name, sw->te_name, sizeof(((term_info_t *)data)->ti_name)); strncpy(((term_info_t *)data)->ti_desc, sw->te_desc, sizeof(((term_info_t *)data)->ti_desc)); ((term_info_t *)data)->ti_flags = 0; return 0; } else { ((term_info_t *)data)->ti_name[0] = '\0'; ((term_info_t *)data)->ti_desc[0] = '\0'; ((term_info_t *)data)->ti_flags = 0; return EINVAL; } } case CONS_SETTERM: /* set the current terminal emulator */ s = spltty(); error = sc_init_emulator(scp, ((term_info_t *)data)->ti_name); /* FIXME: what if scp == sc_console! XXX */ splx(s); return error; case GIO_SCRNMAP: /* get output translation table */ bcopy(&sc->scr_map, data, sizeof(sc->scr_map)); return 0; case PIO_SCRNMAP: /* set output translation table */ bcopy(data, &sc->scr_map, sizeof(sc->scr_map)); for (i=0; i<sizeof(sc->scr_map); i++) { sc->scr_rmap[sc->scr_map[i]] = i; } return 0; case GIO_KEYMAP: /* get keyboard translation table */ case PIO_KEYMAP: /* set keyboard translation table */ case OGIO_KEYMAP: /* get keyboard translation table (compat) */ case OPIO_KEYMAP: /* set keyboard translation table (compat) */ case GIO_DEADKEYMAP: /* get accent key translation table */ case PIO_DEADKEYMAP: /* set accent key translation table */ case GETFKEY: /* get function key string */ case SETFKEY: /* set function key string */ error = kbdd_ioctl(sc->kbd, cmd, data); if (error == ENOIOCTL) error = ENODEV; return error; #ifndef SC_NO_FONT_LOADING case PIO_FONT8x8: /* set 8x8 dot font */ if (!ISFONTAVAIL(sc->adp->va_flags)) return ENXIO; bcopy(data, sc->font_8, 8*256); sc->fonts_loaded |= FONT_8; /* * FONT KLUDGE * Always use the font page #0. XXX * Don't load if the current font size is not 8x8.
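* (The new font is still copied into sc->font_8 and flagged in * fonts_loaded above, so it remains available after a later mode or * font-size change.)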
*/ if (ISTEXTSC(sc->cur_scp) && (sc->cur_scp->font_size < 14)) sc_load_font(sc->cur_scp, 0, 8, 8, sc->font_8, 0, 256); return 0; case GIO_FONT8x8: /* get 8x8 dot font */ if (!ISFONTAVAIL(sc->adp->va_flags)) return ENXIO; if (sc->fonts_loaded & FONT_8) { bcopy(sc->font_8, data, 8*256); return 0; } else return ENXIO; case PIO_FONT8x14: /* set 8x14 dot font */ if (!ISFONTAVAIL(sc->adp->va_flags)) return ENXIO; bcopy(data, sc->font_14, 14*256); sc->fonts_loaded |= FONT_14; /* * FONT KLUDGE * Always use the font page #0. XXX * Don't load if the current font size is not 8x14. */ if (ISTEXTSC(sc->cur_scp) && (sc->cur_scp->font_size >= 14) && (sc->cur_scp->font_size < 16)) sc_load_font(sc->cur_scp, 0, 14, 8, sc->font_14, 0, 256); return 0; case GIO_FONT8x14: /* get 8x14 dot font */ if (!ISFONTAVAIL(sc->adp->va_flags)) return ENXIO; if (sc->fonts_loaded & FONT_14) { bcopy(sc->font_14, data, 14*256); return 0; } else return ENXIO; case PIO_FONT8x16: /* set 8x16 dot font */ if (!ISFONTAVAIL(sc->adp->va_flags)) return ENXIO; bcopy(data, sc->font_16, 16*256); sc->fonts_loaded |= FONT_16; /* * FONT KLUDGE * Always use the font page #0. XXX * Don't load if the current font size is not 8x16. */ if (ISTEXTSC(sc->cur_scp) && (sc->cur_scp->font_size >= 16)) sc_load_font(sc->cur_scp, 0, 16, 8, sc->font_16, 0, 256); return 0; case GIO_FONT8x16: /* get 8x16 dot font */ if (!ISFONTAVAIL(sc->adp->va_flags)) return ENXIO; if (sc->fonts_loaded & FONT_16) { bcopy(sc->font_16, data, 16*256); return 0; } else return ENXIO; #endif /* SC_NO_FONT_LOADING */ default: break; } return (ENOIOCTL); } static int consolectl_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int fflag, struct thread *td) { return sctty_ioctl(dev->si_drv1, cmd, data, td); } static int consolectl_close(struct cdev *dev, int flags, int mode, struct thread *td) { #ifndef SC_NO_SYSMOUSE mouse_info_t info; memset(&info, 0, sizeof(info)); info.operation = MOUSE_ACTION; /* * Make sure all buttons are released when moused and other * console daemons exit, so that no buttons are left pressed. 
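* The memset() plus operation = MOUSE_ACTION above describe "no buttons * down, no movement", so the CONS_MOUSECTL call below releases any * button a daemon may have left pressed.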
*/ (void) sctty_ioctl(dev->si_drv1, CONS_MOUSECTL, (caddr_t)&info, td); #endif return (0); } static void sc_cnprobe(struct consdev *cp) { int unit; int flags; if (!vty_enabled(VTY_SC)) { cp->cn_pri = CN_DEAD; return; } cp->cn_pri = sc_get_cons_priority(&unit, &flags); /* a video card is always required */ if (!scvidprobe(unit, flags, TRUE)) cp->cn_pri = CN_DEAD; /* syscons will become console even when there is no keyboard */ sckbdprobe(unit, flags, TRUE); if (cp->cn_pri == CN_DEAD) return; /* initialize required fields */ strcpy(cp->cn_name, "ttyv0"); } static void sc_cninit(struct consdev *cp) { int unit; int flags; sc_get_cons_priority(&unit, &flags); scinit(unit, flags | SC_KERNEL_CONSOLE); sc_console_unit = unit; sc_console = sc_get_stat(sc_get_softc(unit, SC_KERNEL_CONSOLE)->dev[0]); sc_consptr = cp; } static void sc_cnterm(struct consdev *cp) { /* we are not the kernel console any more, release everything */ if (sc_console_unit < 0) return; /* shouldn't happen */ #if 0 /* XXX */ sc_clear_screen(sc_console); sccnupdate(sc_console); #endif scterm(sc_console_unit, SC_KERNEL_CONSOLE); sc_console_unit = -1; sc_console = NULL; } static void sccnclose(sc_softc_t *sc, struct sc_cnstate *sp); static void sccnopen(sc_softc_t *sc, struct sc_cnstate *sp, int flags); +static void sccnscrlock(sc_softc_t *sc, struct sc_cnstate *sp); +static void sccnscrunlock(sc_softc_t *sc, struct sc_cnstate *sp); static void +sccnscrlock(sc_softc_t *sc, struct sc_cnstate *sp) +{ + SC_VIDEO_LOCK(sc); +} + +static void +sccnscrunlock(sc_softc_t *sc, struct sc_cnstate *sp) +{ + SC_VIDEO_UNLOCK(sc); +} + +static void sccnopen(sc_softc_t *sc, struct sc_cnstate *sp, int flags) { int kbd_mode; /* assert(sc_console_unit >= 0) */ sp->kbd_opened = FALSE; sp->scr_opened = FALSE; /* Opening the keyboard is optional. */ if (!(flags & 1) || sc->kbd == NULL) goto over_keyboard; /* * Make sure the keyboard is accessible even when the kbd device * driver is disabled. */ kbdd_enable(sc->kbd); /* Switch the keyboard to console mode (K_XLATE, polled) on all scp's. */ kbd_mode = K_XLATE; (void)kbdd_ioctl(sc->kbd, KDSKBMODE, (caddr_t)&kbd_mode); sc->kbd_open_level++; kbdd_poll(sc->kbd, TRUE); sp->kbd_opened = TRUE; over_keyboard: ; /* The screen is opened iff locking it succeeds. */ + sccnscrlock(sc, sp); sp->scr_opened = TRUE; /* The screen switch is optional. */ if (!(flags & 2)) return; /* try to switch to the kernel console screen */ if (!cold && sc->cur_scp->index != sc_console->index && sc->cur_scp->smode.mode == VT_AUTO && sc_console->smode.mode == VT_AUTO) sc_switch_scr(sc, sc_console->index); } static void sccnclose(sc_softc_t *sc, struct sc_cnstate *sp) { sp->scr_opened = FALSE; + sccnscrunlock(sc, sp); if (!sp->kbd_opened) return; /* Restore keyboard mode (for the current, possibly-changed scp). */ kbdd_poll(sc->kbd, FALSE); if (--sc->kbd_open_level == 0) (void)kbdd_ioctl(sc->kbd, KDSKBMODE, (caddr_t)&sc->cur_scp->kbd_mode); kbdd_disable(sc->kbd); sp->kbd_opened = FALSE; } /* * Grabbing switches the screen and keyboard focus to sc_console and the * keyboard mode to (K_XLATE, polled). Only switching to polled mode is * essential (for preventing the interrupt handler from eating input * between polls). Focus is part of the UI, and the other switches * work just as well when they are done on every entry and exit. * * Screen switches while grabbed are supported, and to maintain focus for * this, ungrabbing and closing only restore the polling state and then * the keyboard mode if on the original screen.
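* * As a rough sketch (editor's illustration, not from the source): each * cngrab()/cnungrab() pair at nesting level 0 or 1 opens and later * closes keyboard and screen state in grab_state[lev]; deeper nested * grabs only bump grab_level and keep no state of their own.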
*/ static void sc_cngrab(struct consdev *cp) { sc_softc_t *sc; int lev; sc = sc_console->sc; lev = atomic_fetchadd_int(&sc->grab_level, 1); - if (lev >= 0 && lev < 2) + if (lev >= 0 && lev < 2) { sccnopen(sc, &sc->grab_state[lev], 1 | 2); + sccnscrunlock(sc, &sc->grab_state[lev]); + } } static void sc_cnungrab(struct consdev *cp) { sc_softc_t *sc; int lev; sc = sc_console->sc; lev = atomic_load_acq_int(&sc->grab_level) - 1; - if (lev >= 0 && lev < 2) + if (lev >= 0 && lev < 2) { + sccnscrlock(sc, &sc->grab_state[lev]); sccnclose(sc, &sc->grab_state[lev]); + } atomic_add_int(&sc->grab_level, -1); } static void sc_cnputc(struct consdev *cd, int c) { + struct sc_cnstate st; u_char buf[1]; scr_stat *scp = sc_console; #ifndef SC_NO_HISTORY #if 0 struct tty *tp; #endif #endif /* !SC_NO_HISTORY */ int s; /* assert(sc_console != NULL) */ - SC_VIDEO_LOCK(scp->sc); + sccnopen(scp->sc, &st, 0); #ifndef SC_NO_HISTORY if (scp == scp->sc->cur_scp && scp->status & SLKED) { scp->status &= ~SLKED; update_kbd_state(scp, scp->status, SLKED); if (scp->status & BUFFER_SAVED) { if (!sc_hist_restore(scp)) sc_remove_cutmarking(scp); scp->status &= ~BUFFER_SAVED; scp->status |= CURSOR_ENABLED; sc_draw_cursor_image(scp); } #if 0 /* * XXX: Now that TTY's have their own locks, we cannot process * any data after disabling scroll lock. cnputs already holds a * spinlock. */ tp = SC_DEV(scp->sc, scp->index); /* XXX "tp" can be NULL */ tty_lock(tp); if (tty_opened(tp)) sctty_outwakeup(tp); tty_unlock(tp); #endif } #endif /* !SC_NO_HISTORY */ buf[0] = c; sc_puts(scp, buf, 1, 1); s = spltty(); /* block sckbdevent and scrn_timer */ sccnupdate(scp); splx(s); - SC_VIDEO_UNLOCK(scp->sc); + sccnclose(scp->sc, &st); } static int sc_cngetc(struct consdev *cd) { static struct fkeytab fkey; static int fkeycp; scr_stat *scp; const u_char *p; int s = spltty(); /* block sckbdevent and scrn_timer while we poll */ int c; /* assert(sc_console != NULL) */ /* * Stop the screen saver and update the screen if necessary. * What if we have been running in the screen saver code... XXX */ sc_touch_scrn_saver(); scp = sc_console->sc->cur_scp; /* XXX */ sccnupdate(scp); if (fkeycp < fkey.len) { splx(s); return fkey.str[fkeycp++]; } if (scp->sc->kbd == NULL) { splx(s); return -1; } c = scgetc(scp->sc, SCGETC_CN | SCGETC_NONBLOCK, NULL); switch (KEYFLAGS(c)) { case 0: /* normal char */ return KEYCHAR(c); case FKEY: /* function key */ p = (*scp->tsw->te_fkeystr)(scp, c); if (p != NULL) { fkey.len = strlen(p); bcopy(p, fkey.str, fkey.len); fkeycp = 1; return fkey.str[0]; } p = kbdd_get_fkeystr(scp->sc->kbd, KEYCHAR(c), (size_t *)&fkeycp); fkey.len = fkeycp; if ((p != NULL) && (fkey.len > 0)) { bcopy(p, fkey.str, fkey.len); fkeycp = 1; return fkey.str[0]; } return c; /* XXX */ case NOKEY: case ERRKEY: default: return -1; } /* NOT REACHED */ } static void sccnupdate(scr_stat *scp) { /* this is a cut-down version of scrn_timer()... */ if (suspend_in_progress || scp->sc->font_loading_in_progress) return; if (kdb_active || panicstr || shutdown_in_progress) { sc_touch_scrn_saver(); } else if (scp != scp->sc->cur_scp) { return; } if (!run_scrn_saver) scp->sc->flags &= ~SC_SCRN_IDLE; #ifdef DEV_SPLASH if ((saver_mode != CONS_LKM_SAVER) || !(scp->sc->flags & SC_SCRN_IDLE)) if (scp->sc->flags & SC_SCRN_BLANKED) stop_scrn_saver(scp->sc, current_saver); #endif if (scp != scp->sc->cur_scp || scp->sc->blink_in_progress || scp->sc->switch_in_progress) return; /* * FIXME: unlike scrn_timer(), we call scrn_update() from here even * when write_in_progress is non-zero. 
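* (scrn_timer() instead skips its update entirely while * write_in_progress is set; see the early "goto done" checks there.)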
XXX */ if (!ISGRAPHSC(scp) && !(scp->sc->flags & SC_SCRN_BLANKED)) scrn_update(scp, TRUE); } static void scrn_timer(void *arg) { #ifndef PC98 static time_t kbd_time_stamp = 0; #endif sc_softc_t *sc; scr_stat *scp; int again, rate; again = (arg != NULL); if (arg != NULL) sc = (sc_softc_t *)arg; else if (sc_console != NULL) sc = sc_console->sc; else return; /* find the vty to update */ scp = sc->cur_scp; /* don't do anything when we are performing some I/O operations */ if (suspend_in_progress || sc->font_loading_in_progress) goto done; #ifndef PC98 if ((sc->kbd == NULL) && (sc->config & SC_AUTODETECT_KBD)) { /* try to allocate a keyboard automatically */ if (kbd_time_stamp != time_uptime) { kbd_time_stamp = time_uptime; sc->keyboard = sc_allocate_keyboard(sc, -1); if (sc->keyboard >= 0) { sc->kbd = kbd_get_keyboard(sc->keyboard); (void)kbdd_ioctl(sc->kbd, KDSKBMODE, (caddr_t)&sc->cur_scp->kbd_mode); update_kbd_state(sc->cur_scp, sc->cur_scp->status, LOCK_MASK); } } } #endif /* PC98 */ /* should we stop the screen saver? */ if (kdb_active || panicstr || shutdown_in_progress) sc_touch_scrn_saver(); if (run_scrn_saver) { if (time_uptime > sc->scrn_time_stamp + scrn_blank_time) sc->flags |= SC_SCRN_IDLE; else sc->flags &= ~SC_SCRN_IDLE; } else { sc->scrn_time_stamp = time_uptime; sc->flags &= ~SC_SCRN_IDLE; if (scrn_blank_time > 0) run_scrn_saver = TRUE; } #ifdef DEV_SPLASH if ((saver_mode != CONS_LKM_SAVER) || !(sc->flags & SC_SCRN_IDLE)) if (sc->flags & SC_SCRN_BLANKED) stop_scrn_saver(sc, current_saver); #endif /* should we just return ? */ if (sc->blink_in_progress || sc->switch_in_progress || sc->write_in_progress) goto done; /* Update the screen */ scp = sc->cur_scp; /* cur_scp may have changed... */ if (!ISGRAPHSC(scp) && !(sc->flags & SC_SCRN_BLANKED)) scrn_update(scp, TRUE); #ifdef DEV_SPLASH /* should we activate the screen saver? */ if ((saver_mode == CONS_LKM_SAVER) && (sc->flags & SC_SCRN_IDLE)) if (!ISGRAPHSC(scp) || (sc->flags & SC_SCRN_BLANKED)) (*current_saver)(sc, TRUE); #endif done: if (again) { /* * Use reduced "refresh" rate if we are in graphics and that is not a * graphical screen saver. In such case we just have nothing to do. 
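* (With the callout_reset_sbt() call below this works out to 2 Hz * instead of the normal 30 Hz refresh.)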
*/ if (ISGRAPHSC(scp) && !(sc->flags & SC_SCRN_BLANKED)) rate = 2; else rate = 30; callout_reset_sbt(&sc->ctimeout, SBT_1S / rate, 0, scrn_timer, sc, C_PREL(1)); } } static int and_region(int *s1, int *e1, int s2, int e2) { if (*e1 < s2 || e2 < *s1) return FALSE; *s1 = imax(*s1, s2); *e1 = imin(*e1, e2); return TRUE; } static void scrn_update(scr_stat *scp, int show_cursor) { int start; int end; int s; int e; /* assert(scp == scp->sc->cur_scp) */ SC_VIDEO_LOCK(scp->sc); #ifndef SC_NO_CUTPASTE /* remove the previous mouse pointer image if necessary */ if (scp->status & MOUSE_VISIBLE) { s = scp->mouse_pos; e = scp->mouse_pos + scp->xsize + 1; if ((scp->status & (MOUSE_MOVED | MOUSE_HIDDEN)) || and_region(&s, &e, scp->start, scp->end) || ((scp->status & CURSOR_ENABLED) && (scp->cursor_pos != scp->cursor_oldpos) && (and_region(&s, &e, scp->cursor_pos, scp->cursor_pos) || and_region(&s, &e, scp->cursor_oldpos, scp->cursor_oldpos)))) { sc_remove_mouse_image(scp); if (scp->end >= scp->xsize*scp->ysize) scp->end = scp->xsize*scp->ysize - 1; } } #endif /* !SC_NO_CUTPASTE */ #if 1 /* debug: XXX */ if (scp->end >= scp->xsize*scp->ysize) { printf("scrn_update(): scp->end %d > size_of_screen!!\n", scp->end); scp->end = scp->xsize*scp->ysize - 1; } if (scp->start < 0) { printf("scrn_update(): scp->start %d < 0\n", scp->start); scp->start = 0; } #endif /* update screen image */ if (scp->start <= scp->end) { if (scp->mouse_cut_end >= 0) { /* there is a marked region for cut & paste */ if (scp->mouse_cut_start <= scp->mouse_cut_end) { start = scp->mouse_cut_start; end = scp->mouse_cut_end; } else { start = scp->mouse_cut_end; end = scp->mouse_cut_start - 1; } s = start; e = end; /* does the cut-mark region overlap with the update region? */ if (and_region(&s, &e, scp->start, scp->end)) { (*scp->rndr->draw)(scp, s, e - s + 1, TRUE); s = 0; e = start - 1; if (and_region(&s, &e, scp->start, scp->end)) (*scp->rndr->draw)(scp, s, e - s + 1, FALSE); s = end + 1; e = scp->xsize*scp->ysize - 1; if (and_region(&s, &e, scp->start, scp->end)) (*scp->rndr->draw)(scp, s, e - s + 1, FALSE); } else { (*scp->rndr->draw)(scp, scp->start, scp->end - scp->start + 1, FALSE); } } else { (*scp->rndr->draw)(scp, scp->start, scp->end - scp->start + 1, FALSE); } } /* we are not to show the cursor and the mouse pointer... */ if (!show_cursor) { scp->end = 0; scp->start = scp->xsize*scp->ysize - 1; SC_VIDEO_UNLOCK(scp->sc); return; } /* update cursor image */ if (scp->status & CURSOR_ENABLED) { s = scp->start; e = scp->end; /* did cursor move since last time ? */ if (scp->cursor_pos != scp->cursor_oldpos) { /* do we need to remove old cursor image ? 
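* (An explicit removal is needed only when the update region did not * already overwrite the old cursor position.)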
*/ if (!and_region(&s, &e, scp->cursor_oldpos, scp->cursor_oldpos)) sc_remove_cursor_image(scp); sc_draw_cursor_image(scp); } else { if (and_region(&s, &e, scp->cursor_pos, scp->cursor_pos)) /* cursor didn't move, but has been overwritten */ sc_draw_cursor_image(scp); else if (scp->curs_attr.flags & CONS_BLINK_CURSOR) /* if it's a blinking cursor, update it */ (*scp->rndr->blink_cursor)(scp, scp->cursor_pos, sc_inside_cutmark(scp, scp->cursor_pos)); } } #ifndef SC_NO_CUTPASTE /* update "pseudo" mouse pointer image */ if (scp->sc->flags & SC_MOUSE_ENABLED) { if (!(scp->status & (MOUSE_VISIBLE | MOUSE_HIDDEN))) { scp->status &= ~MOUSE_MOVED; sc_draw_mouse_image(scp); } } #endif /* SC_NO_CUTPASTE */ scp->end = 0; scp->start = scp->xsize*scp->ysize - 1; SC_VIDEO_UNLOCK(scp->sc); } #ifdef DEV_SPLASH static int scsplash_callback(int event, void *arg) { sc_softc_t *sc; int error; sc = (sc_softc_t *)arg; switch (event) { case SPLASH_INIT: if (add_scrn_saver(scsplash_saver) == 0) { sc->flags &= ~SC_SAVER_FAILED; run_scrn_saver = TRUE; if (cold && !(boothowto & RB_VERBOSE)) { scsplash_stick(TRUE); (*current_saver)(sc, TRUE); } } return 0; case SPLASH_TERM: if (current_saver == scsplash_saver) { scsplash_stick(FALSE); error = remove_scrn_saver(scsplash_saver); if (error) return error; } return 0; default: return EINVAL; } } static void scsplash_saver(sc_softc_t *sc, int show) { static int busy = FALSE; scr_stat *scp; if (busy) return; busy = TRUE; scp = sc->cur_scp; if (show) { if (!(sc->flags & SC_SAVER_FAILED)) { if (!(sc->flags & SC_SCRN_BLANKED)) set_scrn_saver_mode(scp, -1, NULL, 0); switch (splash(sc->adp, TRUE)) { case 0: /* succeeded */ break; case EAGAIN: /* try later */ restore_scrn_saver_mode(scp, FALSE); sc_touch_scrn_saver(); /* XXX */ break; default: sc->flags |= SC_SAVER_FAILED; scsplash_stick(FALSE); restore_scrn_saver_mode(scp, TRUE); printf("scsplash_saver(): failed to put up the image\n"); break; } } } else if (!sticky_splash) { if ((sc->flags & SC_SCRN_BLANKED) && (splash(sc->adp, FALSE) == 0)) restore_scrn_saver_mode(scp, TRUE); } busy = FALSE; } static int add_scrn_saver(void (*this_saver)(sc_softc_t *, int)) { #if 0 int error; if (current_saver != none_saver) { error = remove_scrn_saver(current_saver); if (error) return error; } #endif if (current_saver != none_saver) return EBUSY; run_scrn_saver = FALSE; saver_mode = CONS_LKM_SAVER; current_saver = this_saver; return 0; } static int remove_scrn_saver(void (*this_saver)(sc_softc_t *, int)) { if (current_saver != this_saver) return EINVAL; #if 0 /* * In order to prevent `current_saver' from being called by * the timeout routine `scrn_timer()' while we manipulate * the saver list, we shall set `current_saver' to `none_saver' * before stopping the current saver, rather than blocking by `splXX()'. 
*/ current_saver = none_saver; if (scrn_blanked) stop_scrn_saver(this_saver); #endif /* unblank all blanked screens */ wait_scrn_saver_stop(NULL); if (scrn_blanked) return EBUSY; current_saver = none_saver; return 0; } static int set_scrn_saver_mode(scr_stat *scp, int mode, u_char *pal, int border) { int s; /* assert(scp == scp->sc->cur_scp) */ s = spltty(); if (!ISGRAPHSC(scp)) sc_remove_cursor_image(scp); scp->splash_save_mode = scp->mode; scp->splash_save_status = scp->status & (GRAPHICS_MODE | PIXEL_MODE); scp->status &= ~(GRAPHICS_MODE | PIXEL_MODE); scp->status |= (UNKNOWN_MODE | SAVER_RUNNING); scp->sc->flags |= SC_SCRN_BLANKED; ++scrn_blanked; splx(s); if (mode < 0) return 0; scp->mode = mode; if (set_mode(scp) == 0) { if (scp->sc->adp->va_info.vi_flags & V_INFO_GRAPHICS) scp->status |= GRAPHICS_MODE; #ifndef SC_NO_PALETTE_LOADING if (pal != NULL) vidd_load_palette(scp->sc->adp, pal); #endif sc_set_border(scp, border); return 0; } else { s = spltty(); scp->mode = scp->splash_save_mode; scp->status &= ~(UNKNOWN_MODE | SAVER_RUNNING); scp->status |= scp->splash_save_status; splx(s); return 1; } } static int restore_scrn_saver_mode(scr_stat *scp, int changemode) { int mode; int status; int s; /* assert(scp == scp->sc->cur_scp) */ s = spltty(); mode = scp->mode; status = scp->status; scp->mode = scp->splash_save_mode; scp->status &= ~(UNKNOWN_MODE | SAVER_RUNNING); scp->status |= scp->splash_save_status; scp->sc->flags &= ~SC_SCRN_BLANKED; if (!changemode) { if (!ISGRAPHSC(scp)) sc_draw_cursor_image(scp); --scrn_blanked; splx(s); return 0; } if (set_mode(scp) == 0) { #ifndef SC_NO_PALETTE_LOADING #ifdef SC_PIXEL_MODE if (scp->sc->adp->va_info.vi_mem_model == V_INFO_MM_DIRECT) vidd_load_palette(scp->sc->adp, scp->sc->palette2); else #endif vidd_load_palette(scp->sc->adp, scp->sc->palette); #endif --scrn_blanked; splx(s); return 0; } else { scp->mode = mode; scp->status = status; splx(s); return 1; } } static void stop_scrn_saver(sc_softc_t *sc, void (*saver)(sc_softc_t *, int)) { (*saver)(sc, FALSE); run_scrn_saver = FALSE; /* the screen saver may have chosen not to stop after all... */ if (sc->flags & SC_SCRN_BLANKED) return; mark_all(sc->cur_scp); if (sc->delayed_next_scr) sc_switch_scr(sc, sc->delayed_next_scr - 1); if (!kdb_active) wakeup(&scrn_blanked); } static int wait_scrn_saver_stop(sc_softc_t *sc) { int error = 0; while (scrn_blanked > 0) { run_scrn_saver = FALSE; if (sc && !(sc->flags & SC_SCRN_BLANKED)) { error = 0; break; } error = tsleep(&scrn_blanked, PZERO | PCATCH, "scrsav", 0); if ((error != 0) && (error != ERESTART)) break; } run_scrn_saver = FALSE; return error; } #endif /* DEV_SPLASH */ void sc_touch_scrn_saver(void) { scsplash_stick(FALSE); run_scrn_saver = FALSE; } int sc_switch_scr(sc_softc_t *sc, u_int next_scr) { scr_stat *cur_scp; struct tty *tp; struct proc *p; int s; DPRINTF(5, ("sc0: sc_switch_scr() %d ", next_scr + 1)); if (sc->cur_scp == NULL) return (0); /* prevent switch if previously requested */ if (sc->flags & SC_SCRN_VTYLOCK) { sc_bell(sc->cur_scp, sc->cur_scp->bell_pitch, sc->cur_scp->bell_duration); return EPERM; } /* delay switch if the screen is blanked or being updated */ if ((sc->flags & SC_SCRN_BLANKED) || sc->write_in_progress || sc->blink_in_progress) { sc->delayed_next_scr = next_scr + 1; sc_touch_scrn_saver(); DPRINTF(5, ("switch delayed\n")); return 0; } sc->delayed_next_scr = 0; s = spltty(); cur_scp = sc->cur_scp; /* we are in the middle of the vty switching process... 
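* (an earlier switch has signalled a VT_PROCESS controller and is * still waiting in SWITCH_WAIT_REL or SWITCH_WAIT_ACQ for its answer)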
*/ if (sc->switch_in_progress && (cur_scp->smode.mode == VT_PROCESS) && cur_scp->proc) { p = pfind(cur_scp->pid); if (cur_scp->proc != p) { if (p) PROC_UNLOCK(p); /* * The controlling process has died!!. Do some clean up. * NOTE:`cur_scp->proc' and `cur_scp->smode.mode' * are not reset here yet; they will be cleared later. */ DPRINTF(5, ("cur_scp controlling process %d died, ", cur_scp->pid)); if (cur_scp->status & SWITCH_WAIT_REL) { /* * Force the previous switch to finish, but return now * with error. */ DPRINTF(5, ("reset WAIT_REL, ")); finish_vt_rel(cur_scp, TRUE, &s); splx(s); DPRINTF(5, ("finishing previous switch\n")); return EINVAL; } else if (cur_scp->status & SWITCH_WAIT_ACQ) { /* let's assume screen switch has been completed. */ DPRINTF(5, ("reset WAIT_ACQ, ")); finish_vt_acq(cur_scp); } else { /* * We are in between screen release and acquisition, and * reached here via scgetc() or scrn_timer() which has * interrupted exchange_scr(). Don't do anything stupid. */ DPRINTF(5, ("waiting nothing, ")); } } else { if (p) PROC_UNLOCK(p); /* * The controlling process is alive, but not responding... * It is either buggy or it may be just taking time. * The following code is a gross kludge to cope with this * problem for which there is no clean solution. XXX */ if (cur_scp->status & SWITCH_WAIT_REL) { switch (sc->switch_in_progress++) { case 1: break; case 2: DPRINTF(5, ("sending relsig again, ")); signal_vt_rel(cur_scp); break; case 3: break; case 4: default: /* * Act as if the controlling program returned * VT_FALSE. */ DPRINTF(5, ("force reset WAIT_REL, ")); finish_vt_rel(cur_scp, FALSE, &s); splx(s); DPRINTF(5, ("act as if VT_FALSE was seen\n")); return EINVAL; } } else if (cur_scp->status & SWITCH_WAIT_ACQ) { switch (sc->switch_in_progress++) { case 1: break; case 2: DPRINTF(5, ("sending acqsig again, ")); signal_vt_acq(cur_scp); break; case 3: break; case 4: default: /* clear the flag and finish the previous switch */ DPRINTF(5, ("force reset WAIT_ACQ, ")); finish_vt_acq(cur_scp); break; } } } } /* * Return error if an invalid argument is given, or vty switch * is still in progress. */ if ((next_scr < sc->first_vty) || (next_scr >= sc->first_vty + sc->vtys) || sc->switch_in_progress) { splx(s); sc_bell(cur_scp, bios_value.bell_pitch, BELL_DURATION); DPRINTF(5, ("error 1\n")); return EINVAL; } /* * Don't allow switching away from the graphics mode vty * if the switch mode is VT_AUTO, unless the next vty is the same * as the current or the current vty has been closed (but showing). */ tp = SC_DEV(sc, cur_scp->index); if ((cur_scp->index != next_scr) && tty_opened_ns(tp) && (cur_scp->smode.mode == VT_AUTO) && ISGRAPHSC(cur_scp)) { splx(s); sc_bell(cur_scp, bios_value.bell_pitch, BELL_DURATION); DPRINTF(5, ("error, graphics mode\n")); return EINVAL; } /* * Is the wanted vty open? Don't allow switching to a closed vty. * If we are in DDB, don't switch to a vty in the VT_PROCESS mode. * Note that we always allow the user to switch to the kernel * console even if it is closed. */ if ((sc_console == NULL) || (next_scr != sc_console->index)) { tp = SC_DEV(sc, next_scr); if (!tty_opened_ns(tp)) { splx(s); sc_bell(cur_scp, bios_value.bell_pitch, BELL_DURATION); DPRINTF(5, ("error 2, requested vty isn't open!\n")); return EINVAL; } if (kdb_active && SC_STAT(tp)->smode.mode == VT_PROCESS) { splx(s); DPRINTF(5, ("error 3, requested vty is in the VT_PROCESS mode\n")); return EINVAL; } } /* this is the start of vty switching process... 
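* * As a sketch of the full VT_PROCESS handshake (pieced together from * the handlers above, not spelled out in the source): signal_vt_rel() * sends smode.relsig to the old screen's controlling process; that * process answers with ioctl(fd, VT_RELDISP, VT_TRUE), which runs * finish_vt_rel() and exchange_scr(); signal_vt_acq() then sends * smode.acqsig to the new screen's controller, which completes the * switch with ioctl(fd, VT_RELDISP, VT_ACKACQ), i.e. finish_vt_acq().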
*/ ++sc->switch_in_progress; sc->old_scp = cur_scp; sc->new_scp = sc_get_stat(SC_DEV(sc, next_scr)); if (sc->new_scp == sc->old_scp) { sc->switch_in_progress = 0; /* * XXX wakeup() locks the scheduler lock which will hang if * the lock is in an in-between state, e.g., when we stop at * a breakpoint at fork_exit. It has always been wrong to call * wakeup() when the debugger is active. In RELENG_4, wakeup() * is supposed to be locked by splhigh(), but the debugger may * be invoked at splhigh(). */ if (!kdb_active) wakeup(VTY_WCHAN(sc,next_scr)); splx(s); DPRINTF(5, ("switch done (new == old)\n")); return 0; } /* has controlling process died? */ vt_proc_alive(sc->old_scp); vt_proc_alive(sc->new_scp); /* wait for the controlling process to release the screen, if necessary */ if (signal_vt_rel(sc->old_scp)) { splx(s); return 0; } /* go set up the new vty screen */ splx(s); exchange_scr(sc); s = spltty(); /* wake up processes waiting for this vty */ if (!kdb_active) wakeup(VTY_WCHAN(sc,next_scr)); /* wait for the controlling process to acknowledge, if necessary */ if (signal_vt_acq(sc->cur_scp)) { splx(s); return 0; } sc->switch_in_progress = 0; if (sc->unit == sc_console_unit) cnavailable(sc_consptr, TRUE); splx(s); DPRINTF(5, ("switch done\n")); return 0; } static int do_switch_scr(sc_softc_t *sc, int s) { vt_proc_alive(sc->new_scp); splx(s); exchange_scr(sc); s = spltty(); /* sc->cur_scp == sc->new_scp */ wakeup(VTY_WCHAN(sc,sc->cur_scp->index)); /* wait for the controlling process to acknowledge, if necessary */ if (!signal_vt_acq(sc->cur_scp)) { sc->switch_in_progress = 0; if (sc->unit == sc_console_unit) cnavailable(sc_consptr, TRUE); } return s; } static int vt_proc_alive(scr_stat *scp) { struct proc *p; if (scp->proc) { if ((p = pfind(scp->pid)) != NULL) PROC_UNLOCK(p); if (scp->proc == p) return TRUE; scp->proc = NULL; scp->smode.mode = VT_AUTO; DPRINTF(5, ("vt controlling process %d died\n", scp->pid)); } return FALSE; } static int signal_vt_rel(scr_stat *scp) { if (scp->smode.mode != VT_PROCESS) return FALSE; scp->status |= SWITCH_WAIT_REL; PROC_LOCK(scp->proc); kern_psignal(scp->proc, scp->smode.relsig); PROC_UNLOCK(scp->proc); DPRINTF(5, ("sending relsig to %d\n", scp->pid)); return TRUE; } static int signal_vt_acq(scr_stat *scp) { if (scp->smode.mode != VT_PROCESS) return FALSE; if (scp->sc->unit == sc_console_unit) cnavailable(sc_consptr, FALSE); scp->status |= SWITCH_WAIT_ACQ; PROC_LOCK(scp->proc); kern_psignal(scp->proc, scp->smode.acqsig); PROC_UNLOCK(scp->proc); DPRINTF(5, ("sending acqsig to %d\n", scp->pid)); return TRUE; } static int finish_vt_rel(scr_stat *scp, int release, int *s) { if (scp == scp->sc->old_scp && scp->status & SWITCH_WAIT_REL) { scp->status &= ~SWITCH_WAIT_REL; if (release) *s = do_switch_scr(scp->sc, *s); else scp->sc->switch_in_progress = 0; return 0; } return EINVAL; } static int finish_vt_acq(scr_stat *scp) { if (scp == scp->sc->new_scp && scp->status & SWITCH_WAIT_ACQ) { scp->status &= ~SWITCH_WAIT_ACQ; scp->sc->switch_in_progress = 0; return 0; } return EINVAL; } static void exchange_scr(sc_softc_t *sc) { scr_stat *scp; /* save the current state of video and keyboard */ sc_move_cursor(sc->old_scp, sc->old_scp->xpos, sc->old_scp->ypos); if (!ISGRAPHSC(sc->old_scp)) sc_remove_cursor_image(sc->old_scp); if (sc->old_scp->kbd_mode == K_XLATE) save_kbd_state(sc->old_scp); /* set up the video for the new screen */ scp = sc->cur_scp = sc->new_scp; #ifdef PC98 if (sc->old_scp->mode != scp->mode || ISUNKNOWNSC(sc->old_scp) || ISUNKNOWNSC(sc->new_scp)) #else 
if (sc->old_scp->mode != scp->mode || ISUNKNOWNSC(sc->old_scp)) #endif set_mode(scp); #ifndef __sparc64__ else sc_vtb_init(&scp->scr, VTB_FRAMEBUFFER, scp->xsize, scp->ysize, (void *)sc->adp->va_window, FALSE); #endif scp->status |= MOUSE_HIDDEN; sc_move_cursor(scp, scp->xpos, scp->ypos); if (!ISGRAPHSC(scp)) sc_set_cursor_image(scp); #ifndef SC_NO_PALETTE_LOADING if (ISGRAPHSC(sc->old_scp)) { #ifdef SC_PIXEL_MODE if (sc->adp->va_info.vi_mem_model == V_INFO_MM_DIRECT) vidd_load_palette(sc->adp, sc->palette2); else #endif vidd_load_palette(sc->adp, sc->palette); } #endif sc_set_border(scp, scp->border); /* set up the keyboard for the new screen */ if (sc->kbd_open_level == 0 && sc->old_scp->kbd_mode != scp->kbd_mode) (void)kbdd_ioctl(sc->kbd, KDSKBMODE, (caddr_t)&scp->kbd_mode); update_kbd_state(scp, scp->status, LOCK_MASK); mark_all(scp); } static void sc_puts(scr_stat *scp, u_char *buf, int len, int kernel) { #ifdef DEV_SPLASH /* make screensaver happy */ if (!sticky_splash && scp == scp->sc->cur_scp && !sc_saver_keyb_only) run_scrn_saver = FALSE; #endif if (scp->tsw) (*scp->tsw->te_puts)(scp, buf, len, kernel); if (scp->sc->delayed_next_scr) sc_switch_scr(scp->sc, scp->sc->delayed_next_scr - 1); } void sc_draw_cursor_image(scr_stat *scp) { /* assert(scp == scp->sc->cur_scp); */ SC_VIDEO_LOCK(scp->sc); (*scp->rndr->draw_cursor)(scp, scp->cursor_pos, scp->curs_attr.flags & CONS_BLINK_CURSOR, TRUE, sc_inside_cutmark(scp, scp->cursor_pos)); scp->cursor_oldpos = scp->cursor_pos; SC_VIDEO_UNLOCK(scp->sc); } void sc_remove_cursor_image(scr_stat *scp) { /* assert(scp == scp->sc->cur_scp); */ SC_VIDEO_LOCK(scp->sc); (*scp->rndr->draw_cursor)(scp, scp->cursor_oldpos, scp->curs_attr.flags & CONS_BLINK_CURSOR, FALSE, sc_inside_cutmark(scp, scp->cursor_oldpos)); SC_VIDEO_UNLOCK(scp->sc); } static void update_cursor_image(scr_stat *scp) { /* assert(scp == scp->sc->cur_scp); */ sc_remove_cursor_image(scp); sc_set_cursor_image(scp); sc_draw_cursor_image(scp); } void sc_set_cursor_image(scr_stat *scp) { scp->curs_attr.flags = scp->curr_curs_attr.flags; if (scp->curs_attr.flags & CONS_HIDDEN_CURSOR) { /* hidden cursor is internally represented as zero-height underline */ scp->curs_attr.flags = CONS_CHAR_CURSOR; scp->curs_attr.base = scp->curs_attr.height = 0; } else if (scp->curs_attr.flags & CONS_CHAR_CURSOR) { scp->curs_attr.base = imin(scp->curr_curs_attr.base, scp->font_size - 1); scp->curs_attr.height = imin(scp->curr_curs_attr.height, scp->font_size - scp->curs_attr.base); } else { /* block cursor */ scp->curs_attr.base = 0; scp->curs_attr.height = scp->font_size; } /* assert(scp == scp->sc->cur_scp); */ SC_VIDEO_LOCK(scp->sc); (*scp->rndr->set_cursor)(scp, scp->curs_attr.base, scp->curs_attr.height, scp->curs_attr.flags & CONS_BLINK_CURSOR); SC_VIDEO_UNLOCK(scp->sc); } static void change_cursor_shape(scr_stat *scp, int flags, int base, int height) { if ((scp == scp->sc->cur_scp) && !ISGRAPHSC(scp)) sc_remove_cursor_image(scp); if (base >= 0) scp->curr_curs_attr.base = base; if (height >= 0) scp->curr_curs_attr.height = height; if (flags & CONS_RESET_CURSOR) scp->curr_curs_attr = scp->dflt_curs_attr; else scp->curr_curs_attr.flags = flags & CONS_CURSOR_ATTRS; if ((scp == scp->sc->cur_scp) && !ISGRAPHSC(scp)) { sc_set_cursor_image(scp); sc_draw_cursor_image(scp); } } void sc_change_cursor_shape(scr_stat *scp, int flags, int base, int height) { sc_softc_t *sc; struct tty *tp; int s; int i; s = spltty(); if ((flags != -1) && (flags & CONS_LOCAL_CURSOR)) { /* local (per vty) change */ 
change_cursor_shape(scp, flags, base, height); splx(s); return; } /* global change */ sc = scp->sc; if (base >= 0) sc->curs_attr.base = base; if (height >= 0) sc->curs_attr.height = height; if (flags != -1) { if (flags & CONS_RESET_CURSOR) sc->curs_attr = sc->dflt_curs_attr; else sc->curs_attr.flags = flags & CONS_CURSOR_ATTRS; } for (i = sc->first_vty; i < sc->first_vty + sc->vtys; ++i) { if ((tp = SC_DEV(sc, i)) == NULL) continue; if ((scp = sc_get_stat(tp)) == NULL) continue; scp->dflt_curs_attr = sc->curs_attr; change_cursor_shape(scp, CONS_RESET_CURSOR, -1, -1); } splx(s); } static void scinit(int unit, int flags) { /* * When syscons is being initialized as the kernel console, malloc() * is not yet functional, because various kernel structures have not been * fully initialized yet. Therefore, we need to declare the following * static buffers for the console. This is less than ideal, * but is a necessary evil for the time being. XXX */ #ifdef PC98 static u_short sc_buffer[ROW*COL*2];/* XXX */ #else static u_short sc_buffer[ROW*COL]; /* XXX */ #endif #ifndef SC_NO_FONT_LOADING static u_char font_8[256*8]; static u_char font_14[256*14]; static u_char font_16[256*16]; #endif sc_softc_t *sc; scr_stat *scp; video_adapter_t *adp; int col; int row; int i; /* one time initialization */ if (init_done == COLD) sc_get_bios_values(&bios_value); init_done = WARM; /* * Allocate resources. Even if we are being called for the second * time, we must allocate them again, because they might have * disappeared... */ sc = sc_get_softc(unit, flags & SC_KERNEL_CONSOLE); if ((sc->flags & SC_INIT_DONE) == 0) SC_VIDEO_LOCKINIT(sc); adp = NULL; if (sc->adapter >= 0) { vid_release(sc->adp, (void *)&sc->adapter); adp = sc->adp; sc->adp = NULL; } if (sc->keyboard >= 0) { DPRINTF(5, ("sc%d: releasing kbd%d\n", unit, sc->keyboard)); i = kbd_release(sc->kbd, (void *)&sc->keyboard); DPRINTF(5, ("sc%d: kbd_release returned %d\n", unit, i)); if (sc->kbd != NULL) { DPRINTF(5, ("sc%d: kbd != NULL!, index:%d, unit:%d, flags:0x%x\n", unit, sc->kbd->kb_index, sc->kbd->kb_unit, sc->kbd->kb_flags)); } sc->kbd = NULL; } sc->adapter = vid_allocate("*", unit, (void *)&sc->adapter); sc->adp = vid_get_adapter(sc->adapter); /* assert((sc->adapter >= 0) && (sc->adp != NULL)) */ sc->keyboard = sc_allocate_keyboard(sc, unit); DPRINTF(1, ("sc%d: keyboard %d\n", unit, sc->keyboard)); sc->kbd = kbd_get_keyboard(sc->keyboard); if (sc->kbd != NULL) { DPRINTF(1, ("sc%d: kbd index:%d, unit:%d, flags:0x%x\n", unit, sc->kbd->kb_index, sc->kbd->kb_unit, sc->kbd->kb_flags)); } if (!(sc->flags & SC_INIT_DONE) || (adp != sc->adp)) { sc->initial_mode = sc->adp->va_initial_mode; #ifndef SC_NO_FONT_LOADING if (flags & SC_KERNEL_CONSOLE) { sc->font_8 = font_8; sc->font_14 = font_14; sc->font_16 = font_16; } else if (sc->font_8 == NULL) { /* assert(sc_malloc) */ sc->font_8 = malloc(sizeof(font_8), M_DEVBUF, M_WAITOK); sc->font_14 = malloc(sizeof(font_14), M_DEVBUF, M_WAITOK); sc->font_16 = malloc(sizeof(font_16), M_DEVBUF, M_WAITOK); } #endif /* extract the hardware cursor location and hide the cursor for now */ vidd_read_hw_cursor(sc->adp, &col, &row); vidd_set_hw_cursor(sc->adp, -1, -1); /* set up the first console */ sc->first_vty = unit*MAXCONS; sc->vtys = MAXCONS; /* XXX: should be configurable */ if (flags & SC_KERNEL_CONSOLE) { /* * Set up devs structure but don't use it yet, calling make_dev() * might panic the kernel. Wait for sc_attach_unit() to actually * create the devices.
*/ sc->dev = main_devs; scp = &main_console; init_scp(sc, sc->first_vty, scp); sc_vtb_init(&scp->vtb, VTB_MEMORY, scp->xsize, scp->ysize, (void *)sc_buffer, FALSE); /* move cursors to the initial positions */ if (col >= scp->xsize) col = 0; if (row >= scp->ysize) row = scp->ysize - 1; scp->xpos = col; scp->ypos = row; scp->cursor_pos = scp->cursor_oldpos = row*scp->xsize + col; if (sc_init_emulator(scp, SC_DFLT_TERM)) sc_init_emulator(scp, "*"); (*scp->tsw->te_default_attr)(scp, user_default.std_color, user_default.rev_color); } else { /* assert(sc_malloc) */ sc->dev = malloc(sizeof(struct tty *)*sc->vtys, M_DEVBUF, M_WAITOK|M_ZERO); sc->dev[0] = sc_alloc_tty(0, unit * MAXCONS); scp = alloc_scp(sc, sc->first_vty); SC_STAT(sc->dev[0]) = scp; } sc->cur_scp = scp; #ifndef __sparc64__ /* copy screen to temporary buffer */ sc_vtb_init(&scp->scr, VTB_FRAMEBUFFER, scp->xsize, scp->ysize, (void *)scp->sc->adp->va_window, FALSE); if (ISTEXTSC(scp)) sc_vtb_copy(&scp->scr, 0, &scp->vtb, 0, scp->xsize*scp->ysize); #endif if (bios_value.cursor_end < scp->font_size) sc->dflt_curs_attr.base = scp->font_size - bios_value.cursor_end - 1; else sc->dflt_curs_attr.base = 0; i = bios_value.cursor_end - bios_value.cursor_start + 1; sc->dflt_curs_attr.height = imin(i, scp->font_size); sc->dflt_curs_attr.flags = 0; sc->curs_attr = sc->dflt_curs_attr; scp->curr_curs_attr = scp->dflt_curs_attr = sc->curs_attr; #ifndef SC_NO_SYSMOUSE sc_mouse_move(scp, scp->xpixel/2, scp->ypixel/2); #endif if (!ISGRAPHSC(scp)) { sc_set_cursor_image(scp); sc_draw_cursor_image(scp); } /* save font and palette */ #ifndef SC_NO_FONT_LOADING sc->fonts_loaded = 0; if (ISFONTAVAIL(sc->adp->va_flags)) { #ifdef SC_DFLT_FONT bcopy(dflt_font_8, sc->font_8, sizeof(dflt_font_8)); bcopy(dflt_font_14, sc->font_14, sizeof(dflt_font_14)); bcopy(dflt_font_16, sc->font_16, sizeof(dflt_font_16)); sc->fonts_loaded = FONT_16 | FONT_14 | FONT_8; if (scp->font_size < 14) { sc_load_font(scp, 0, 8, 8, sc->font_8, 0, 256); } else if (scp->font_size >= 16) { sc_load_font(scp, 0, 16, 8, sc->font_16, 0, 256); } else { sc_load_font(scp, 0, 14, 8, sc->font_14, 0, 256); } #else /* !SC_DFLT_FONT */ if (scp->font_size < 14) { sc_save_font(scp, 0, 8, 8, sc->font_8, 0, 256); sc->fonts_loaded = FONT_8; } else if (scp->font_size >= 16) { sc_save_font(scp, 0, 16, 8, sc->font_16, 0, 256); sc->fonts_loaded = FONT_16; } else { sc_save_font(scp, 0, 14, 8, sc->font_14, 0, 256); sc->fonts_loaded = FONT_14; } #endif /* SC_DFLT_FONT */ /* FONT KLUDGE: always use the font page #0. XXX */ sc_show_font(scp, 0); } #endif /* !SC_NO_FONT_LOADING */ #ifndef SC_NO_PALETTE_LOADING vidd_save_palette(sc->adp, sc->palette); #ifdef SC_PIXEL_MODE for (i = 0; i < sizeof(sc->palette2); i++) sc->palette2[i] = i / 3; #endif #endif #ifdef DEV_SPLASH if (!(sc->flags & SC_SPLASH_SCRN)) { /* we are ready to put up the splash image! 
*/ splash_init(sc->adp, scsplash_callback, sc); sc->flags |= SC_SPLASH_SCRN; } #endif } /* the rest is not necessary, if we have done it once */ if (sc->flags & SC_INIT_DONE) return; /* initialize mapscrn arrays to a one to one map */ for (i = 0; i < sizeof(sc->scr_map); i++) sc->scr_map[i] = sc->scr_rmap[i] = i; #ifdef PC98 sc->scr_map[0x5c] = (u_char)0xfc; /* for backslash */ #endif sc->flags |= SC_INIT_DONE; } static void scterm(int unit, int flags) { sc_softc_t *sc; scr_stat *scp; sc = sc_get_softc(unit, flags & SC_KERNEL_CONSOLE); if (sc == NULL) return; /* shouldn't happen */ #ifdef DEV_SPLASH /* this console is no longer available for the splash screen */ if (sc->flags & SC_SPLASH_SCRN) { splash_term(sc->adp); sc->flags &= ~SC_SPLASH_SCRN; } #endif #if 0 /* XXX */ /* move the hardware cursor to the upper-left corner */ vidd_set_hw_cursor(sc->adp, 0, 0); #endif /* release the keyboard and the video card */ if (sc->keyboard >= 0) kbd_release(sc->kbd, &sc->keyboard); if (sc->adapter >= 0) vid_release(sc->adp, &sc->adapter); /* stop the terminal emulator, if any */ scp = sc_get_stat(sc->dev[0]); if (scp->tsw) (*scp->tsw->te_term)(scp, &scp->ts); if (scp->ts != NULL) free(scp->ts, M_DEVBUF); mtx_destroy(&sc->video_mtx); /* clear the structure */ if (!(flags & SC_KERNEL_CONSOLE)) { /* XXX: We need delete_dev() for this */ free(sc->dev, M_DEVBUF); #if 0 /* XXX: We need a ttyunregister for this */ free(sc->tty, M_DEVBUF); #endif #ifndef SC_NO_FONT_LOADING free(sc->font_8, M_DEVBUF); free(sc->font_14, M_DEVBUF); free(sc->font_16, M_DEVBUF); #endif /* XXX vtb, history */ } bzero(sc, sizeof(*sc)); sc->keyboard = -1; sc->adapter = -1; } static void scshutdown(__unused void *arg, __unused int howto) { KASSERT(sc_console != NULL, ("sc_console != NULL")); KASSERT(sc_console->sc != NULL, ("sc_console->sc != NULL")); KASSERT(sc_console->sc->cur_scp != NULL, ("sc_console->sc->cur_scp != NULL")); sc_touch_scrn_saver(); if (!cold && sc_console->sc->cur_scp->index != sc_console->index && sc_console->sc->cur_scp->smode.mode == VT_AUTO && sc_console->smode.mode == VT_AUTO) sc_switch_scr(sc_console->sc, sc_console->index); shutdown_in_progress = TRUE; } static void scsuspend(__unused void *arg) { int retry; KASSERT(sc_console != NULL, ("sc_console != NULL")); KASSERT(sc_console->sc != NULL, ("sc_console->sc != NULL")); KASSERT(sc_console->sc->cur_scp != NULL, ("sc_console->sc->cur_scp != NULL")); sc_susp_scr = sc_console->sc->cur_scp->index; if (sc_no_suspend_vtswitch || sc_susp_scr == sc_console->index) { sc_touch_scrn_saver(); sc_susp_scr = -1; return; } for (retry = 0; retry < 10; retry++) { sc_switch_scr(sc_console->sc, sc_console->index); if (!sc_console->sc->switch_in_progress) break; pause("scsuspend", hz); } suspend_in_progress = TRUE; } static void scresume(__unused void *arg) { KASSERT(sc_console != NULL, ("sc_console != NULL")); KASSERT(sc_console->sc != NULL, ("sc_console->sc != NULL")); KASSERT(sc_console->sc->cur_scp != NULL, ("sc_console->sc->cur_scp != NULL")); suspend_in_progress = FALSE; if (sc_susp_scr < 0) { update_font(sc_console->sc->cur_scp); return; } sc_switch_scr(sc_console->sc, sc_susp_scr); } int sc_clean_up(scr_stat *scp) { #ifdef DEV_SPLASH int error; #endif if (scp->sc->flags & SC_SCRN_BLANKED) { sc_touch_scrn_saver(); #ifdef DEV_SPLASH if ((error = wait_scrn_saver_stop(scp->sc))) return error; #endif } scp->status |= MOUSE_HIDDEN; sc_remove_mouse_image(scp); sc_remove_cutmarking(scp); return 0; } void sc_alloc_scr_buffer(scr_stat *scp, int wait, int discard) { sc_vtb_t new; 
sc_vtb_t old; old = scp->vtb; sc_vtb_init(&new, VTB_MEMORY, scp->xsize, scp->ysize, NULL, wait); if (!discard && (old.vtb_flags & VTB_VALID)) { /* retain the current cursor position and buffer contents */ scp->cursor_oldpos = scp->cursor_pos; /* * This works only if the old buffer has the same size as or larger * than the new one. XXX */ sc_vtb_copy(&old, 0, &new, 0, scp->xsize*scp->ysize); scp->vtb = new; } else { scp->vtb = new; sc_vtb_destroy(&old); } #ifndef SC_NO_SYSMOUSE /* move the mouse cursor to the center of the screen */ sc_mouse_move(scp, scp->xpixel / 2, scp->ypixel / 2); #endif } static scr_stat *alloc_scp(sc_softc_t *sc, int vty) { scr_stat *scp; /* assert(sc_malloc) */ scp = (scr_stat *)malloc(sizeof(scr_stat), M_DEVBUF, M_WAITOK); init_scp(sc, vty, scp); sc_alloc_scr_buffer(scp, TRUE, TRUE); if (sc_init_emulator(scp, SC_DFLT_TERM)) sc_init_emulator(scp, "*"); #ifndef SC_NO_CUTPASTE sc_alloc_cut_buffer(scp, TRUE); #endif #ifndef SC_NO_HISTORY sc_alloc_history_buffer(scp, 0, 0, TRUE); #endif return scp; } static void init_scp(sc_softc_t *sc, int vty, scr_stat *scp) { video_info_t info; bzero(scp, sizeof(*scp)); scp->index = vty; scp->sc = sc; scp->status = 0; scp->mode = sc->initial_mode; vidd_get_info(sc->adp, scp->mode, &info); if (info.vi_flags & V_INFO_GRAPHICS) { scp->status |= GRAPHICS_MODE; scp->xpixel = info.vi_width; scp->ypixel = info.vi_height; scp->xsize = info.vi_width/info.vi_cwidth; scp->ysize = info.vi_height/info.vi_cheight; scp->font_size = 0; scp->font = NULL; } else { scp->xsize = info.vi_width; scp->ysize = info.vi_height; scp->xpixel = scp->xsize*info.vi_cwidth; scp->ypixel = scp->ysize*info.vi_cheight; } scp->font_size = info.vi_cheight; scp->font_width = info.vi_cwidth; #ifndef SC_NO_FONT_LOADING if (info.vi_cheight < 14) scp->font = sc->font_8; else if (info.vi_cheight >= 16) scp->font = sc->font_16; else scp->font = sc->font_14; #else scp->font = NULL; #endif sc_vtb_init(&scp->vtb, VTB_MEMORY, 0, 0, NULL, FALSE); #ifndef __sparc64__ sc_vtb_init(&scp->scr, VTB_FRAMEBUFFER, 0, 0, NULL, FALSE); #endif scp->xoff = scp->yoff = 0; scp->xpos = scp->ypos = 0; scp->start = scp->xsize * scp->ysize - 1; scp->end = 0; scp->tsw = NULL; scp->ts = NULL; scp->rndr = NULL; scp->border = (SC_NORM_ATTR >> 4) & 0x0f; scp->curr_curs_attr = scp->dflt_curs_attr = sc->curs_attr; scp->mouse_cut_start = scp->xsize*scp->ysize; scp->mouse_cut_end = -1; scp->mouse_signal = 0; scp->mouse_pid = 0; scp->mouse_proc = NULL; scp->kbd_mode = K_XLATE; scp->bell_pitch = bios_value.bell_pitch; scp->bell_duration = BELL_DURATION; scp->status |= (bios_value.shift_state & NLKED); scp->status |= CURSOR_ENABLED | MOUSE_HIDDEN; scp->pid = 0; scp->proc = NULL; scp->smode.mode = VT_AUTO; scp->history = NULL; scp->history_pos = 0; scp->history_size = 0; } int sc_init_emulator(scr_stat *scp, char *name) { sc_term_sw_t *sw; sc_rndr_sw_t *rndr; void *p; int error; if (name == NULL) /* if no name is given, use the current emulator */ sw = scp->tsw; else /* ...otherwise find the named emulator */ sw = sc_term_match(name); if (sw == NULL) return EINVAL; rndr = NULL; if (strcmp(sw->te_renderer, "*") != 0) { rndr = sc_render_match(scp, sw->te_renderer, scp->status & (GRAPHICS_MODE | PIXEL_MODE)); } if (rndr == NULL) { rndr = sc_render_match(scp, scp->sc->adp->va_name, scp->status & (GRAPHICS_MODE | PIXEL_MODE)); if (rndr == NULL) return ENODEV; } if (sw == scp->tsw) { error = (*sw->te_init)(scp, &scp->ts, SC_TE_WARM_INIT); scp->rndr = rndr; scp->rndr->init(scp); sc_clear_screen(scp); /* assert(error ==
0); */ return error; } if (sc_malloc && (sw->te_size > 0)) p = malloc(sw->te_size, M_DEVBUF, M_NOWAIT); else p = NULL; error = (*sw->te_init)(scp, &p, SC_TE_COLD_INIT); if (error) return error; if (scp->tsw) (*scp->tsw->te_term)(scp, &scp->ts); if (scp->ts != NULL) free(scp->ts, M_DEVBUF); scp->tsw = sw; scp->ts = p; scp->rndr = rndr; scp->rndr->init(scp); /* XXX */ (*sw->te_default_attr)(scp, user_default.std_color, user_default.rev_color); sc_clear_screen(scp); return 0; } /* * scgetc(flags) - get character from keyboard. * If flags & SCGETC_CN, then avoid harmful side effects. * If flags & SCGETC_NONBLOCK, then return NOKEY if there is nothing there, * else wait until a key is pressed. */ static u_int scgetc(sc_softc_t *sc, u_int flags, struct sc_cnstate *sp) { scr_stat *scp; #ifndef SC_NO_HISTORY struct tty *tp; #endif u_int c; int this_scr; int f; int i; if (sc->kbd == NULL) return NOKEY; next_code: #if 1 /* I don't like this, but... XXX */ if (flags & SCGETC_CN) sccnupdate(sc->cur_scp); #endif scp = sc->cur_scp; /* first see if there is something in the keyboard port */ for (;;) { c = kbdd_read_char(sc->kbd, !(flags & SCGETC_NONBLOCK)); if (c == ERRKEY) { if (!(flags & SCGETC_CN)) sc_bell(scp, bios_value.bell_pitch, BELL_DURATION); } else if (c == NOKEY) return c; else break; } /* make screensaver happy */ if (!(c & RELKEY)) sc_touch_scrn_saver(); if (!(flags & SCGETC_CN)) random_harvest_queue(&c, sizeof(c), 1, RANDOM_KEYBOARD); if (sc->kbd_open_level == 0 && scp->kbd_mode != K_XLATE) return KEYCHAR(c); /* if scroll-lock pressed allow history browsing */ if (!ISGRAPHSC(scp) && scp->history && scp->status & SLKED) { scp->status &= ~CURSOR_ENABLED; sc_remove_cursor_image(scp); #ifndef SC_NO_HISTORY if (!(scp->status & BUFFER_SAVED)) { scp->status |= BUFFER_SAVED; sc_hist_save(scp); } switch (c) { /* FIXME: key codes */ case SPCLKEY | FKEY | F(49): /* home key */ sc_remove_cutmarking(scp); sc_hist_home(scp); goto next_code; case SPCLKEY | FKEY | F(57): /* end key */ sc_remove_cutmarking(scp); sc_hist_end(scp); goto next_code; case SPCLKEY | FKEY | F(50): /* up arrow key */ sc_remove_cutmarking(scp); if (sc_hist_up_line(scp)) if (!(flags & SCGETC_CN)) sc_bell(scp, bios_value.bell_pitch, BELL_DURATION); goto next_code; case SPCLKEY | FKEY | F(58): /* down arrow key */ sc_remove_cutmarking(scp); if (sc_hist_down_line(scp)) if (!(flags & SCGETC_CN)) sc_bell(scp, bios_value.bell_pitch, BELL_DURATION); goto next_code; case SPCLKEY | FKEY | F(51): /* page up key */ sc_remove_cutmarking(scp); for (i = 0; i < scp->ysize; i++) if (sc_hist_up_line(scp)) { if (!(flags & SCGETC_CN)) sc_bell(scp, bios_value.bell_pitch, BELL_DURATION); break; } goto next_code; case SPCLKEY | FKEY | F(59): /* page down key */ sc_remove_cutmarking(scp); for (i = 0; i < scp->ysize; i++) if (sc_hist_down_line(scp)) { if (!(flags & SCGETC_CN)) sc_bell(scp, bios_value.bell_pitch, BELL_DURATION); break; } goto next_code; } #endif /* SC_NO_HISTORY */ } /* * Process and consume special keys here. Return a plain char code * or a char code with the META flag or a function key code. 
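* (In the returned code, RELKEY marks a key release, SPCLKEY a special * key, FKEY a function key, and MKEY a Meta-modified character; KEYCHAR() * recovers the plain character value.)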
*/ if (c & RELKEY) { /* key released */ /* goto next_code */ } else { /* key pressed */ if (c & SPCLKEY) { c &= ~SPCLKEY; switch (KEYCHAR(c)) { /* LOCKING KEYS */ case NLK: case CLK: case ALK: break; case SLK: (void)kbdd_ioctl(sc->kbd, KDGKBSTATE, (caddr_t)&f); if (f & SLKED) { scp->status |= SLKED; } else { if (scp->status & SLKED) { scp->status &= ~SLKED; #ifndef SC_NO_HISTORY if (scp->status & BUFFER_SAVED) { if (!sc_hist_restore(scp)) sc_remove_cutmarking(scp); scp->status &= ~BUFFER_SAVED; scp->status |= CURSOR_ENABLED; sc_draw_cursor_image(scp); } /* Only safe in Giant-locked context. */ tp = SC_DEV(sc, scp->index); if (!(flags & SCGETC_CN) && tty_opened_ns(tp)) sctty_outwakeup(tp); #endif } } break; case PASTE: #ifndef SC_NO_CUTPASTE sc_mouse_paste(scp); #endif break; /* NON-LOCKING KEYS */ case NOP: case LSH: case RSH: case LCTR: case RCTR: case LALT: case RALT: case ASH: case META: break; case BTAB: if (!(sc->flags & SC_SCRN_BLANKED)) return c; break; case SPSC: #ifdef DEV_SPLASH /* force activation/deactivation of the screen saver */ if (!(sc->flags & SC_SCRN_BLANKED)) { run_scrn_saver = TRUE; sc->scrn_time_stamp -= scrn_blank_time; } if (cold) { /* * While devices are being probed, the screen saver needs * to be invoked explicitly. XXX */ if (sc->flags & SC_SCRN_BLANKED) { scsplash_stick(FALSE); stop_scrn_saver(sc, current_saver); } else { if (!ISGRAPHSC(scp)) { scsplash_stick(TRUE); (*current_saver)(sc, TRUE); } } } #endif /* DEV_SPLASH */ break; case RBT: #ifndef SC_DISABLE_REBOOT if (enable_reboot && !(flags & SCGETC_CN)) shutdown_nice(0); #endif break; case HALT: #ifndef SC_DISABLE_REBOOT if (enable_reboot && !(flags & SCGETC_CN)) shutdown_nice(RB_HALT); #endif break; case PDWN: #ifndef SC_DISABLE_REBOOT if (enable_reboot && !(flags & SCGETC_CN)) shutdown_nice(RB_HALT|RB_POWEROFF); #endif break; case SUSP: power_pm_suspend(POWER_SLEEP_STATE_SUSPEND); break; case STBY: power_pm_suspend(POWER_SLEEP_STATE_STANDBY); break; case DBG: #ifndef SC_DISABLE_KDBKEY if (enable_kdbkey) kdb_break(); #endif break; case PNC: if (enable_panic_key) panic("Forced by the panic key"); break; case NEXT: this_scr = scp->index; for (i = (this_scr - sc->first_vty + 1)%sc->vtys; sc->first_vty + i != this_scr; i = (i + 1)%sc->vtys) { struct tty *tp = SC_DEV(sc, sc->first_vty + i); if (tty_opened_ns(tp)) { sc_switch_scr(scp->sc, sc->first_vty + i); break; } } break; case PREV: this_scr = scp->index; for (i = (this_scr - sc->first_vty + sc->vtys - 1)%sc->vtys; sc->first_vty + i != this_scr; i = (i + sc->vtys - 1)%sc->vtys) { struct tty *tp = SC_DEV(sc, sc->first_vty + i); if (tty_opened_ns(tp)) { sc_switch_scr(scp->sc, sc->first_vty + i); break; } } break; default: if (KEYCHAR(c) >= F_SCR && KEYCHAR(c) <= L_SCR) { sc_switch_scr(scp->sc, sc->first_vty + KEYCHAR(c) - F_SCR); break; } /* assert(c & FKEY) */ if (!(sc->flags & SC_SCRN_BLANKED)) return c; break; } /* goto next_code */ } else { /* regular keys (maybe MKEY is set) */ #if !defined(SC_DISABLE_KDBKEY) && defined(KDB) if (enable_kdbkey) kdb_alt_break(c, &sc->sc_altbrk); #endif if (!(sc->flags & SC_SCRN_BLANKED)) return c; } } goto next_code; } static int sctty_mmap(struct tty *tp, vm_ooffset_t offset, vm_paddr_t *paddr, int nprot, vm_memattr_t *memattr) { scr_stat *scp; scp = sc_get_stat(tp); if (scp != scp->sc->cur_scp) return -1; return vidd_mmap(scp->sc->adp, offset, paddr, nprot, memattr); } static void update_font(scr_stat *scp) { #ifndef SC_NO_FONT_LOADING /* load appropriate font */ if (!(scp->status & GRAPHICS_MODE)) { if (!(scp->status & 
PIXEL_MODE) && ISFONTAVAIL(scp->sc->adp->va_flags)) { if (scp->font_size < 14) { if (scp->sc->fonts_loaded & FONT_8) sc_load_font(scp, 0, 8, 8, scp->sc->font_8, 0, 256); } else if (scp->font_size >= 16) { if (scp->sc->fonts_loaded & FONT_16) sc_load_font(scp, 0, 16, 8, scp->sc->font_16, 0, 256); } else { if (scp->sc->fonts_loaded & FONT_14) sc_load_font(scp, 0, 14, 8, scp->sc->font_14, 0, 256); } /* * FONT KLUDGE: * This is an interim kludge to display correct font. * Always use the font page #0 on the video plane 2. * Somehow we cannot show the font in other font pages on * some video cards... XXX */ sc_show_font(scp, 0); } mark_all(scp); } #endif /* !SC_NO_FONT_LOADING */ } static int save_kbd_state(scr_stat *scp) { int state; int error; error = kbdd_ioctl(scp->sc->kbd, KDGKBSTATE, (caddr_t)&state); if (error == ENOIOCTL) error = ENODEV; if (error == 0) { scp->status &= ~LOCK_MASK; scp->status |= state; } return error; } static int update_kbd_state(scr_stat *scp, int new_bits, int mask) { int state; int error; if (mask != LOCK_MASK) { error = kbdd_ioctl(scp->sc->kbd, KDGKBSTATE, (caddr_t)&state); if (error == ENOIOCTL) error = ENODEV; if (error) return error; state &= ~mask; state |= new_bits & mask; } else { state = new_bits & LOCK_MASK; } error = kbdd_ioctl(scp->sc->kbd, KDSKBSTATE, (caddr_t)&state); if (error == ENOIOCTL) error = ENODEV; return error; } static int update_kbd_leds(scr_stat *scp, int which) { int error; which &= LOCK_MASK; error = kbdd_ioctl(scp->sc->kbd, KDSETLED, (caddr_t)&which); if (error == ENOIOCTL) error = ENODEV; return error; } int set_mode(scr_stat *scp) { video_info_t info; /* reject unsupported mode */ if (vidd_get_info(scp->sc->adp, scp->mode, &info)) return 1; /* if this vty is not currently showing, do nothing */ if (scp != scp->sc->cur_scp) return 0; /* setup video hardware for the given mode */ vidd_set_mode(scp->sc->adp, scp->mode); scp->rndr->init(scp); #ifndef __sparc64__ sc_vtb_init(&scp->scr, VTB_FRAMEBUFFER, scp->xsize, scp->ysize, (void *)scp->sc->adp->va_window, FALSE); #endif update_font(scp); sc_set_border(scp, scp->border); sc_set_cursor_image(scp); return 0; } void sc_set_border(scr_stat *scp, int color) { SC_VIDEO_LOCK(scp->sc); (*scp->rndr->draw_border)(scp, color); SC_VIDEO_UNLOCK(scp->sc); } #ifndef SC_NO_FONT_LOADING void sc_load_font(scr_stat *scp, int page, int size, int width, u_char *buf, int base, int count) { sc_softc_t *sc; sc = scp->sc; sc->font_loading_in_progress = TRUE; vidd_load_font(sc->adp, page, size, width, buf, base, count); sc->font_loading_in_progress = FALSE; } void sc_save_font(scr_stat *scp, int page, int size, int width, u_char *buf, int base, int count) { sc_softc_t *sc; sc = scp->sc; sc->font_loading_in_progress = TRUE; vidd_save_font(sc->adp, page, size, width, buf, base, count); sc->font_loading_in_progress = FALSE; } void sc_show_font(scr_stat *scp, int page) { vidd_show_font(scp->sc->adp, page); } #endif /* !SC_NO_FONT_LOADING */ void sc_paste(scr_stat *scp, const u_char *p, int count) { struct tty *tp; u_char *rmap; tp = SC_DEV(scp->sc, scp->sc->cur_scp->index); if (!tty_opened_ns(tp)) return; rmap = scp->sc->scr_rmap; for (; count > 0; --count) ttydisc_rint(tp, rmap[*p++], 0); ttydisc_rint_done(tp); } void sc_respond(scr_stat *scp, const u_char *p, int count, int wakeup) { struct tty *tp; tp = SC_DEV(scp->sc, scp->sc->cur_scp->index); if (!tty_opened_ns(tp)) return; ttydisc_rint_simple(tp, p, count); if (wakeup) { /* XXX: we can't always call ttydisc_rint_done() here! 
*/ ttydisc_rint_done(tp); } } void sc_bell(scr_stat *scp, int pitch, int duration) { if (cold || kdb_active || shutdown_in_progress || !enable_bell) return; if (scp != scp->sc->cur_scp && (scp->sc->flags & SC_QUIET_BELL)) return; if (scp->sc->flags & SC_VISUAL_BELL) { if (scp->sc->blink_in_progress) return; scp->sc->blink_in_progress = 3; if (scp != scp->sc->cur_scp) scp->sc->blink_in_progress += 2; blink_screen(scp->sc->cur_scp); } else if (duration != 0 && pitch != 0) { if (scp != scp->sc->cur_scp) pitch *= 2; sysbeep(1193182 / pitch, duration); } } static void blink_screen(void *arg) { scr_stat *scp = arg; struct tty *tp; if (ISGRAPHSC(scp) || (scp->sc->blink_in_progress <= 1)) { scp->sc->blink_in_progress = 0; mark_all(scp); tp = SC_DEV(scp->sc, scp->index); if (tty_opened_ns(tp)) sctty_outwakeup(tp); if (scp->sc->delayed_next_scr) sc_switch_scr(scp->sc, scp->sc->delayed_next_scr - 1); } else { (*scp->rndr->draw)(scp, 0, scp->xsize*scp->ysize, scp->sc->blink_in_progress & 1); scp->sc->blink_in_progress--; callout_reset_sbt(&scp->sc->cblink, SBT_1S / 15, 0, blink_screen, scp, C_PREL(0)); } } /* * Until sc_attach_unit() gets called no dev structures will be available * to store the per-screen current status. This is the case when the * kernel is initially booting and needs access to its console. During * this early phase of booting the console's current status is kept in * one statically defined scr_stat structure, and any pointers to the * dev structures will be NULL. */ static scr_stat * sc_get_stat(struct tty *tp) { if (tp == NULL) return (&main_console); return (SC_STAT(tp)); } /* * Allocate active keyboard. Try to allocate "kbdmux" keyboard first, and, * if found, add all non-busy keyboards to "kbdmux". Otherwise look for * any keyboard. */ static int sc_allocate_keyboard(sc_softc_t *sc, int unit) { int idx0, idx; keyboard_t *k0, *k; keyboard_info_t ki; idx0 = kbd_allocate("kbdmux", -1, (void *)&sc->keyboard, sckbdevent, sc); if (idx0 != -1) { k0 = kbd_get_keyboard(idx0); for (idx = kbd_find_keyboard2("*", -1, 0); idx != -1; idx = kbd_find_keyboard2("*", -1, idx + 1)) { k = kbd_get_keyboard(idx); if (idx == idx0 || KBD_IS_BUSY(k)) continue; bzero(&ki, sizeof(ki)); strcpy(ki.kb_name, k->kb_name); ki.kb_unit = k->kb_unit; (void)kbdd_ioctl(k0, KBADDKBD, (caddr_t) &ki); } } else idx0 = kbd_allocate("*", unit, (void *)&sc->keyboard, sckbdevent, sc); return (idx0); } Index: projects/clang390-import/sys/dev/uart/uart_cpu_powerpc.c =================================================================== --- projects/clang390-import/sys/dev/uart/uart_cpu_powerpc.c (revision 305016) +++ projects/clang390-import/sys/dev/uart/uart_cpu_powerpc.c (revision 305017) @@ -1,203 +1,203 @@ /*- * Copyright (c) 2006 Marcel Moolenaar * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include +#include #include #include #include bus_space_tag_t uart_bus_space_io = &bs_le_tag; bus_space_tag_t uart_bus_space_mem = &bs_le_tag; int uart_cpu_eqres(struct uart_bas *b1, struct uart_bas *b2) { return ((pmap_kextract(b1->bsh) == pmap_kextract(b2->bsh)) ? 1 : 0); } static int ofw_get_uart_console(phandle_t opts, phandle_t *result, const char *inputdev, const char *outputdev) { char buf[64]; phandle_t input; if (OF_getprop(opts, inputdev, buf, sizeof(buf)) == -1) return (ENXIO); input = OF_finddevice(buf); if (input == -1) return (ENXIO); if (outputdev != NULL) { if (OF_getprop(opts, outputdev, buf, sizeof(buf)) == -1) return (ENXIO); if (OF_finddevice(buf) != input) return (ENXIO); } *result = input; return (0); } static int ofw_get_console_phandle_path(phandle_t node, phandle_t *result, const char *prop) { union { char buf[64]; phandle_t ref; } field; phandle_t output; ssize_t size; size = OF_getproplen(node, prop); if (size == -1) return (ENXIO); OF_getprop(node, prop, &field, sizeof(field)); /* This property might be either a ihandle or path. Hooray. */ output = -1; if (field.buf[size - 1] == 0) output = OF_finddevice(field.buf); if (output == -1 && size == 4) output = OF_instance_to_package(field.ref); if (output != -1) { *result = output; return (0); } return (ENXIO); } int uart_cpu_getdev(int devtype, struct uart_devinfo *di) { char buf[64]; struct uart_class *class; phandle_t input, opts, chosen; int error; opts = OF_finddevice("/options"); chosen = OF_finddevice("/chosen"); switch (devtype) { case UART_DEV_CONSOLE: error = ENXIO; if (chosen != -1 && error != 0) error = ofw_get_uart_console(chosen, &input, "stdout-path", NULL); if (chosen != -1 && error != 0) error = ofw_get_uart_console(chosen, &input, "linux,stdout-path", NULL); if (chosen != -1 && error != 0) error = ofw_get_console_phandle_path(chosen, &input, "stdout"); if (chosen != -1 && error != 0) error = ofw_get_uart_console(chosen, &input, "stdin-path", NULL); if (chosen != -1 && error != 0) error = ofw_get_console_phandle_path(chosen, &input, "stdin"); if (opts != -1 && error != 0) error = ofw_get_uart_console(opts, &input, "input-device", "output-device"); if (opts != -1 && error != 0) error = ofw_get_uart_console(opts, &input, "input-device-1", "output-device-1"); if (error != 0) { input = OF_finddevice("serial0"); /* Last ditch */ if (input == -1) error = (ENXIO); } if (error != 0) return (error); break; case UART_DEV_DBGPORT: if (!getenv_string("hw.uart.dbgport", buf, sizeof(buf))) return (ENXIO); input = OF_finddevice(buf); if (input == -1) return (ENXIO); break; default: return (EINVAL); } if (OF_getprop(input, "device_type", buf, sizeof(buf)) == -1) return (ENXIO); if (strcmp(buf, "serial") != 0) return (ENXIO); - if (OF_getprop(input, "compatible", buf, sizeof(buf)) == -1) - return (ENXIO); - if (strncmp(buf, "chrp,es", 7) == 0) { + if (ofw_bus_node_is_compatible(input, "chrp,es")) { class = &uart_z8530_class; di->bas.regshft = 4; 
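/* A register shift of 4 spaces the Z8530 registers 16 bytes apart. */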
di->bas.chan = 1; - } else if (strcmp(buf,"ns16550") == 0 || strcmp(buf,"ns8250") == 0) { + } else if (ofw_bus_node_is_compatible(input,"ns16550") || + ofw_bus_node_is_compatible(input,"ns8250")) { class = &uart_ns8250_class; di->bas.regshft = 0; di->bas.chan = 0; } else return (ENXIO); if (class == NULL) return (ENXIO); error = OF_decode_addr(input, 0, &di->bas.bst, &di->bas.bsh, NULL); if (error) return (error); di->ops = uart_getops(class); if (OF_getprop(input, "clock-frequency", &di->bas.rclk, sizeof(di->bas.rclk)) == -1) di->bas.rclk = 230400; if (OF_getprop(input, "current-speed", &di->baudrate, sizeof(di->baudrate)) == -1) di->baudrate = 0; OF_getprop(input, "reg-shift", &di->bas.regshft, sizeof(di->bas.regshft)); di->databits = 8; di->stopbits = 1; di->parity = UART_PARITY_NONE; return (0); } Index: projects/clang390-import/sys/i386/i386/pmap.c =================================================================== --- projects/clang390-import/sys/i386/i386/pmap.c (revision 305016) +++ projects/clang390-import/sys/i386/i386/pmap.c (revision 305017) @@ -1,5628 +1,5624 @@ /*- * Copyright (c) 1991 Regents of the University of California. * All rights reserved. * Copyright (c) 1994 John S. Dyson * All rights reserved. * Copyright (c) 1994 David Greenman * All rights reserved. * Copyright (c) 2005-2010 Alan L. Cox * All rights reserved. * * This code is derived from software contributed to Berkeley by * the Systems Programming Group of the University of Utah Computer * Science Department and William Jolitz of UUNET Technologies Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)pmap.c 7.7 (Berkeley) 5/12/91 */ /*- * Copyright (c) 2003 Networks Associates Technology, Inc. * All rights reserved. 
* * This software was developed for the FreeBSD Project by Jake Burkholder, * Safeport Network Services, and Network Associates Laboratories, the * Security Research Division of Network Associates, Inc. under * DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA * CHATS research program. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); /* * Manages physical address maps. * * Since the information managed by this module is * also stored by the logical address mapping module, * this module may throw away valid virtual-to-physical * mappings at almost any time. However, invalidations * of virtual-to-physical mappings must be done as * requested. * * In order to cope with hardware architectures which * make virtual-to-physical map invalidates expensive, * this module may delay invalidate or reduced protection * operations until such time as they are actually * necessary. This module is given full information as * to which processors are currently using which maps, * and to when physical maps must be made correct. 
*/ #include "opt_apic.h" #include "opt_cpu.h" #include "opt_pmap.h" #include "opt_smp.h" #include "opt_xbox.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef DEV_APIC #include #include #include #endif #include #include #include #include #include #ifdef SMP #include #endif #ifdef XBOX #include #endif #if !defined(CPU_DISABLE_SSE) && defined(I686_CPU) #define CPU_ENABLE_SSE #endif #ifndef PMAP_SHPGPERPROC #define PMAP_SHPGPERPROC 200 #endif #if !defined(DIAGNOSTIC) #ifdef __GNUC_GNU_INLINE__ #define PMAP_INLINE __attribute__((__gnu_inline__)) inline #else #define PMAP_INLINE extern inline #endif #else #define PMAP_INLINE #endif #ifdef PV_STATS #define PV_STAT(x) do { x ; } while (0) #else #define PV_STAT(x) do { } while (0) #endif #define pa_index(pa) ((pa) >> PDRSHIFT) #define pa_to_pvh(pa) (&pv_table[pa_index(pa)]) /* * Get PDEs and PTEs for user/kernel address space */ #define pmap_pde(m, v) (&((m)->pm_pdir[(vm_offset_t)(v) >> PDRSHIFT])) #define pdir_pde(m, v) (m[(vm_offset_t)(v) >> PDRSHIFT]) #define pmap_pde_v(pte) ((*(int *)pte & PG_V) != 0) #define pmap_pte_w(pte) ((*(int *)pte & PG_W) != 0) #define pmap_pte_m(pte) ((*(int *)pte & PG_M) != 0) #define pmap_pte_u(pte) ((*(int *)pte & PG_A) != 0) #define pmap_pte_v(pte) ((*(int *)pte & PG_V) != 0) #define pmap_pte_set_w(pte, v) ((v) ? atomic_set_int((u_int *)(pte), PG_W) : \ atomic_clear_int((u_int *)(pte), PG_W)) #define pmap_pte_set_prot(pte, v) ((*(int *)pte &= ~PG_PROT), (*(int *)pte |= (v))) struct pmap kernel_pmap_store; LIST_HEAD(pmaplist, pmap); static struct pmaplist allpmaps; static struct mtx allpmaps_lock; vm_offset_t virtual_avail; /* VA of first avail page (after kernel bss) */ vm_offset_t virtual_end; /* VA of last avail page (end of kernel AS) */ int pgeflag = 0; /* PG_G or-in */ int pseflag = 0; /* PG_PS or-in */ static int nkpt = NKPT; vm_offset_t kernel_vm_end = KERNBASE + NKPT * NBPDR; extern u_int32_t KERNend; extern u_int32_t KPTphys; #if defined(PAE) || defined(PAE_TABLES) pt_entry_t pg_nx; static uma_zone_t pdptzone; #endif static SYSCTL_NODE(_vm, OID_AUTO, pmap, CTLFLAG_RD, 0, "VM/pmap parameters"); static int pat_works = 1; SYSCTL_INT(_vm_pmap, OID_AUTO, pat_works, CTLFLAG_RD, &pat_works, 1, "Is page attribute table fully functional?"); static int pg_ps_enabled = 1; SYSCTL_INT(_vm_pmap, OID_AUTO, pg_ps_enabled, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &pg_ps_enabled, 0, "Are large page mappings enabled?"); #define PAT_INDEX_SIZE 8 static int pat_index[PAT_INDEX_SIZE]; /* cache mode to PAT index conversion */ /* * pmap_mapdev support pre initialization (i.e. 
console) */ #define PMAP_PREINIT_MAPPING_COUNT 8 static struct pmap_preinit_mapping { vm_paddr_t pa; vm_offset_t va; vm_size_t sz; int mode; } pmap_preinit_mapping[PMAP_PREINIT_MAPPING_COUNT]; static int pmap_initialized; static struct rwlock_padalign pvh_global_lock; /* * Data for the pv entry allocation mechanism */ static TAILQ_HEAD(pch, pv_chunk) pv_chunks = TAILQ_HEAD_INITIALIZER(pv_chunks); static int pv_entry_count = 0, pv_entry_max = 0, pv_entry_high_water = 0; static struct md_page *pv_table; static int shpgperproc = PMAP_SHPGPERPROC; struct pv_chunk *pv_chunkbase; /* KVA block for pv_chunks */ int pv_maxchunks; /* How many chunks we have KVA for */ vm_offset_t pv_vafree; /* freelist stored in the PTE */ /* * All those kernel PT submaps that BSD is so fond of */ struct sysmaps { struct mtx lock; pt_entry_t *CMAP1; pt_entry_t *CMAP2; caddr_t CADDR1; caddr_t CADDR2; }; static struct sysmaps sysmaps_pcpu[MAXCPU]; pt_entry_t *CMAP3; static pd_entry_t *KPTD; caddr_t ptvmmap = 0; caddr_t CADDR3; struct msgbuf *msgbufp = NULL; /* * Crashdump maps. */ static caddr_t crashdumpmap; static pt_entry_t *PMAP1 = NULL, *PMAP2; static pt_entry_t *PADDR1 = NULL, *PADDR2; #ifdef SMP static int PMAP1cpu; static int PMAP1changedcpu; SYSCTL_INT(_debug, OID_AUTO, PMAP1changedcpu, CTLFLAG_RD, &PMAP1changedcpu, 0, "Number of times pmap_pte_quick changed CPU with same PMAP1"); #endif static int PMAP1changed; SYSCTL_INT(_debug, OID_AUTO, PMAP1changed, CTLFLAG_RD, &PMAP1changed, 0, "Number of times pmap_pte_quick changed PMAP1"); static int PMAP1unchanged; SYSCTL_INT(_debug, OID_AUTO, PMAP1unchanged, CTLFLAG_RD, &PMAP1unchanged, 0, "Number of times pmap_pte_quick didn't change PMAP1"); static struct mtx PMAP2mutex; static void free_pv_chunk(struct pv_chunk *pc); static void free_pv_entry(pmap_t pmap, pv_entry_t pv); static pv_entry_t get_pv_entry(pmap_t pmap, boolean_t try); static void pmap_pv_demote_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa); static boolean_t pmap_pv_insert_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa); static void pmap_pv_promote_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa); static void pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va); static pv_entry_t pmap_pvh_remove(struct md_page *pvh, pmap_t pmap, vm_offset_t va); static int pmap_pvh_wired_mappings(struct md_page *pvh, int count); static boolean_t pmap_demote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va); static boolean_t pmap_enter_pde(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot); static vm_page_t pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, vm_page_t mpte); static void pmap_flush_page(vm_page_t m); static int pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte); static void pmap_fill_ptp(pt_entry_t *firstpte, pt_entry_t newpte); static boolean_t pmap_is_modified_pvh(struct md_page *pvh); static boolean_t pmap_is_referenced_pvh(struct md_page *pvh); static void pmap_kenter_attr(vm_offset_t va, vm_paddr_t pa, int mode); static void pmap_kenter_pde(vm_offset_t va, pd_entry_t newpde); static vm_page_t pmap_lookup_pt_page(pmap_t pmap, vm_offset_t va); static void pmap_pde_attr(pd_entry_t *pde, int cache_bits); static void pmap_promote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va); static boolean_t pmap_protect_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t sva, vm_prot_t prot); static void pmap_pte_attr(pt_entry_t *pte, int cache_bits); static void pmap_remove_pde(pmap_t pmap, pd_entry_t *pdq, vm_offset_t sva, struct spglist *free); static int 
pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, vm_offset_t sva, struct spglist *free); static void pmap_remove_pt_page(pmap_t pmap, vm_page_t mpte); static void pmap_remove_page(struct pmap *pmap, vm_offset_t va, struct spglist *free); static void pmap_remove_entry(struct pmap *pmap, vm_page_t m, vm_offset_t va); static void pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t m); static boolean_t pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va, vm_page_t m); static void pmap_update_pde(pmap_t pmap, vm_offset_t va, pd_entry_t *pde, pd_entry_t newpde); static void pmap_update_pde_invalidate(vm_offset_t va, pd_entry_t newpde); static vm_page_t pmap_allocpte(pmap_t pmap, vm_offset_t va, u_int flags); static vm_page_t _pmap_allocpte(pmap_t pmap, u_int ptepindex, u_int flags); static void _pmap_unwire_ptp(pmap_t pmap, vm_page_t m, struct spglist *free); static pt_entry_t *pmap_pte_quick(pmap_t pmap, vm_offset_t va); static void pmap_pte_release(pt_entry_t *pte); static int pmap_unuse_pt(pmap_t, vm_offset_t, struct spglist *); #if defined(PAE) || defined(PAE_TABLES) static void *pmap_pdpt_allocf(uma_zone_t zone, vm_size_t bytes, uint8_t *flags, int wait); #endif static void pmap_set_pg(void); static __inline void pagezero(void *page); CTASSERT(1 << PDESHIFT == sizeof(pd_entry_t)); CTASSERT(1 << PTESHIFT == sizeof(pt_entry_t)); /* * If you get an error here, then you set KVA_PAGES wrong! See the * description of KVA_PAGES in sys/i386/include/pmap.h. It must be * multiple of 4 for a normal kernel, or a multiple of 8 for a PAE. */ CTASSERT(KERNBASE % (1 << 24) == 0); /* * Bootstrap the system enough to run with virtual memory. * * On the i386 this is called after mapping has already been enabled * and just syncs the pmap module with what has already been done. * [We can't call it easily with mapping off since the kernel is not * mapped with PA == VA, hence we would have to relocate every address * from the linked base (virtual) address "KERNBASE" to the actual * (physical) address starting relative to 0] */ void pmap_bootstrap(vm_paddr_t firstaddr) { vm_offset_t va; pt_entry_t *pte, *unused; struct sysmaps *sysmaps; int i; /* * Add a physical memory segment (vm_phys_seg) corresponding to the * preallocated kernel page table pages so that vm_page structures * representing these pages will be created. The vm_page structures * are required for promotion of the corresponding kernel virtual * addresses to superpage mappings. */ vm_phys_add_seg(KPTphys, KPTphys + ptoa(nkpt)); /* * Initialize the first available kernel virtual address. However, * using "firstaddr" may waste a few pages of the kernel virtual * address space, because locore may not have mapped every physical * page that it allocated. Preferably, locore would provide a first * unused virtual address in addition to "firstaddr". */ virtual_avail = (vm_offset_t) KERNBASE + firstaddr; virtual_end = VM_MAX_KERNEL_ADDRESS; /* * Initialize the kernel pmap (which is statically allocated). */ PMAP_LOCK_INIT(kernel_pmap); kernel_pmap->pm_pdir = (pd_entry_t *) (KERNBASE + (u_int)IdlePTD); #if defined(PAE) || defined(PAE_TABLES) kernel_pmap->pm_pdpt = (pdpt_entry_t *) (KERNBASE + (u_int)IdlePDPT); #endif CPU_FILL(&kernel_pmap->pm_active); /* don't allow deactivation */ TAILQ_INIT(&kernel_pmap->pm_pvchunk); /* * Initialize the global pv list lock. */ rw_init(&pvh_global_lock, "pmap pv global"); LIST_INIT(&allpmaps); /* * Request a spin mutex so that changes to allpmaps cannot be * preempted by smp_rendezvous_cpus(). 
Otherwise, * pmap_update_pde_kernel() could access allpmaps while it is * being changed. */ mtx_init(&allpmaps_lock, "allpmaps", NULL, MTX_SPIN); mtx_lock_spin(&allpmaps_lock); LIST_INSERT_HEAD(&allpmaps, kernel_pmap, pm_list); mtx_unlock_spin(&allpmaps_lock); /* * Reserve some special page table entries/VA space for temporary * mapping of pages. */ #define SYSMAP(c, p, v, n) \ v = (c)va; va += ((n)*PAGE_SIZE); p = pte; pte += (n); va = virtual_avail; pte = vtopte(va); /* * CMAP1/CMAP2 are used for zeroing and copying pages. * CMAP3 is used for the idle process page zeroing. */ for (i = 0; i < MAXCPU; i++) { sysmaps = &sysmaps_pcpu[i]; mtx_init(&sysmaps->lock, "SYSMAPS", NULL, MTX_DEF); SYSMAP(caddr_t, sysmaps->CMAP1, sysmaps->CADDR1, 1) SYSMAP(caddr_t, sysmaps->CMAP2, sysmaps->CADDR2, 1) } SYSMAP(caddr_t, CMAP3, CADDR3, 1) /* * Crashdump maps. */ SYSMAP(caddr_t, unused, crashdumpmap, MAXDUMPPGS) /* * ptvmmap is used for reading arbitrary physical pages via /dev/mem. */ SYSMAP(caddr_t, unused, ptvmmap, 1) /* * msgbufp is used to map the system message buffer. */ SYSMAP(struct msgbuf *, unused, msgbufp, atop(round_page(msgbufsize))) /* * KPTmap is used by pmap_kextract(). * * KPTmap is first initialized by locore. However, that initial * KPTmap can only support NKPT page table pages. Here, a larger * KPTmap is created that can support KVA_PAGES page table pages. */ SYSMAP(pt_entry_t *, KPTD, KPTmap, KVA_PAGES) for (i = 0; i < NKPT; i++) KPTD[i] = (KPTphys + (i << PAGE_SHIFT)) | pgeflag | PG_RW | PG_V; /* * Adjust the start of the KPTD and KPTmap so that the implementation * of pmap_kextract() and pmap_growkernel() can be made simpler. */ KPTD -= KPTDI; KPTmap -= i386_btop(KPTDI << PDRSHIFT); /* * PADDR1 and PADDR2 are used by pmap_pte_quick() and pmap_pte(), * respectively. */ SYSMAP(pt_entry_t *, PMAP1, PADDR1, 1) SYSMAP(pt_entry_t *, PMAP2, PADDR2, 1) mtx_init(&PMAP2mutex, "PMAP2", NULL, MTX_DEF); virtual_avail = va; /* * Leave in place an identity mapping (virt == phys) for the low 1 MB * physical memory region that is used by the ACPI wakeup code. This * mapping must not have PG_G set. */ #ifdef XBOX /* FIXME: This is gross, but needed for the XBOX. Since we are at such * an early stage, we cannot yet neatly map video memory ... :-( * Better fixes are very welcome! */ if (!arch_i386_is_xbox) #endif for (i = 1; i < NKPT; i++) PTD[i] = 0; /* Initialize the PAT MSR if present. */ pmap_init_pat(); /* Turn on PG_G on kernel page(s) */ pmap_set_pg(); } static void pmap_init_qpages(void) { struct pcpu *pc; int i; CPU_FOREACH(i) { pc = pcpu_find(i); pc->pc_qmap_addr = kva_alloc(PAGE_SIZE); if (pc->pc_qmap_addr == 0) panic("pmap_init_qpages: unable to allocate KVA"); } } SYSINIT(qpages_init, SI_SUB_CPU, SI_ORDER_ANY, pmap_init_qpages, NULL); /* * Set up the PAT MSR. */ void pmap_init_pat(void) { int pat_table[PAT_INDEX_SIZE]; uint64_t pat_msr; u_long cr0, cr4; int i; /* Set default PAT index table. */ for (i = 0; i < PAT_INDEX_SIZE; i++) pat_table[i] = -1; pat_table[PAT_WRITE_BACK] = 0; pat_table[PAT_WRITE_THROUGH] = 1; pat_table[PAT_UNCACHEABLE] = 3; pat_table[PAT_WRITE_COMBINING] = 3; pat_table[PAT_WRITE_PROTECTED] = 3; pat_table[PAT_UNCACHED] = 3; /* Bail if this CPU doesn't implement PAT. */ if ((cpu_feature & CPUID_PAT) == 0) { for (i = 0; i < PAT_INDEX_SIZE; i++) pat_index[i] = pat_table[i]; pat_works = 0; return; } /* * Due to some Intel errata, we can only safely use the lower 4 * PAT entries. 
* * Intel Pentium III Processor Specification Update * Errata E.27 (Upper Four PAT Entries Not Usable With Mode B * or Mode C Paging) * * Intel Pentium IV Processor Specification Update * Errata N46 (PAT Index MSB May Be Calculated Incorrectly) */ if (cpu_vendor_id == CPU_VENDOR_INTEL && !(CPUID_TO_FAMILY(cpu_id) == 6 && CPUID_TO_MODEL(cpu_id) >= 0xe)) pat_works = 0; /* Initialize default PAT entries. */ pat_msr = PAT_VALUE(0, PAT_WRITE_BACK) | PAT_VALUE(1, PAT_WRITE_THROUGH) | PAT_VALUE(2, PAT_UNCACHED) | PAT_VALUE(3, PAT_UNCACHEABLE) | PAT_VALUE(4, PAT_WRITE_BACK) | PAT_VALUE(5, PAT_WRITE_THROUGH) | PAT_VALUE(6, PAT_UNCACHED) | PAT_VALUE(7, PAT_UNCACHEABLE); if (pat_works) { /* * Leave the indices 0-3 at the default of WB, WT, UC-, and UC. * Program 5 and 6 as WP and WC. * Leave 4 and 7 as WB and UC. */ pat_msr &= ~(PAT_MASK(5) | PAT_MASK(6)); pat_msr |= PAT_VALUE(5, PAT_WRITE_PROTECTED) | PAT_VALUE(6, PAT_WRITE_COMBINING); pat_table[PAT_UNCACHED] = 2; pat_table[PAT_WRITE_PROTECTED] = 5; pat_table[PAT_WRITE_COMBINING] = 6; } else { /* * Just replace PAT Index 2 with WC instead of UC-. */ pat_msr &= ~PAT_MASK(2); pat_msr |= PAT_VALUE(2, PAT_WRITE_COMBINING); pat_table[PAT_WRITE_COMBINING] = 2; } /* Disable PGE. */ cr4 = rcr4(); load_cr4(cr4 & ~CR4_PGE); /* Disable caches (CD = 1, NW = 0). */ cr0 = rcr0(); load_cr0((cr0 & ~CR0_NW) | CR0_CD); /* Flushes caches and TLBs. */ wbinvd(); invltlb(); /* Update PAT and index table. */ wrmsr(MSR_PAT, pat_msr); for (i = 0; i < PAT_INDEX_SIZE; i++) pat_index[i] = pat_table[i]; /* Flush caches and TLBs again. */ wbinvd(); invltlb(); /* Restore caches and PGE. */ load_cr0(cr0); load_cr4(cr4); } /* * Set PG_G on kernel pages. Only the BSP calls this when SMP is turned on. */ static void pmap_set_pg(void) { pt_entry_t *pte; vm_offset_t va, endva; if (pgeflag == 0) return; endva = KERNBASE + KERNend; if (pseflag) { va = KERNBASE + KERNLOAD; while (va < endva) { pdir_pde(PTD, va) |= pgeflag; invltlb(); /* Flush non-PG_G entries. */ va += NBPDR; } } else { va = (vm_offset_t)btext; while (va < endva) { pte = vtopte(va); if (*pte) *pte |= pgeflag; invltlb(); /* Flush non-PG_G entries. */ va += PAGE_SIZE; } } } /* * Initialize a vm_page's machine-dependent fields. */ void pmap_page_init(vm_page_t m) { TAILQ_INIT(&m->md.pv_list); m->md.pat_mode = PAT_WRITE_BACK; } #if defined(PAE) || defined(PAE_TABLES) static void * pmap_pdpt_allocf(uma_zone_t zone, vm_size_t bytes, uint8_t *flags, int wait) { /* Inform UMA that this allocator uses kernel_map/object. */ *flags = UMA_SLAB_KERNEL; return ((void *)kmem_alloc_contig(kernel_arena, bytes, wait, 0x0ULL, 0xffffffffULL, 1, 0, VM_MEMATTR_DEFAULT)); } #endif /* * Abuse the pte nodes for unmapped kva to thread a kva freelist through. * Requirements: * - Must deal with pages in order to ensure that none of the PG_* bits * are ever set, PG_V in particular. * - Assumes we can write to ptes without pte_store() atomic ops, even * on PAE systems. This should be ok. * - Assumes nothing will ever test these addresses for 0 to indicate * no mapping instead of correctly checking PG_V. * - Assumes a vm_offset_t will fit in a pte (true for i386). * Because PG_V is never set, there can be no mappings to invalidate. 
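* For example, pmap_ptelist_init(&head, base, 3) leaves head == base, with * base's pte slot holding base + PAGE_SIZE, that page's slot holding * base + 2 * PAGE_SIZE, and a 0 in the last slot to terminate the list.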
*/ static vm_offset_t pmap_ptelist_alloc(vm_offset_t *head) { pt_entry_t *pte; vm_offset_t va; va = *head; if (va == 0) panic("pmap_ptelist_alloc: exhausted ptelist KVA"); pte = vtopte(va); *head = *pte; if (*head & PG_V) panic("pmap_ptelist_alloc: va with PG_V set!"); *pte = 0; return (va); } static void pmap_ptelist_free(vm_offset_t *head, vm_offset_t va) { pt_entry_t *pte; if (va & PG_V) panic("pmap_ptelist_free: freeing va with PG_V set!"); pte = vtopte(va); *pte = *head; /* virtual! PG_V is 0 though */ *head = va; } static void pmap_ptelist_init(vm_offset_t *head, void *base, int npages) { int i; vm_offset_t va; *head = 0; for (i = npages - 1; i >= 0; i--) { va = (vm_offset_t)base + i * PAGE_SIZE; pmap_ptelist_free(head, va); } } /* * Initialize the pmap module. * Called by vm_init, to initialize any structures that the pmap * system needs to map virtual memory. */ void pmap_init(void) { struct pmap_preinit_mapping *ppim; vm_page_t mpte; vm_size_t s; int i, pv_npg; /* * Initialize the vm page array entries for the kernel pmap's * page table pages. */ for (i = 0; i < NKPT; i++) { mpte = PHYS_TO_VM_PAGE(KPTphys + (i << PAGE_SHIFT)); KASSERT(mpte >= vm_page_array && mpte < &vm_page_array[vm_page_array_size], ("pmap_init: page table page is out of range")); mpte->pindex = i + KPTDI; mpte->phys_addr = KPTphys + (i << PAGE_SHIFT); } /* * Initialize the address space (zone) for the pv entries. Set a * high water mark so that the system can recover from excessive * numbers of pv entries. */ TUNABLE_INT_FETCH("vm.pmap.shpgperproc", &shpgperproc); pv_entry_max = shpgperproc * maxproc + vm_cnt.v_page_count; TUNABLE_INT_FETCH("vm.pmap.pv_entries", &pv_entry_max); pv_entry_max = roundup(pv_entry_max, _NPCPV); pv_entry_high_water = 9 * (pv_entry_max / 10); /* * If the kernel is running on a virtual machine, then it must assume * that MCA is enabled by the hypervisor. Moreover, the kernel must * be prepared for the hypervisor changing the vendor and family that * are reported by CPUID. Consequently, the workaround for AMD Family * 10h Erratum 383 is enabled if the processor's feature set does not * include at least one feature that is only supported by older Intel * or newer AMD processors. */ if (vm_guest != VM_GUEST_NO && (cpu_feature & CPUID_SS) == 0 && (cpu_feature2 & (CPUID2_SSSE3 | CPUID2_SSE41 | CPUID2_AESNI | CPUID2_AVX | CPUID2_XSAVE)) == 0 && (amd_feature2 & (AMDID2_XOP | AMDID2_FMA4)) == 0) workaround_erratum383 = 1; /* * Are large page mappings supported and enabled? */ TUNABLE_INT_FETCH("vm.pmap.pg_ps_enabled", &pg_ps_enabled); if (pseflag == 0) pg_ps_enabled = 0; else if (pg_ps_enabled) { KASSERT(MAXPAGESIZES > 1 && pagesizes[1] == 0, ("pmap_init: can't assign to pagesizes[1]")); pagesizes[1] = NBPDR; } /* * Calculate the size of the pv head table for superpages. * Handle the possibility that "vm_phys_segs[...].end" is zero. */ pv_npg = trunc_4mpage(vm_phys_segs[vm_phys_nsegs - 1].end - PAGE_SIZE) / NBPDR + 1; /* * Allocate memory for the pv head table for superpages. 
*/ s = (vm_size_t)(pv_npg * sizeof(struct md_page)); s = round_page(s); pv_table = (struct md_page *)kmem_malloc(kernel_arena, s, M_WAITOK | M_ZERO); for (i = 0; i < pv_npg; i++) TAILQ_INIT(&pv_table[i].pv_list); pv_maxchunks = MAX(pv_entry_max / _NPCPV, maxproc); pv_chunkbase = (struct pv_chunk *)kva_alloc(PAGE_SIZE * pv_maxchunks); if (pv_chunkbase == NULL) panic("pmap_init: not enough kvm for pv chunks"); pmap_ptelist_init(&pv_vafree, pv_chunkbase, pv_maxchunks); #if defined(PAE) || defined(PAE_TABLES) pdptzone = uma_zcreate("PDPT", NPGPTD * sizeof(pdpt_entry_t), NULL, NULL, NULL, NULL, (NPGPTD * sizeof(pdpt_entry_t)) - 1, UMA_ZONE_VM | UMA_ZONE_NOFREE); uma_zone_set_allocf(pdptzone, pmap_pdpt_allocf); #endif pmap_initialized = 1; if (!bootverbose) return; for (i = 0; i < PMAP_PREINIT_MAPPING_COUNT; i++) { ppim = pmap_preinit_mapping + i; if (ppim->va == 0) continue; printf("PPIM %u: PA=%#jx, VA=%#x, size=%#x, mode=%#x\n", i, (uintmax_t)ppim->pa, ppim->va, ppim->sz, ppim->mode); } } SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_max, CTLFLAG_RD, &pv_entry_max, 0, "Max number of PV entries"); SYSCTL_INT(_vm_pmap, OID_AUTO, shpgperproc, CTLFLAG_RD, &shpgperproc, 0, "Page share factor per proc"); static SYSCTL_NODE(_vm_pmap, OID_AUTO, pde, CTLFLAG_RD, 0, "2/4MB page mapping counters"); static u_long pmap_pde_demotions; SYSCTL_ULONG(_vm_pmap_pde, OID_AUTO, demotions, CTLFLAG_RD, &pmap_pde_demotions, 0, "2/4MB page demotions"); static u_long pmap_pde_mappings; SYSCTL_ULONG(_vm_pmap_pde, OID_AUTO, mappings, CTLFLAG_RD, &pmap_pde_mappings, 0, "2/4MB page mappings"); static u_long pmap_pde_p_failures; SYSCTL_ULONG(_vm_pmap_pde, OID_AUTO, p_failures, CTLFLAG_RD, &pmap_pde_p_failures, 0, "2/4MB page promotion failures"); static u_long pmap_pde_promotions; SYSCTL_ULONG(_vm_pmap_pde, OID_AUTO, promotions, CTLFLAG_RD, &pmap_pde_promotions, 0, "2/4MB page promotions"); /*************************************************** * Low level helper routines..... ***************************************************/ /* * Determine the appropriate bits to set in a PTE or PDE for a specified * caching mode. */ int pmap_cache_bits(int mode, boolean_t is_pde) { int cache_bits, pat_flag, pat_idx; if (mode < 0 || mode >= PAT_INDEX_SIZE || pat_index[mode] < 0) panic("Unknown caching mode %d\n", mode); /* The PAT bit is different for PTE's and PDE's. */ pat_flag = is_pde ? PG_PDE_PAT : PG_PTE_PAT; /* Map the caching mode to a PAT index. */ pat_idx = pat_index[mode]; /* Map the 3-bit index value into the PAT, PCD, and PWT bits. */ cache_bits = 0; if (pat_idx & 0x4) cache_bits |= pat_flag; if (pat_idx & 0x2) cache_bits |= PG_NC_PCD; if (pat_idx & 0x1) cache_bits |= PG_NC_PWT; return (cache_bits); } /* * The caller is responsible for maintaining TLB consistency. */ static void pmap_kenter_pde(vm_offset_t va, pd_entry_t newpde) { pd_entry_t *pde; pmap_t pmap; boolean_t PTD_updated; PTD_updated = FALSE; mtx_lock_spin(&allpmaps_lock); LIST_FOREACH(pmap, &allpmaps, pm_list) { if ((pmap->pm_pdir[PTDPTDI] & PG_FRAME) == (PTDpde[0] & PG_FRAME)) PTD_updated = TRUE; pde = pmap_pde(pmap, va); pde_store(pde, newpde); } mtx_unlock_spin(&allpmaps_lock); KASSERT(PTD_updated, ("pmap_kenter_pde: current page table is not in allpmaps")); } /* * After changing the page size for the specified virtual address in the page * table, flush the corresponding entries from the processor's TLB. Only the * calling processor's TLB is affected. * * The calling thread must be pinned to a processor. 
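* (The SMP version of pmap_update_pde() below, for example, brackets the * call with sched_pin()/sched_unpin().)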
*/ static void pmap_update_pde_invalidate(vm_offset_t va, pd_entry_t newpde) { u_long cr4; if ((newpde & PG_PS) == 0) /* Demotion: flush a specific 2MB page mapping. */ invlpg(va); else if ((newpde & PG_G) == 0) /* * Promotion: flush every 4KB page mapping from the TLB * because there are too many to flush individually. */ invltlb(); else { /* * Promotion: flush every 4KB page mapping from the TLB, * including any global (PG_G) mappings. */ cr4 = rcr4(); load_cr4(cr4 & ~CR4_PGE); /* * Although preemption at this point could be detrimental to * performance, it would not lead to an error. PG_G is simply * ignored if CR4.PGE is clear. Moreover, in case this block * is re-entered, the load_cr4() either above or below will * modify CR4.PGE flushing the TLB. */ load_cr4(cr4 | CR4_PGE); } } void invltlb_glob(void) { uint64_t cr4; if (pgeflag == 0) { invltlb(); } else { cr4 = rcr4(); load_cr4(cr4 & ~CR4_PGE); load_cr4(cr4 | CR4_PGE); } } #ifdef SMP /* * For SMP, these functions have to use the IPI mechanism for coherence. * * N.B.: Before calling any of the following TLB invalidation functions, * the calling processor must ensure that all stores updating a non- * kernel page table are globally performed. Otherwise, another * processor could cache an old, pre-update entry without being * invalidated. This can happen one of two ways: (1) The pmap becomes * active on another processor after its pm_active field is checked by * one of the following functions but before a store updating the page * table is globally performed. (2) The pmap becomes active on another * processor before its pm_active field is checked but due to * speculative loads one of the following functions still reads the * pmap as inactive on the other processor. * * The kernel page table is exempt because its pm_active field is * immutable. The kernel page table is always active on every * processor. 
*/ void pmap_invalidate_page(pmap_t pmap, vm_offset_t va) { cpuset_t *mask, other_cpus; u_int cpuid; sched_pin(); if (pmap == kernel_pmap || !CPU_CMP(&pmap->pm_active, &all_cpus)) { invlpg(va); mask = &all_cpus; } else { cpuid = PCPU_GET(cpuid); other_cpus = all_cpus; CPU_CLR(cpuid, &other_cpus); if (CPU_ISSET(cpuid, &pmap->pm_active)) invlpg(va); CPU_AND(&other_cpus, &pmap->pm_active); mask = &other_cpus; } smp_masked_invlpg(*mask, va); sched_unpin(); } /* 4k PTEs -- Chosen to exceed the total size of Broadwell L2 TLB */ #define PMAP_INVLPG_THRESHOLD (4 * 1024 * PAGE_SIZE) void pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) { cpuset_t *mask, other_cpus; vm_offset_t addr; u_int cpuid; if (eva - sva >= PMAP_INVLPG_THRESHOLD) { pmap_invalidate_all(pmap); return; } sched_pin(); if (pmap == kernel_pmap || !CPU_CMP(&pmap->pm_active, &all_cpus)) { for (addr = sva; addr < eva; addr += PAGE_SIZE) invlpg(addr); mask = &all_cpus; } else { cpuid = PCPU_GET(cpuid); other_cpus = all_cpus; CPU_CLR(cpuid, &other_cpus); if (CPU_ISSET(cpuid, &pmap->pm_active)) for (addr = sva; addr < eva; addr += PAGE_SIZE) invlpg(addr); CPU_AND(&other_cpus, &pmap->pm_active); mask = &other_cpus; } smp_masked_invlpg_range(*mask, sva, eva); sched_unpin(); } void pmap_invalidate_all(pmap_t pmap) { cpuset_t *mask, other_cpus; u_int cpuid; sched_pin(); if (pmap == kernel_pmap) { invltlb_glob(); mask = &all_cpus; } else if (!CPU_CMP(&pmap->pm_active, &all_cpus)) { invltlb(); mask = &all_cpus; } else { cpuid = PCPU_GET(cpuid); other_cpus = all_cpus; CPU_CLR(cpuid, &other_cpus); if (CPU_ISSET(cpuid, &pmap->pm_active)) invltlb(); CPU_AND(&other_cpus, &pmap->pm_active); mask = &other_cpus; } smp_masked_invltlb(*mask, pmap); sched_unpin(); } void pmap_invalidate_cache(void) { sched_pin(); wbinvd(); smp_cache_flush(); sched_unpin(); } struct pde_action { cpuset_t invalidate; /* processors that invalidate their TLB */ vm_offset_t va; pd_entry_t *pde; pd_entry_t newpde; u_int store; /* processor that updates the PDE */ }; static void pmap_update_pde_kernel(void *arg) { struct pde_action *act = arg; pd_entry_t *pde; pmap_t pmap; if (act->store == PCPU_GET(cpuid)) { /* * Elsewhere, this operation requires allpmaps_lock for * synchronization. Here, it does not because it is being * performed in the context of an all_cpus rendezvous. */ LIST_FOREACH(pmap, &allpmaps, pm_list) { pde = pmap_pde(pmap, act->va); pde_store(pde, act->newpde); } } } static void pmap_update_pde_user(void *arg) { struct pde_action *act = arg; if (act->store == PCPU_GET(cpuid)) pde_store(act->pde, act->newpde); } static void pmap_update_pde_teardown(void *arg) { struct pde_action *act = arg; if (CPU_ISSET(PCPU_GET(cpuid), &act->invalidate)) pmap_update_pde_invalidate(act->va, act->newpde); } /* * Change the page size for the specified virtual address in a way that * prevents any possibility of the TLB ever having two entries that map the * same virtual address using different page sizes. This is the recommended * workaround for Erratum 383 on AMD Family 10h processors. It prevents a * machine check exception for a TLB state that is improperly diagnosed as a * hardware error. 
*/ static void pmap_update_pde(pmap_t pmap, vm_offset_t va, pd_entry_t *pde, pd_entry_t newpde) { struct pde_action act; cpuset_t active, other_cpus; u_int cpuid; sched_pin(); cpuid = PCPU_GET(cpuid); other_cpus = all_cpus; CPU_CLR(cpuid, &other_cpus); if (pmap == kernel_pmap) active = all_cpus; else active = pmap->pm_active; if (CPU_OVERLAP(&active, &other_cpus)) { act.store = cpuid; act.invalidate = active; act.va = va; act.pde = pde; act.newpde = newpde; CPU_SET(cpuid, &active); smp_rendezvous_cpus(active, smp_no_rendevous_barrier, pmap == kernel_pmap ? pmap_update_pde_kernel : pmap_update_pde_user, pmap_update_pde_teardown, &act); } else { if (pmap == kernel_pmap) pmap_kenter_pde(va, newpde); else pde_store(pde, newpde); if (CPU_ISSET(cpuid, &active)) pmap_update_pde_invalidate(va, newpde); } sched_unpin(); } #else /* !SMP */ /* * Normal, non-SMP, 486+ invalidation functions. * We inline these within pmap.c for speed. */ PMAP_INLINE void pmap_invalidate_page(pmap_t pmap, vm_offset_t va) { if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active)) invlpg(va); } PMAP_INLINE void pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) { vm_offset_t addr; if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active)) for (addr = sva; addr < eva; addr += PAGE_SIZE) invlpg(addr); } PMAP_INLINE void pmap_invalidate_all(pmap_t pmap) { if (pmap == kernel_pmap) invltlb_glob(); else if (!CPU_EMPTY(&pmap->pm_active)) invltlb(); } PMAP_INLINE void pmap_invalidate_cache(void) { wbinvd(); } static void pmap_update_pde(pmap_t pmap, vm_offset_t va, pd_entry_t *pde, pd_entry_t newpde) { if (pmap == kernel_pmap) pmap_kenter_pde(va, newpde); else pde_store(pde, newpde); if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active)) pmap_update_pde_invalidate(va, newpde); } #endif /* !SMP */ #define PMAP_CLFLUSH_THRESHOLD (2 * 1024 * 1024) void pmap_invalidate_cache_range(vm_offset_t sva, vm_offset_t eva, boolean_t force) { if (force) { sva &= ~(vm_offset_t)cpu_clflush_line_size; } else { KASSERT((sva & PAGE_MASK) == 0, ("pmap_invalidate_cache_range: sva not page-aligned")); KASSERT((eva & PAGE_MASK) == 0, ("pmap_invalidate_cache_range: eva not page-aligned")); } if ((cpu_feature & CPUID_SS) != 0 && !force) ; /* If "Self Snoop" is supported and allowed, do nothing. */ else if ((cpu_stdext_feature & CPUID_STDEXT_CLFLUSHOPT) != 0 && eva - sva < PMAP_CLFLUSH_THRESHOLD) { #ifdef DEV_APIC /* * XXX: Some CPUs fault, hang, or trash the local APIC * registers if we use CLFLUSH on the local APIC * range. The local APIC is always uncached, so we * don't need to flush for that range anyway. */ if (pmap_kextract(sva) == lapic_paddr) return; #endif /* * Otherwise, do per-cache line flush. Use the mfence * instruction to insure that previous stores are * included in the write-back. The processor * propagates flush to other processors in the cache * coherence domain. */ mfence(); for (; sva < eva; sva += cpu_clflush_line_size) clflushopt(sva); mfence(); } else if ((cpu_feature & CPUID_CLFSH) != 0 && eva - sva < PMAP_CLFLUSH_THRESHOLD) { #ifdef DEV_APIC if (pmap_kextract(sva) == lapic_paddr) return; #endif /* * Writes are ordered by CLFLUSH on Intel CPUs. */ if (cpu_vendor_id != CPU_VENDOR_INTEL) mfence(); for (; sva < eva; sva += cpu_clflush_line_size) clflush(sva); if (cpu_vendor_id != CPU_VENDOR_INTEL) mfence(); } else { /* * No targeted cache flush methods are supported by CPU, * or the supplied range is bigger than 2MB. * Globally invalidate cache. 
*/ pmap_invalidate_cache(); } } void pmap_invalidate_cache_pages(vm_page_t *pages, int count) { int i; if (count >= PMAP_CLFLUSH_THRESHOLD / PAGE_SIZE || (cpu_feature & CPUID_CLFSH) == 0) { pmap_invalidate_cache(); } else { for (i = 0; i < count; i++) pmap_flush_page(pages[i]); } } /* * Are we current address space or kernel? */ static __inline int pmap_is_current(pmap_t pmap) { return (pmap == kernel_pmap || pmap == vmspace_pmap(curthread->td_proc->p_vmspace)); } /* * If the given pmap is not the current or kernel pmap, the returned pte must * be released by passing it to pmap_pte_release(). */ pt_entry_t * pmap_pte(pmap_t pmap, vm_offset_t va) { pd_entry_t newpf; pd_entry_t *pde; pde = pmap_pde(pmap, va); if (*pde & PG_PS) return (pde); if (*pde != 0) { /* are we current address space or kernel? */ if (pmap_is_current(pmap)) return (vtopte(va)); mtx_lock(&PMAP2mutex); newpf = *pde & PG_FRAME; if ((*PMAP2 & PG_FRAME) != newpf) { *PMAP2 = newpf | PG_RW | PG_V | PG_A | PG_M; pmap_invalidate_page(kernel_pmap, (vm_offset_t)PADDR2); } return (PADDR2 + (i386_btop(va) & (NPTEPG - 1))); } return (NULL); } /* * Releases a pte that was obtained from pmap_pte(). Be prepared for the pte * being NULL. */ static __inline void pmap_pte_release(pt_entry_t *pte) { if ((pt_entry_t *)((vm_offset_t)pte & ~PAGE_MASK) == PADDR2) mtx_unlock(&PMAP2mutex); } /* * NB: The sequence of updating a page table followed by accesses to the * corresponding pages is subject to the situation described in the "AMD64 * Architecture Programmer's Manual Volume 2: System Programming" rev. 3.23, * "7.3.1 Special Coherency Considerations". Therefore, issuing the INVLPG * right after modifying the PTE bits is crucial. */ static __inline void invlcaddr(void *caddr) { invlpg((u_int)caddr); } /* * Super fast pmap_pte routine best used when scanning * the pv lists. This eliminates many coarse-grained * invltlb calls. Note that many of the pv list * scans are across different pmaps. It is very wasteful * to do an entire invltlb for checking a single mapping. * * If the given pmap is not the current pmap, pvh_global_lock * must be held and curthread pinned to a CPU. */ static pt_entry_t * pmap_pte_quick(pmap_t pmap, vm_offset_t va) { pd_entry_t newpf; pd_entry_t *pde; pde = pmap_pde(pmap, va); if (*pde & PG_PS) return (pde); if (*pde != 0) { /* are we current address space or kernel? */ if (pmap_is_current(pmap)) return (vtopte(va)); rw_assert(&pvh_global_lock, RA_WLOCKED); KASSERT(curthread->td_pinned > 0, ("curthread not pinned")); newpf = *pde & PG_FRAME; if ((*PMAP1 & PG_FRAME) != newpf) { *PMAP1 = newpf | PG_RW | PG_V | PG_A | PG_M; #ifdef SMP PMAP1cpu = PCPU_GET(cpuid); #endif invlcaddr(PADDR1); PMAP1changed++; } else #ifdef SMP if (PMAP1cpu != PCPU_GET(cpuid)) { PMAP1cpu = PCPU_GET(cpuid); invlcaddr(PADDR1); PMAP1changedcpu++; } else #endif PMAP1unchanged++; return (PADDR1 + (i386_btop(va) & (NPTEPG - 1))); } return (0); } /* * Routine: pmap_extract * Function: * Extract the physical page address associated * with the given map/virtual_address pair. 
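* A 2/4MB page mapping (PG_PS) is resolved by combining the PDE's frame * address with the low PDRMASK bits of the virtual address. */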
*/ vm_paddr_t pmap_extract(pmap_t pmap, vm_offset_t va) { vm_paddr_t rtval; pt_entry_t *pte; pd_entry_t pde; rtval = 0; PMAP_LOCK(pmap); pde = pmap->pm_pdir[va >> PDRSHIFT]; if (pde != 0) { if ((pde & PG_PS) != 0) rtval = (pde & PG_PS_FRAME) | (va & PDRMASK); else { pte = pmap_pte(pmap, va); rtval = (*pte & PG_FRAME) | (va & PAGE_MASK); pmap_pte_release(pte); } } PMAP_UNLOCK(pmap); return (rtval); } /* * Routine: pmap_extract_and_hold * Function: * Atomically extract and hold the physical page * with the given pmap and virtual address pair * if that mapping permits the given protection. */ vm_page_t pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot) { pd_entry_t pde; pt_entry_t pte, *ptep; vm_page_t m; vm_paddr_t pa; pa = 0; m = NULL; PMAP_LOCK(pmap); retry: pde = *pmap_pde(pmap, va); if (pde != 0) { if (pde & PG_PS) { if ((pde & PG_RW) || (prot & VM_PROT_WRITE) == 0) { if (vm_page_pa_tryrelock(pmap, (pde & PG_PS_FRAME) | (va & PDRMASK), &pa)) goto retry; m = PHYS_TO_VM_PAGE((pde & PG_PS_FRAME) | (va & PDRMASK)); vm_page_hold(m); } } else { ptep = pmap_pte(pmap, va); pte = *ptep; pmap_pte_release(ptep); if (pte != 0 && ((pte & PG_RW) || (prot & VM_PROT_WRITE) == 0)) { if (vm_page_pa_tryrelock(pmap, pte & PG_FRAME, &pa)) goto retry; m = PHYS_TO_VM_PAGE(pte & PG_FRAME); vm_page_hold(m); } } } PA_UNLOCK_COND(pa); PMAP_UNLOCK(pmap); return (m); } /*************************************************** * Low level mapping routines..... ***************************************************/ /* * Add a wired page to the kva. * Note: not SMP coherent. * * This function may be used before pmap_bootstrap() is called. */ PMAP_INLINE void pmap_kenter(vm_offset_t va, vm_paddr_t pa) { pt_entry_t *pte; pte = vtopte(va); pte_store(pte, pa | PG_RW | PG_V | pgeflag); } static __inline void pmap_kenter_attr(vm_offset_t va, vm_paddr_t pa, int mode) { pt_entry_t *pte; pte = vtopte(va); pte_store(pte, pa | PG_RW | PG_V | pgeflag | pmap_cache_bits(mode, 0)); } /* * Remove a page from the kernel pagetables. * Note: not SMP coherent. * * This function may be used before pmap_bootstrap() is called. */ PMAP_INLINE void pmap_kremove(vm_offset_t va) { pt_entry_t *pte; pte = vtopte(va); pte_clear(pte); } /* * Used to map a range of physical addresses into kernel * virtual address space. * * The value passed in '*virt' is a suggested virtual address for * the mapping. Architectures which can support a direct-mapped * physical to virtual region can return the appropriate address * within that region, leaving '*virt' unchanged. Other * architectures should map the pages starting at '*virt' and * update '*virt' with the first usable address after the mapped * region. */ vm_offset_t pmap_map(vm_offset_t *virt, vm_paddr_t start, vm_paddr_t end, int prot) { vm_offset_t va, sva; vm_paddr_t superpage_offset; pd_entry_t newpde; va = *virt; /* * Does the physical address range's size and alignment permit at * least one superpage mapping to be created? */ superpage_offset = start & PDRMASK; if ((end - start) - ((NBPDR - superpage_offset) & PDRMASK) >= NBPDR) { /* * Increase the starting virtual address so that its alignment * does not preclude the use of superpage mappings. 
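		 *
		 * (Worked example, non-PAE 4MB superpages: start =
		 * 0x00601000 gives superpage_offset = 0x201000.  A va at
		 * offset 0x100000 within its 4MB region is advanced to
		 * offset 0x201000 of that same region, making va and
		 * start congruent modulo NBPDR so that whole superpages
		 * can be mapped below.)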
*/ if ((va & PDRMASK) < superpage_offset) va = (va & ~PDRMASK) + superpage_offset; else if ((va & PDRMASK) > superpage_offset) va = ((va + PDRMASK) & ~PDRMASK) + superpage_offset; } sva = va; while (start < end) { if ((start & PDRMASK) == 0 && end - start >= NBPDR && pseflag) { KASSERT((va & PDRMASK) == 0, ("pmap_map: misaligned va %#x", va)); newpde = start | PG_PS | pgeflag | PG_RW | PG_V; pmap_kenter_pde(va, newpde); va += NBPDR; start += NBPDR; } else { pmap_kenter(va, start); va += PAGE_SIZE; start += PAGE_SIZE; } } pmap_invalidate_range(kernel_pmap, sva, va); *virt = va; return (sva); } /* * Add a list of wired pages to the kva * this routine is only used for temporary * kernel mappings that do not need to have * page modification or references recorded. * Note that old mappings are simply written * over. The page *must* be wired. * Note: SMP coherent. Uses a ranged shootdown IPI. */ void pmap_qenter(vm_offset_t sva, vm_page_t *ma, int count) { pt_entry_t *endpte, oldpte, pa, *pte; vm_page_t m; oldpte = 0; pte = vtopte(sva); endpte = pte + count; while (pte < endpte) { m = *ma++; pa = VM_PAGE_TO_PHYS(m) | pmap_cache_bits(m->md.pat_mode, 0); if ((*pte & (PG_FRAME | PG_PTE_CACHE)) != pa) { oldpte |= *pte; pte_store(pte, pa | pgeflag | PG_RW | PG_V); } pte++; } if (__predict_false((oldpte & PG_V) != 0)) pmap_invalidate_range(kernel_pmap, sva, sva + count * PAGE_SIZE); } /* * This routine tears out page mappings from the * kernel -- it is meant only for temporary mappings. * Note: SMP coherent. Uses a ranged shootdown IPI. */ void pmap_qremove(vm_offset_t sva, int count) { vm_offset_t va; va = sva; while (count-- > 0) { pmap_kremove(va); va += PAGE_SIZE; } pmap_invalidate_range(kernel_pmap, sva, va); } /*************************************************** * Page table page management routines..... ***************************************************/ static __inline void pmap_free_zero_pages(struct spglist *free) { vm_page_t m; while ((m = SLIST_FIRST(free)) != NULL) { SLIST_REMOVE_HEAD(free, plinks.s.ss); /* Preserve the page's PG_ZERO setting. */ vm_page_free_toq(m); } } /* * Schedule the specified unused page table page to be freed. Specifically, * add the page to the specified list of pages that will be released to the * physical memory manager after the TLB has been updated. */ static __inline void pmap_add_delayed_free_list(vm_page_t m, struct spglist *free, boolean_t set_PG_ZERO) { if (set_PG_ZERO) m->flags |= PG_ZERO; else m->flags &= ~PG_ZERO; SLIST_INSERT_HEAD(free, m, plinks.s.ss); } /* * Inserts the specified page table page into the specified pmap's collection * of idle page table pages. Each of a pmap's page table pages is responsible * for mapping a distinct range of virtual addresses. The pmap's collection is * ordered by this virtual address range. */ static __inline int pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte) { PMAP_LOCK_ASSERT(pmap, MA_OWNED); return (vm_radix_insert(&pmap->pm_root, mpte)); } /* * Looks for a page table page mapping the specified virtual address in the * specified pmap's collection of idle page table pages. Returns NULL if there * is no page table page corresponding to the specified virtual address. */ static __inline vm_page_t pmap_lookup_pt_page(pmap_t pmap, vm_offset_t va) { PMAP_LOCK_ASSERT(pmap, MA_OWNED); return (vm_radix_lookup(&pmap->pm_root, va >> PDRSHIFT)); } /* * Removes the specified page table page from the specified pmap's collection * of idle page table pages. 
The specified page table page must be a member of * the pmap's collection. */ static __inline void pmap_remove_pt_page(pmap_t pmap, vm_page_t mpte) { PMAP_LOCK_ASSERT(pmap, MA_OWNED); vm_radix_remove(&pmap->pm_root, mpte->pindex); } /* * Decrements a page table page's wire count, which is used to record the * number of valid page table entries within the page. If the wire count * drops to zero, then the page table page is unmapped. Returns TRUE if the * page table page was unmapped and FALSE otherwise. */ static inline boolean_t pmap_unwire_ptp(pmap_t pmap, vm_page_t m, struct spglist *free) { --m->wire_count; if (m->wire_count == 0) { _pmap_unwire_ptp(pmap, m, free); return (TRUE); } else return (FALSE); } static void _pmap_unwire_ptp(pmap_t pmap, vm_page_t m, struct spglist *free) { vm_offset_t pteva; /* * unmap the page table page */ pmap->pm_pdir[m->pindex] = 0; --pmap->pm_stats.resident_count; /* * This is a release store so that the ordinary store unmapping * the page table page is globally performed before TLB shoot- * down is begun. */ atomic_subtract_rel_int(&vm_cnt.v_wire_count, 1); /* * Do an invltlb to make the invalidated mapping * take effect immediately. */ pteva = VM_MAXUSER_ADDRESS + i386_ptob(m->pindex); pmap_invalidate_page(pmap, pteva); /* * Put page on a list so that it is released after * *ALL* TLB shootdown is done */ pmap_add_delayed_free_list(m, free, TRUE); } /* * After removing a page table entry, this routine is used to * conditionally free the page, and manage the hold/wire counts. */ static int pmap_unuse_pt(pmap_t pmap, vm_offset_t va, struct spglist *free) { pd_entry_t ptepde; vm_page_t mpte; if (va >= VM_MAXUSER_ADDRESS) return (0); ptepde = *pmap_pde(pmap, va); mpte = PHYS_TO_VM_PAGE(ptepde & PG_FRAME); return (pmap_unwire_ptp(pmap, mpte, free)); } /* * Initialize the pmap for the swapper process. */ void pmap_pinit0(pmap_t pmap) { PMAP_LOCK_INIT(pmap); /* * Since the page table directory is shared with the kernel pmap, * which is already included in the list "allpmaps", this pmap does * not need to be inserted into that list. */ pmap->pm_pdir = (pd_entry_t *)(KERNBASE + (vm_offset_t)IdlePTD); #if defined(PAE) || defined(PAE_TABLES) pmap->pm_pdpt = (pdpt_entry_t *)(KERNBASE + (vm_offset_t)IdlePDPT); #endif pmap->pm_root.rt_root = 0; CPU_ZERO(&pmap->pm_active); PCPU_SET(curpmap, pmap); TAILQ_INIT(&pmap->pm_pvchunk); bzero(&pmap->pm_stats, sizeof pmap->pm_stats); } /* * Initialize a preallocated and zeroed pmap structure, * such as one in a vmspace structure. */ int pmap_pinit(pmap_t pmap) { vm_page_t m, ptdpg[NPGPTD]; vm_paddr_t pa; int i; /* * No need to allocate page table space yet but we do need a valid * page directory table. 
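	 * (NBPTD is NPGPTD << PAGE_SHIFT bytes of KVA: NPGPTD is four
	 * page directory pages under PAE/PAE_TABLES and one otherwise.)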
*/ if (pmap->pm_pdir == NULL) { pmap->pm_pdir = (pd_entry_t *)kva_alloc(NBPTD); if (pmap->pm_pdir == NULL) return (0); #if defined(PAE) || defined(PAE_TABLES) pmap->pm_pdpt = uma_zalloc(pdptzone, M_WAITOK | M_ZERO); KASSERT(((vm_offset_t)pmap->pm_pdpt & ((NPGPTD * sizeof(pdpt_entry_t)) - 1)) == 0, ("pmap_pinit: pdpt misaligned")); KASSERT(pmap_kextract((vm_offset_t)pmap->pm_pdpt) < (4ULL<<30), ("pmap_pinit: pdpt above 4g")); #endif pmap->pm_root.rt_root = 0; } KASSERT(vm_radix_is_empty(&pmap->pm_root), ("pmap_pinit: pmap has reserved page table page(s)")); /* * allocate the page directory page(s) */ for (i = 0; i < NPGPTD;) { m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO); if (m == NULL) VM_WAIT; else { ptdpg[i++] = m; } } pmap_qenter((vm_offset_t)pmap->pm_pdir, ptdpg, NPGPTD); for (i = 0; i < NPGPTD; i++) if ((ptdpg[i]->flags & PG_ZERO) == 0) pagezero(pmap->pm_pdir + (i * NPDEPG)); mtx_lock_spin(&allpmaps_lock); LIST_INSERT_HEAD(&allpmaps, pmap, pm_list); /* Copy the kernel page table directory entries. */ bcopy(PTD + KPTDI, pmap->pm_pdir + KPTDI, nkpt * sizeof(pd_entry_t)); mtx_unlock_spin(&allpmaps_lock); /* install self-referential address mapping entry(s) */ for (i = 0; i < NPGPTD; i++) { pa = VM_PAGE_TO_PHYS(ptdpg[i]); pmap->pm_pdir[PTDPTDI + i] = pa | PG_V | PG_RW | PG_A | PG_M; #if defined(PAE) || defined(PAE_TABLES) pmap->pm_pdpt[i] = pa | PG_V; #endif } CPU_ZERO(&pmap->pm_active); TAILQ_INIT(&pmap->pm_pvchunk); bzero(&pmap->pm_stats, sizeof pmap->pm_stats); return (1); } /* * this routine is called if the page table page is not * mapped correctly. */ static vm_page_t _pmap_allocpte(pmap_t pmap, u_int ptepindex, u_int flags) { vm_paddr_t ptepa; vm_page_t m; /* * Allocate a page table page. */ if ((m = vm_page_alloc(NULL, ptepindex, VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL) { if ((flags & PMAP_ENTER_NOSLEEP) == 0) { PMAP_UNLOCK(pmap); rw_wunlock(&pvh_global_lock); VM_WAIT; rw_wlock(&pvh_global_lock); PMAP_LOCK(pmap); } /* * Indicate the need to retry. While waiting, the page table * page may have been allocated. */ return (NULL); } if ((m->flags & PG_ZERO) == 0) pmap_zero_page(m); /* * Map the pagetable page into the process address space, if * it isn't already there. */ pmap->pm_stats.resident_count++; ptepa = VM_PAGE_TO_PHYS(m); pmap->pm_pdir[ptepindex] = (pd_entry_t) (ptepa | PG_U | PG_RW | PG_V | PG_A | PG_M); return (m); } static vm_page_t pmap_allocpte(pmap_t pmap, vm_offset_t va, u_int flags) { u_int ptepindex; pd_entry_t ptepa; vm_page_t m; /* * Calculate pagetable page index */ ptepindex = va >> PDRSHIFT; retry: /* * Get the page directory entry */ ptepa = pmap->pm_pdir[ptepindex]; /* * This supports switching from a 4MB page to a * normal 4K page. */ if (ptepa & PG_PS) { (void)pmap_demote_pde(pmap, &pmap->pm_pdir[ptepindex], va); ptepa = pmap->pm_pdir[ptepindex]; } /* * If the page table page is mapped, we just increment the * hold count, and activate it. */ if (ptepa) { m = PHYS_TO_VM_PAGE(ptepa & PG_FRAME); m->wire_count++; } else { /* * Here if the pte page isn't mapped, or if it has * been deallocated. */ m = _pmap_allocpte(pmap, ptepindex, flags); if (m == NULL && (flags & PMAP_ENTER_NOSLEEP) == 0) goto retry; } return (m); } /*************************************************** * Pmap allocation/deallocation routines. ***************************************************/ /* * Release any resources held by the given physical map. * Called when a pmap initialized by pmap_pinit is being released. 
* Should only be called if the map contains no valid mappings. */ void pmap_release(pmap_t pmap) { vm_page_t m, ptdpg[NPGPTD]; int i; KASSERT(pmap->pm_stats.resident_count == 0, ("pmap_release: pmap resident count %ld != 0", pmap->pm_stats.resident_count)); KASSERT(vm_radix_is_empty(&pmap->pm_root), ("pmap_release: pmap has reserved page table page(s)")); KASSERT(CPU_EMPTY(&pmap->pm_active), ("releasing active pmap %p", pmap)); mtx_lock_spin(&allpmaps_lock); LIST_REMOVE(pmap, pm_list); mtx_unlock_spin(&allpmaps_lock); for (i = 0; i < NPGPTD; i++) ptdpg[i] = PHYS_TO_VM_PAGE(pmap->pm_pdir[PTDPTDI + i] & PG_FRAME); bzero(pmap->pm_pdir + PTDPTDI, (nkpt + NPGPTD) * sizeof(*pmap->pm_pdir)); pmap_qremove((vm_offset_t)pmap->pm_pdir, NPGPTD); for (i = 0; i < NPGPTD; i++) { m = ptdpg[i]; #if defined(PAE) || defined(PAE_TABLES) KASSERT(VM_PAGE_TO_PHYS(m) == (pmap->pm_pdpt[i] & PG_FRAME), ("pmap_release: got wrong ptd page")); #endif m->wire_count--; atomic_subtract_int(&vm_cnt.v_wire_count, 1); vm_page_free_zero(m); } } static int kvm_size(SYSCTL_HANDLER_ARGS) { unsigned long ksize = VM_MAX_KERNEL_ADDRESS - KERNBASE; return (sysctl_handle_long(oidp, &ksize, 0, req)); } SYSCTL_PROC(_vm, OID_AUTO, kvm_size, CTLTYPE_LONG|CTLFLAG_RD, 0, 0, kvm_size, "IU", "Size of KVM"); static int kvm_free(SYSCTL_HANDLER_ARGS) { unsigned long kfree = VM_MAX_KERNEL_ADDRESS - kernel_vm_end; return (sysctl_handle_long(oidp, &kfree, 0, req)); } SYSCTL_PROC(_vm, OID_AUTO, kvm_free, CTLTYPE_LONG|CTLFLAG_RD, 0, 0, kvm_free, "IU", "Amount of KVM free"); /* * grow the number of kernel page table entries, if needed */ void pmap_growkernel(vm_offset_t addr) { vm_paddr_t ptppaddr; vm_page_t nkpg; pd_entry_t newpdir; mtx_assert(&kernel_map->system_mtx, MA_OWNED); addr = roundup2(addr, NBPDR); if (addr - 1 >= kernel_map->max_offset) addr = kernel_map->max_offset; while (kernel_vm_end < addr) { if (pdir_pde(PTD, kernel_vm_end)) { kernel_vm_end = (kernel_vm_end + NBPDR) & ~PDRMASK; if (kernel_vm_end - 1 >= kernel_map->max_offset) { kernel_vm_end = kernel_map->max_offset; break; } continue; } nkpg = vm_page_alloc(NULL, kernel_vm_end >> PDRSHIFT, VM_ALLOC_INTERRUPT | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO); if (nkpg == NULL) panic("pmap_growkernel: no memory to grow kernel"); nkpt++; if ((nkpg->flags & PG_ZERO) == 0) pmap_zero_page(nkpg); ptppaddr = VM_PAGE_TO_PHYS(nkpg); newpdir = (pd_entry_t) (ptppaddr | PG_V | PG_RW | PG_A | PG_M); pdir_pde(KPTD, kernel_vm_end) = pgeflag | newpdir; pmap_kenter_pde(kernel_vm_end, newpdir); kernel_vm_end = (kernel_vm_end + NBPDR) & ~PDRMASK; if (kernel_vm_end - 1 >= kernel_map->max_offset) { kernel_vm_end = kernel_map->max_offset; break; } } } /*************************************************** * page management routines. 
***************************************************/ CTASSERT(sizeof(struct pv_chunk) == PAGE_SIZE); CTASSERT(_NPCM == 11); CTASSERT(_NPCPV == 336); static __inline struct pv_chunk * pv_to_chunk(pv_entry_t pv) { return ((struct pv_chunk *)((uintptr_t)pv & ~(uintptr_t)PAGE_MASK)); } #define PV_PMAP(pv) (pv_to_chunk(pv)->pc_pmap) #define PC_FREE0_9 0xfffffffful /* Free values for index 0 through 9 */ #define PC_FREE10 0x0000fffful /* Free values for index 10 */ static const uint32_t pc_freemask[_NPCM] = { PC_FREE0_9, PC_FREE0_9, PC_FREE0_9, PC_FREE0_9, PC_FREE0_9, PC_FREE0_9, PC_FREE0_9, PC_FREE0_9, PC_FREE0_9, PC_FREE0_9, PC_FREE10 }; SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_count, CTLFLAG_RD, &pv_entry_count, 0, "Current number of pv entries"); #ifdef PV_STATS static int pc_chunk_count, pc_chunk_allocs, pc_chunk_frees, pc_chunk_tryfail; SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_count, CTLFLAG_RD, &pc_chunk_count, 0, "Current number of pv entry chunks"); SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_allocs, CTLFLAG_RD, &pc_chunk_allocs, 0, "Current number of pv entry chunks allocated"); SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_frees, CTLFLAG_RD, &pc_chunk_frees, 0, "Current number of pv entry chunks frees"); SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_tryfail, CTLFLAG_RD, &pc_chunk_tryfail, 0, "Number of times tried to get a chunk page but failed."); static long pv_entry_frees, pv_entry_allocs; static int pv_entry_spare; SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_frees, CTLFLAG_RD, &pv_entry_frees, 0, "Current number of pv entry frees"); SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_allocs, CTLFLAG_RD, &pv_entry_allocs, 0, "Current number of pv entry allocs"); SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_spare, CTLFLAG_RD, &pv_entry_spare, 0, "Current number of spare pv entries"); #endif /* * We are in a serious low memory condition. Resort to * drastic measures to free some pages so we can allocate * another pv entry chunk. */ static vm_page_t pmap_pv_reclaim(pmap_t locked_pmap) { struct pch newtail; struct pv_chunk *pc; struct md_page *pvh; pd_entry_t *pde; pmap_t pmap; pt_entry_t *pte, tpte; pv_entry_t pv; vm_offset_t va; vm_page_t m, m_pc; struct spglist free; uint32_t inuse; int bit, field, freed; PMAP_LOCK_ASSERT(locked_pmap, MA_OWNED); pmap = NULL; m_pc = NULL; SLIST_INIT(&free); TAILQ_INIT(&newtail); while ((pc = TAILQ_FIRST(&pv_chunks)) != NULL && (pv_vafree == 0 || SLIST_EMPTY(&free))) { TAILQ_REMOVE(&pv_chunks, pc, pc_lru); if (pmap != pc->pc_pmap) { if (pmap != NULL) { pmap_invalidate_all(pmap); if (pmap != locked_pmap) PMAP_UNLOCK(pmap); } pmap = pc->pc_pmap; /* Avoid deadlock and lock recursion. */ if (pmap > locked_pmap) PMAP_LOCK(pmap); else if (pmap != locked_pmap && !PMAP_TRYLOCK(pmap)) { pmap = NULL; TAILQ_INSERT_TAIL(&newtail, pc, pc_lru); continue; } } /* * Destroy every non-wired, 4 KB page mapping in the chunk. 
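		 *
		 * (A set bit in pc_map marks a free pv entry, so
		 * ~pc->pc_map[field] & pc_freemask[field] is the mask of
		 * entries in use, and bsfl() picks off its lowest set
		 * bit.  E.g. pc_map[0] == 0xfffffff0 means slots 0-3 of
		 * field 0 are live and are visited in ascending order.)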
 */
		freed = 0;
		for (field = 0; field < _NPCM; field++) {
			for (inuse = ~pc->pc_map[field] & pc_freemask[field];
			    inuse != 0; inuse &= ~(1UL << bit)) {
				bit = bsfl(inuse);
				pv = &pc->pc_pventry[field * 32 + bit];
				va = pv->pv_va;
				pde = pmap_pde(pmap, va);
				if ((*pde & PG_PS) != 0)
					continue;
				pte = pmap_pte(pmap, va);
				tpte = *pte;
				if ((tpte & PG_W) == 0)
					tpte = pte_load_clear(pte);
				pmap_pte_release(pte);
				if ((tpte & PG_W) != 0)
					continue;
				KASSERT(tpte != 0,
				    ("pmap_pv_reclaim: pmap %p va %x zero pte",
				    pmap, va));
				if ((tpte & PG_G) != 0)
					pmap_invalidate_page(pmap, va);
				m = PHYS_TO_VM_PAGE(tpte & PG_FRAME);
				if ((tpte & (PG_M | PG_RW)) == (PG_M | PG_RW))
					vm_page_dirty(m);
				if ((tpte & PG_A) != 0)
					vm_page_aflag_set(m, PGA_REFERENCED);
				TAILQ_REMOVE(&m->md.pv_list, pv, pv_next);
				if (TAILQ_EMPTY(&m->md.pv_list) &&
				    (m->flags & PG_FICTITIOUS) == 0) {
					pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
					if (TAILQ_EMPTY(&pvh->pv_list)) {
						vm_page_aflag_clear(m,
						    PGA_WRITEABLE);
					}
				}
				pc->pc_map[field] |= 1UL << bit;
				pmap_unuse_pt(pmap, va, &free);
				freed++;
			}
		}
		if (freed == 0) {
			TAILQ_INSERT_TAIL(&newtail, pc, pc_lru);
			continue;
		}
		/* Every freed mapping is for a 4 KB page. */
		pmap->pm_stats.resident_count -= freed;
		PV_STAT(pv_entry_frees += freed);
		PV_STAT(pv_entry_spare += freed);
		pv_entry_count -= freed;
		TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
		for (field = 0; field < _NPCM; field++)
			if (pc->pc_map[field] != pc_freemask[field]) {
				TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc,
				    pc_list);
				TAILQ_INSERT_TAIL(&newtail, pc, pc_lru);

				/*
				 * One freed pv entry in locked_pmap is
				 * sufficient.
				 */
				if (pmap == locked_pmap)
					goto out;
				break;
			}
		if (field == _NPCM) {
			PV_STAT(pv_entry_spare -= _NPCPV);
			PV_STAT(pc_chunk_count--);
			PV_STAT(pc_chunk_frees++);
			/* Entire chunk is free; return it. */
			m_pc = PHYS_TO_VM_PAGE(pmap_kextract((vm_offset_t)pc));
			pmap_qremove((vm_offset_t)pc, 1);
			pmap_ptelist_free(&pv_vafree, (vm_offset_t)pc);
			break;
		}
	}
out:
	TAILQ_CONCAT(&pv_chunks, &newtail, pc_lru);
	if (pmap != NULL) {
		pmap_invalidate_all(pmap);
		if (pmap != locked_pmap)
			PMAP_UNLOCK(pmap);
	}
	if (m_pc == NULL && pv_vafree != 0 && !SLIST_EMPTY(&free)) {
		m_pc = SLIST_FIRST(&free);
		SLIST_REMOVE_HEAD(&free, plinks.s.ss);
		/* Recycle a freed page table page. */
		m_pc->wire_count = 1;
		atomic_add_int(&vm_cnt.v_wire_count, 1);
	}
	pmap_free_zero_pages(&free);
	return (m_pc);
}

/*
 * free the pv_entry back to the free list
 */
static void
free_pv_entry(pmap_t pmap, pv_entry_t pv)
{
	struct pv_chunk *pc;
	int idx, field, bit;

	rw_assert(&pvh_global_lock, RA_WLOCKED);
	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
	PV_STAT(pv_entry_frees++);
	PV_STAT(pv_entry_spare++);
	pv_entry_count--;
	pc = pv_to_chunk(pv);
	idx = pv - &pc->pc_pventry[0];
	field = idx / 32;
	bit = idx % 32;
	pc->pc_map[field] |= 1ul << bit;
	for (idx = 0; idx < _NPCM; idx++)
		if (pc->pc_map[idx] != pc_freemask[idx]) {
			/*
			 * 98% of the time, pc is already at the head of the
			 * list.  If it isn't already, move it to the head.
*/ if (__predict_false(TAILQ_FIRST(&pmap->pm_pvchunk) != pc)) { TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list); } return; } TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); free_pv_chunk(pc); } static void free_pv_chunk(struct pv_chunk *pc) { vm_page_t m; TAILQ_REMOVE(&pv_chunks, pc, pc_lru); PV_STAT(pv_entry_spare -= _NPCPV); PV_STAT(pc_chunk_count--); PV_STAT(pc_chunk_frees++); /* entire chunk is free, return it */ m = PHYS_TO_VM_PAGE(pmap_kextract((vm_offset_t)pc)); pmap_qremove((vm_offset_t)pc, 1); vm_page_unwire(m, PQ_NONE); vm_page_free(m); pmap_ptelist_free(&pv_vafree, (vm_offset_t)pc); } /* * get a new pv_entry, allocating a block from the system * when needed. */ static pv_entry_t get_pv_entry(pmap_t pmap, boolean_t try) { static const struct timeval printinterval = { 60, 0 }; static struct timeval lastprint; int bit, field; pv_entry_t pv; struct pv_chunk *pc; vm_page_t m; rw_assert(&pvh_global_lock, RA_WLOCKED); PMAP_LOCK_ASSERT(pmap, MA_OWNED); PV_STAT(pv_entry_allocs++); pv_entry_count++; if (pv_entry_count > pv_entry_high_water) if (ratecheck(&lastprint, &printinterval)) printf("Approaching the limit on PV entries, consider " "increasing either the vm.pmap.shpgperproc or the " "vm.pmap.pv_entry_max tunable.\n"); retry: pc = TAILQ_FIRST(&pmap->pm_pvchunk); if (pc != NULL) { for (field = 0; field < _NPCM; field++) { if (pc->pc_map[field]) { bit = bsfl(pc->pc_map[field]); break; } } if (field < _NPCM) { pv = &pc->pc_pventry[field * 32 + bit]; pc->pc_map[field] &= ~(1ul << bit); /* If this was the last item, move it to tail */ for (field = 0; field < _NPCM; field++) if (pc->pc_map[field] != 0) { PV_STAT(pv_entry_spare--); return (pv); /* not full, return */ } TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc, pc_list); PV_STAT(pv_entry_spare--); return (pv); } } /* * Access to the ptelist "pv_vafree" is synchronized by the pvh * global lock. If "pv_vafree" is currently non-empty, it will * remain non-empty until pmap_ptelist_alloc() completes. */ if (pv_vafree == 0 || (m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED)) == NULL) { if (try) { pv_entry_count--; PV_STAT(pc_chunk_tryfail++); return (NULL); } m = pmap_pv_reclaim(pmap); if (m == NULL) goto retry; } PV_STAT(pc_chunk_count++); PV_STAT(pc_chunk_allocs++); pc = (struct pv_chunk *)pmap_ptelist_alloc(&pv_vafree); pmap_qenter((vm_offset_t)pc, &m, 1); pc->pc_pmap = pmap; pc->pc_map[0] = pc_freemask[0] & ~1ul; /* preallocated bit 0 */ for (field = 1; field < _NPCM; field++) pc->pc_map[field] = pc_freemask[field]; TAILQ_INSERT_TAIL(&pv_chunks, pc, pc_lru); pv = &pc->pc_pventry[0]; TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list); PV_STAT(pv_entry_spare += _NPCPV - 1); return (pv); } static __inline pv_entry_t pmap_pvh_remove(struct md_page *pvh, pmap_t pmap, vm_offset_t va) { pv_entry_t pv; rw_assert(&pvh_global_lock, RA_WLOCKED); TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) { if (pmap == PV_PMAP(pv) && va == pv->pv_va) { TAILQ_REMOVE(&pvh->pv_list, pv, pv_next); break; } } return (pv); } static void pmap_pv_demote_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa) { struct md_page *pvh; pv_entry_t pv; vm_offset_t va_last; vm_page_t m; rw_assert(&pvh_global_lock, RA_WLOCKED); KASSERT((pa & PDRMASK) == 0, ("pmap_pv_demote_pde: pa is not 4mpage aligned")); /* * Transfer the 4mpage's pv entry for this mapping to the first * page's pv list. 
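	 *
	 * (For a non-PAE 4MB mapping at va 0xc0400000, for instance, the
	 * single superpage pv entry moves to the pv list of the page at
	 * pa, and the loop below then creates pv entries for the other
	 * NPTEPG - 1 pages at va 0xc0401000 through 0xc07ff000.)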
*/ pvh = pa_to_pvh(pa); va = trunc_4mpage(va); pv = pmap_pvh_remove(pvh, pmap, va); KASSERT(pv != NULL, ("pmap_pv_demote_pde: pv not found")); m = PHYS_TO_VM_PAGE(pa); TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); /* Instantiate the remaining NPTEPG - 1 pv entries. */ va_last = va + NBPDR - PAGE_SIZE; do { m++; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_pv_demote_pde: page %p is not managed", m)); va += PAGE_SIZE; pmap_insert_entry(pmap, va, m); } while (va < va_last); } static void pmap_pv_promote_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa) { struct md_page *pvh; pv_entry_t pv; vm_offset_t va_last; vm_page_t m; rw_assert(&pvh_global_lock, RA_WLOCKED); KASSERT((pa & PDRMASK) == 0, ("pmap_pv_promote_pde: pa is not 4mpage aligned")); /* * Transfer the first page's pv entry for this mapping to the * 4mpage's pv list. Aside from avoiding the cost of a call * to get_pv_entry(), a transfer avoids the possibility that * get_pv_entry() calls pmap_collect() and that pmap_collect() * removes one of the mappings that is being promoted. */ m = PHYS_TO_VM_PAGE(pa); va = trunc_4mpage(va); pv = pmap_pvh_remove(&m->md, pmap, va); KASSERT(pv != NULL, ("pmap_pv_promote_pde: pv not found")); pvh = pa_to_pvh(pa); TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_next); /* Free the remaining NPTEPG - 1 pv entries. */ va_last = va + NBPDR - PAGE_SIZE; do { m++; va += PAGE_SIZE; pmap_pvh_free(&m->md, pmap, va); } while (va < va_last); } static void pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va) { pv_entry_t pv; pv = pmap_pvh_remove(pvh, pmap, va); KASSERT(pv != NULL, ("pmap_pvh_free: pv not found")); free_pv_entry(pmap, pv); } static void pmap_remove_entry(pmap_t pmap, vm_page_t m, vm_offset_t va) { struct md_page *pvh; rw_assert(&pvh_global_lock, RA_WLOCKED); pmap_pvh_free(&m->md, pmap, va); if (TAILQ_EMPTY(&m->md.pv_list) && (m->flags & PG_FICTITIOUS) == 0) { pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); if (TAILQ_EMPTY(&pvh->pv_list)) vm_page_aflag_clear(m, PGA_WRITEABLE); } } /* * Create a pv entry for page at pa for * (pmap, va). */ static void pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t m) { pv_entry_t pv; rw_assert(&pvh_global_lock, RA_WLOCKED); PMAP_LOCK_ASSERT(pmap, MA_OWNED); pv = get_pv_entry(pmap, FALSE); pv->pv_va = va; TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); } /* * Conditionally create a pv entry. */ static boolean_t pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va, vm_page_t m) { pv_entry_t pv; rw_assert(&pvh_global_lock, RA_WLOCKED); PMAP_LOCK_ASSERT(pmap, MA_OWNED); if (pv_entry_count < pv_entry_high_water && (pv = get_pv_entry(pmap, TRUE)) != NULL) { pv->pv_va = va; TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); return (TRUE); } else return (FALSE); } /* * Create the pv entries for each of the pages within a superpage. */ static boolean_t pmap_pv_insert_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa) { struct md_page *pvh; pv_entry_t pv; rw_assert(&pvh_global_lock, RA_WLOCKED); if (pv_entry_count < pv_entry_high_water && (pv = get_pv_entry(pmap, TRUE)) != NULL) { pv->pv_va = va; pvh = pa_to_pvh(pa); TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_next); return (TRUE); } else return (FALSE); } /* * Fills a page table page with mappings to consecutive physical pages. */ static void pmap_fill_ptp(pt_entry_t *firstpte, pt_entry_t newpte) { pt_entry_t *pte; for (pte = firstpte; pte < firstpte + NPTEPG; pte++) { *pte = newpte; newpte += PAGE_SIZE; } } /* * Tries to demote a 2- or 4MB page mapping. If demotion fails, the * 2- or 4MB page mapping is invalidated. 
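 */

/*
 * A minimal sketch (hypothetical helper, not part of this file) of the
 * PAT-bit relocation that pmap_demote_pde() performs below: in a 2/4MB
 * PDE the PAT selector is PG_PDE_PAT (bit 12), while in a 4KB PTE it is
 * PG_PTE_PAT (bit 7).  XORing with both masks moves a set bit from the
 * PDE position to the PTE position without disturbing other attributes.
 */
static __inline pt_entry_t
example_pde_attrs_to_pte(pd_entry_t pde)
{
	pt_entry_t pte;

	pte = pde & ~PG_PS;		/* a 4KB PTE has no PG_PS bit */
	if ((pte & PG_PDE_PAT) != 0)
		pte ^= PG_PDE_PAT | PG_PTE_PAT;	/* clear bit 12, set bit 7 */
	return (pte);
}

/*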
*/ static boolean_t pmap_demote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va) { pd_entry_t newpde, oldpde; pt_entry_t *firstpte, newpte; vm_paddr_t mptepa; vm_page_t mpte; struct spglist free; PMAP_LOCK_ASSERT(pmap, MA_OWNED); oldpde = *pde; KASSERT((oldpde & (PG_PS | PG_V)) == (PG_PS | PG_V), ("pmap_demote_pde: oldpde is missing PG_PS and/or PG_V")); if ((oldpde & PG_A) != 0 && (mpte = pmap_lookup_pt_page(pmap, va)) != NULL) pmap_remove_pt_page(pmap, mpte); else { KASSERT((oldpde & PG_W) == 0, ("pmap_demote_pde: page table page for a wired mapping" " is missing")); /* * Invalidate the 2- or 4MB page mapping and return * "failure" if the mapping was never accessed or the * allocation of the new page table page fails. */ if ((oldpde & PG_A) == 0 || (mpte = vm_page_alloc(NULL, va >> PDRSHIFT, VM_ALLOC_NOOBJ | VM_ALLOC_NORMAL | VM_ALLOC_WIRED)) == NULL) { SLIST_INIT(&free); pmap_remove_pde(pmap, pde, trunc_4mpage(va), &free); pmap_invalidate_page(pmap, trunc_4mpage(va)); pmap_free_zero_pages(&free); CTR2(KTR_PMAP, "pmap_demote_pde: failure for va %#x" " in pmap %p", va, pmap); return (FALSE); } if (va < VM_MAXUSER_ADDRESS) pmap->pm_stats.resident_count++; } mptepa = VM_PAGE_TO_PHYS(mpte); /* * If the page mapping is in the kernel's address space, then the * KPTmap can provide access to the page table page. Otherwise, * temporarily map the page table page (mpte) into the kernel's * address space at either PADDR1 or PADDR2. */ if (va >= KERNBASE) firstpte = &KPTmap[i386_btop(trunc_4mpage(va))]; else if (curthread->td_pinned > 0 && rw_wowned(&pvh_global_lock)) { if ((*PMAP1 & PG_FRAME) != mptepa) { *PMAP1 = mptepa | PG_RW | PG_V | PG_A | PG_M; #ifdef SMP PMAP1cpu = PCPU_GET(cpuid); #endif invlcaddr(PADDR1); PMAP1changed++; } else #ifdef SMP if (PMAP1cpu != PCPU_GET(cpuid)) { PMAP1cpu = PCPU_GET(cpuid); invlcaddr(PADDR1); PMAP1changedcpu++; } else #endif PMAP1unchanged++; firstpte = PADDR1; } else { mtx_lock(&PMAP2mutex); if ((*PMAP2 & PG_FRAME) != mptepa) { *PMAP2 = mptepa | PG_RW | PG_V | PG_A | PG_M; pmap_invalidate_page(kernel_pmap, (vm_offset_t)PADDR2); } firstpte = PADDR2; } newpde = mptepa | PG_M | PG_A | (oldpde & PG_U) | PG_RW | PG_V; KASSERT((oldpde & PG_A) != 0, ("pmap_demote_pde: oldpde is missing PG_A")); KASSERT((oldpde & (PG_M | PG_RW)) != PG_RW, ("pmap_demote_pde: oldpde is missing PG_M")); newpte = oldpde & ~PG_PS; if ((newpte & PG_PDE_PAT) != 0) newpte ^= PG_PDE_PAT | PG_PTE_PAT; /* * If the page table page is new, initialize it. */ if (mpte->wire_count == 1) { mpte->wire_count = NPTEPG; pmap_fill_ptp(firstpte, newpte); } KASSERT((*firstpte & PG_FRAME) == (newpte & PG_FRAME), ("pmap_demote_pde: firstpte and newpte map different physical" " addresses")); /* * If the mapping has changed attributes, update the page table * entries. */ if ((*firstpte & PG_PTE_PROMOTE) != (newpte & PG_PTE_PROMOTE)) pmap_fill_ptp(firstpte, newpte); /* * Demote the mapping. This pmap is locked. The old PDE has * PG_A set. If the old PDE has PG_RW set, it also has PG_M * set. Thus, there is no danger of a race with another * processor changing the setting of PG_A and/or PG_M between * the read above and the store below. */ if (workaround_erratum383) pmap_update_pde(pmap, va, pde, newpde); else if (pmap == kernel_pmap) pmap_kenter_pde(va, newpde); else pde_store(pde, newpde); if (firstpte == PADDR2) mtx_unlock(&PMAP2mutex); /* * Invalidate the recursive mapping of the page table page. */ pmap_invalidate_page(pmap, (vm_offset_t)vtopte(va)); /* * Demote the pv entry. 
This depends on the earlier demotion * of the mapping. Specifically, the (re)creation of a per- * page pv entry might trigger the execution of pmap_collect(), * which might reclaim a newly (re)created per-page pv entry * and destroy the associated mapping. In order to destroy * the mapping, the PDE must have already changed from mapping * the 2mpage to referencing the page table page. */ if ((oldpde & PG_MANAGED) != 0) pmap_pv_demote_pde(pmap, va, oldpde & PG_PS_FRAME); pmap_pde_demotions++; CTR2(KTR_PMAP, "pmap_demote_pde: success for va %#x" " in pmap %p", va, pmap); return (TRUE); } /* * Removes a 2- or 4MB page mapping from the kernel pmap. */ static void pmap_remove_kernel_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va) { pd_entry_t newpde; vm_paddr_t mptepa; vm_page_t mpte; PMAP_LOCK_ASSERT(pmap, MA_OWNED); mpte = pmap_lookup_pt_page(pmap, va); if (mpte == NULL) panic("pmap_remove_kernel_pde: Missing pt page."); pmap_remove_pt_page(pmap, mpte); mptepa = VM_PAGE_TO_PHYS(mpte); newpde = mptepa | PG_M | PG_A | PG_RW | PG_V; /* * Initialize the page table page. */ pagezero((void *)&KPTmap[i386_btop(trunc_4mpage(va))]); /* * Remove the mapping. */ if (workaround_erratum383) pmap_update_pde(pmap, va, pde, newpde); else pmap_kenter_pde(va, newpde); /* * Invalidate the recursive mapping of the page table page. */ pmap_invalidate_page(pmap, (vm_offset_t)vtopte(va)); } /* * pmap_remove_pde: do the things to unmap a superpage in a process */ static void pmap_remove_pde(pmap_t pmap, pd_entry_t *pdq, vm_offset_t sva, struct spglist *free) { struct md_page *pvh; pd_entry_t oldpde; vm_offset_t eva, va; vm_page_t m, mpte; PMAP_LOCK_ASSERT(pmap, MA_OWNED); KASSERT((sva & PDRMASK) == 0, ("pmap_remove_pde: sva is not 4mpage aligned")); oldpde = pte_load_clear(pdq); if (oldpde & PG_W) pmap->pm_stats.wired_count -= NBPDR / PAGE_SIZE; /* * Machines that don't support invlpg, also don't support * PG_G. */ if (oldpde & PG_G) pmap_invalidate_page(kernel_pmap, sva); pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE; if (oldpde & PG_MANAGED) { pvh = pa_to_pvh(oldpde & PG_PS_FRAME); pmap_pvh_free(pvh, pmap, sva); eva = sva + NBPDR; for (va = sva, m = PHYS_TO_VM_PAGE(oldpde & PG_PS_FRAME); va < eva; va += PAGE_SIZE, m++) { if ((oldpde & (PG_M | PG_RW)) == (PG_M | PG_RW)) vm_page_dirty(m); if (oldpde & PG_A) vm_page_aflag_set(m, PGA_REFERENCED); if (TAILQ_EMPTY(&m->md.pv_list) && TAILQ_EMPTY(&pvh->pv_list)) vm_page_aflag_clear(m, PGA_WRITEABLE); } } if (pmap == kernel_pmap) { pmap_remove_kernel_pde(pmap, pdq, sva); } else { mpte = pmap_lookup_pt_page(pmap, sva); if (mpte != NULL) { pmap_remove_pt_page(pmap, mpte); pmap->pm_stats.resident_count--; KASSERT(mpte->wire_count == NPTEPG, ("pmap_remove_pde: pte page wire count error")); mpte->wire_count = 0; pmap_add_delayed_free_list(mpte, free, FALSE); atomic_subtract_int(&vm_cnt.v_wire_count, 1); } } } /* * pmap_remove_pte: do the things to unmap a page in a process */ static int pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, vm_offset_t va, struct spglist *free) { pt_entry_t oldpte; vm_page_t m; rw_assert(&pvh_global_lock, RA_WLOCKED); PMAP_LOCK_ASSERT(pmap, MA_OWNED); oldpte = pte_load_clear(ptq); KASSERT(oldpte != 0, ("pmap_remove_pte: pmap %p va %x zero pte", pmap, va)); if (oldpte & PG_W) pmap->pm_stats.wired_count -= 1; /* * Machines that don't support invlpg, also don't support * PG_G. 
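 *
 * (PG_G mappings survive the implicit TLB flush of a CR3 reload and so
 * must be removed with an explicit invlpg; CPUs old enough to lack
 * invlpg also lack PGE, so PG_G is never set on them.)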
*/ if (oldpte & PG_G) pmap_invalidate_page(kernel_pmap, va); pmap->pm_stats.resident_count -= 1; if (oldpte & PG_MANAGED) { m = PHYS_TO_VM_PAGE(oldpte & PG_FRAME); if ((oldpte & (PG_M | PG_RW)) == (PG_M | PG_RW)) vm_page_dirty(m); if (oldpte & PG_A) vm_page_aflag_set(m, PGA_REFERENCED); pmap_remove_entry(pmap, m, va); } return (pmap_unuse_pt(pmap, va, free)); } /* * Remove a single page from a process address space */ static void pmap_remove_page(pmap_t pmap, vm_offset_t va, struct spglist *free) { pt_entry_t *pte; rw_assert(&pvh_global_lock, RA_WLOCKED); KASSERT(curthread->td_pinned > 0, ("curthread not pinned")); PMAP_LOCK_ASSERT(pmap, MA_OWNED); if ((pte = pmap_pte_quick(pmap, va)) == NULL || *pte == 0) return; pmap_remove_pte(pmap, pte, va, free); pmap_invalidate_page(pmap, va); } /* * Remove the given range of addresses from the specified map. * * It is assumed that the start and end are properly * rounded to the page size. */ void pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) { vm_offset_t pdnxt; pd_entry_t ptpaddr; pt_entry_t *pte; struct spglist free; int anyvalid; /* * Perform an unsynchronized read. This is, however, safe. */ if (pmap->pm_stats.resident_count == 0) return; anyvalid = 0; SLIST_INIT(&free); rw_wlock(&pvh_global_lock); sched_pin(); PMAP_LOCK(pmap); /* * special handling of removing one page. a very * common operation and easy to short circuit some * code. */ if ((sva + PAGE_SIZE == eva) && ((pmap->pm_pdir[(sva >> PDRSHIFT)] & PG_PS) == 0)) { pmap_remove_page(pmap, sva, &free); goto out; } for (; sva < eva; sva = pdnxt) { u_int pdirindex; /* * Calculate index for next page table. */ pdnxt = (sva + NBPDR) & ~PDRMASK; if (pdnxt < sva) pdnxt = eva; if (pmap->pm_stats.resident_count == 0) break; pdirindex = sva >> PDRSHIFT; ptpaddr = pmap->pm_pdir[pdirindex]; /* * Weed out invalid mappings. Note: we assume that the page * directory table is always allocated, and in kernel virtual. */ if (ptpaddr == 0) continue; /* * Check for large page. */ if ((ptpaddr & PG_PS) != 0) { /* * Are we removing the entire large page? If not, * demote the mapping and fall through. */ if (sva + NBPDR == pdnxt && eva >= pdnxt) { /* * The TLB entry for a PG_G mapping is * invalidated by pmap_remove_pde(). */ if ((ptpaddr & PG_G) == 0) anyvalid = 1; pmap_remove_pde(pmap, &pmap->pm_pdir[pdirindex], sva, &free); continue; } else if (!pmap_demote_pde(pmap, &pmap->pm_pdir[pdirindex], sva)) { /* The large page mapping was destroyed. */ continue; } } /* * Limit our scan to either the end of the va represented * by the current page table page, or to the end of the * range being removed. */ if (pdnxt > eva) pdnxt = eva; for (pte = pmap_pte_quick(pmap, sva); sva != pdnxt; pte++, sva += PAGE_SIZE) { if (*pte == 0) continue; /* * The TLB entry for a PG_G mapping is invalidated * by pmap_remove_pte(). */ if ((*pte & PG_G) == 0) anyvalid = 1; if (pmap_remove_pte(pmap, pte, sva, &free)) break; } } out: sched_unpin(); if (anyvalid) pmap_invalidate_all(pmap); rw_wunlock(&pvh_global_lock); PMAP_UNLOCK(pmap); pmap_free_zero_pages(&free); } /* * Routine: pmap_remove_all * Function: * Removes this physical page from * all physical maps in which it resides. * Reflects back modify bits to the pager. * * Notes: * Original versions of this routine were very * inefficient because they iteratively called * pmap_remove (slow...) 
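 */

/*
 * Hypothetical caller sketch (illustrative, not from this file): a page
 * being reclaimed has every mapping torn down first, after which it can
 * no longer be referenced or dirtied through any pmap.
 */
static void
example_strip_page(vm_page_t m)
{

	pmap_remove_all(m);	/* also clears PGA_WRITEABLE */
	KASSERT(!pmap_page_is_mapped(m),
	    ("example_strip_page: %p is still mapped", m));
}

/*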
*/ void pmap_remove_all(vm_page_t m) { struct md_page *pvh; pv_entry_t pv; pmap_t pmap; pt_entry_t *pte, tpte; pd_entry_t *pde; vm_offset_t va; struct spglist free; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_remove_all: page %p is not managed", m)); SLIST_INIT(&free); rw_wlock(&pvh_global_lock); sched_pin(); if ((m->flags & PG_FICTITIOUS) != 0) goto small_mappings; pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); while ((pv = TAILQ_FIRST(&pvh->pv_list)) != NULL) { va = pv->pv_va; pmap = PV_PMAP(pv); PMAP_LOCK(pmap); pde = pmap_pde(pmap, va); (void)pmap_demote_pde(pmap, pde, va); PMAP_UNLOCK(pmap); } small_mappings: while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) { pmap = PV_PMAP(pv); PMAP_LOCK(pmap); pmap->pm_stats.resident_count--; pde = pmap_pde(pmap, pv->pv_va); KASSERT((*pde & PG_PS) == 0, ("pmap_remove_all: found" " a 4mpage in page %p's pv list", m)); pte = pmap_pte_quick(pmap, pv->pv_va); tpte = pte_load_clear(pte); KASSERT(tpte != 0, ("pmap_remove_all: pmap %p va %x zero pte", pmap, pv->pv_va)); if (tpte & PG_W) pmap->pm_stats.wired_count--; if (tpte & PG_A) vm_page_aflag_set(m, PGA_REFERENCED); /* * Update the vm_page_t clean and reference bits. */ if ((tpte & (PG_M | PG_RW)) == (PG_M | PG_RW)) vm_page_dirty(m); pmap_unuse_pt(pmap, pv->pv_va, &free); pmap_invalidate_page(pmap, pv->pv_va); TAILQ_REMOVE(&m->md.pv_list, pv, pv_next); free_pv_entry(pmap, pv); PMAP_UNLOCK(pmap); } vm_page_aflag_clear(m, PGA_WRITEABLE); sched_unpin(); rw_wunlock(&pvh_global_lock); pmap_free_zero_pages(&free); } /* * pmap_protect_pde: do the things to protect a 4mpage in a process */ static boolean_t pmap_protect_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t sva, vm_prot_t prot) { pd_entry_t newpde, oldpde; vm_offset_t eva, va; vm_page_t m; boolean_t anychanged; PMAP_LOCK_ASSERT(pmap, MA_OWNED); KASSERT((sva & PDRMASK) == 0, ("pmap_protect_pde: sva is not 4mpage aligned")); anychanged = FALSE; retry: oldpde = newpde = *pde; if (oldpde & PG_MANAGED) { eva = sva + NBPDR; for (va = sva, m = PHYS_TO_VM_PAGE(oldpde & PG_PS_FRAME); va < eva; va += PAGE_SIZE, m++) if ((oldpde & (PG_M | PG_RW)) == (PG_M | PG_RW)) vm_page_dirty(m); } if ((prot & VM_PROT_WRITE) == 0) newpde &= ~(PG_RW | PG_M); #if defined(PAE) || defined(PAE_TABLES) if ((prot & VM_PROT_EXECUTE) == 0) newpde |= pg_nx; #endif if (newpde != oldpde) { if (!pde_cmpset(pde, oldpde, newpde)) goto retry; if (oldpde & PG_G) pmap_invalidate_page(pmap, sva); else anychanged = TRUE; } return (anychanged); } /* * Set the physical protection on the * specified range of this map as requested. */ void pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot) { vm_offset_t pdnxt; pd_entry_t ptpaddr; pt_entry_t *pte; boolean_t anychanged, pv_lists_locked; KASSERT((prot & ~VM_PROT_ALL) == 0, ("invalid prot %x", prot)); if (prot == VM_PROT_NONE) { pmap_remove(pmap, sva, eva); return; } #if defined(PAE) || defined(PAE_TABLES) if ((prot & (VM_PROT_WRITE|VM_PROT_EXECUTE)) == (VM_PROT_WRITE|VM_PROT_EXECUTE)) return; #else if (prot & VM_PROT_WRITE) return; #endif if (pmap_is_current(pmap)) pv_lists_locked = FALSE; else { pv_lists_locked = TRUE; resume: rw_wlock(&pvh_global_lock); sched_pin(); } anychanged = FALSE; PMAP_LOCK(pmap); for (; sva < eva; sva = pdnxt) { pt_entry_t obits, pbits; u_int pdirindex; pdnxt = (sva + NBPDR) & ~PDRMASK; if (pdnxt < sva) pdnxt = eva; pdirindex = sva >> PDRSHIFT; ptpaddr = pmap->pm_pdir[pdirindex]; /* * Weed out invalid mappings. Note: we assume that the page * directory table is always allocated, and in kernel virtual. 
*/ if (ptpaddr == 0) continue; /* * Check for large page. */ if ((ptpaddr & PG_PS) != 0) { /* * Are we protecting the entire large page? If not, * demote the mapping and fall through. */ if (sva + NBPDR == pdnxt && eva >= pdnxt) { /* * The TLB entry for a PG_G mapping is * invalidated by pmap_protect_pde(). */ if (pmap_protect_pde(pmap, &pmap->pm_pdir[pdirindex], sva, prot)) anychanged = TRUE; continue; } else { if (!pv_lists_locked) { pv_lists_locked = TRUE; if (!rw_try_wlock(&pvh_global_lock)) { if (anychanged) pmap_invalidate_all( pmap); PMAP_UNLOCK(pmap); goto resume; } sched_pin(); } if (!pmap_demote_pde(pmap, &pmap->pm_pdir[pdirindex], sva)) { /* * The large page mapping was * destroyed. */ continue; } } } if (pdnxt > eva) pdnxt = eva; for (pte = pmap_pte_quick(pmap, sva); sva != pdnxt; pte++, sva += PAGE_SIZE) { vm_page_t m; retry: /* * Regardless of whether a pte is 32 or 64 bits in * size, PG_RW, PG_A, and PG_M are among the least * significant 32 bits. */ obits = pbits = *pte; if ((pbits & PG_V) == 0) continue; if ((prot & VM_PROT_WRITE) == 0) { if ((pbits & (PG_MANAGED | PG_M | PG_RW)) == (PG_MANAGED | PG_M | PG_RW)) { m = PHYS_TO_VM_PAGE(pbits & PG_FRAME); vm_page_dirty(m); } pbits &= ~(PG_RW | PG_M); } #if defined(PAE) || defined(PAE_TABLES) if ((prot & VM_PROT_EXECUTE) == 0) pbits |= pg_nx; #endif if (pbits != obits) { #if defined(PAE) || defined(PAE_TABLES) if (!atomic_cmpset_64(pte, obits, pbits)) goto retry; #else if (!atomic_cmpset_int((u_int *)pte, obits, pbits)) goto retry; #endif if (obits & PG_G) pmap_invalidate_page(pmap, sva); else anychanged = TRUE; } } } if (anychanged) pmap_invalidate_all(pmap); if (pv_lists_locked) { sched_unpin(); rw_wunlock(&pvh_global_lock); } PMAP_UNLOCK(pmap); } /* * Tries to promote the 512 or 1024, contiguous 4KB page mappings that are * within a single page table page (PTP) to a single 2- or 4MB page mapping. * For promotion to occur, two conditions must be met: (1) the 4KB page * mappings must map aligned, contiguous physical memory and (2) the 4KB page * mappings must have identical characteristics. * * Managed (PG_MANAGED) mappings within the kernel address space are not * promoted. The reason is that kernel PDEs are replicated in each pmap but * pmap_clear_ptes() and pmap_ts_referenced() only read the PDE from the kernel * pmap. */ static void pmap_promote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va) { pd_entry_t newpde; pt_entry_t *firstpte, oldpte, pa, *pte; vm_offset_t oldpteva; vm_page_t mpte; PMAP_LOCK_ASSERT(pmap, MA_OWNED); /* * Examine the first PTE in the specified PTP. Abort if this PTE is * either invalid, unused, or does not map the first 4KB physical page * within a 2- or 4MB page. */ firstpte = pmap_pte_quick(pmap, trunc_4mpage(va)); setpde: newpde = *firstpte; if ((newpde & ((PG_FRAME & PDRMASK) | PG_A | PG_V)) != (PG_A | PG_V)) { pmap_pde_p_failures++; CTR2(KTR_PMAP, "pmap_promote_pde: failure for va %#x" " in pmap %p", va, pmap); return; } if ((*firstpte & PG_MANAGED) != 0 && pmap == kernel_pmap) { pmap_pde_p_failures++; CTR2(KTR_PMAP, "pmap_promote_pde: failure for va %#x" " in pmap %p", va, pmap); return; } if ((newpde & (PG_M | PG_RW)) == PG_RW) { /* * When PG_M is already clear, PG_RW can be cleared without * a TLB invalidation. */ if (!atomic_cmpset_int((u_int *)firstpte, newpde, newpde & ~PG_RW)) goto setpde; newpde &= ~PG_RW; } /* * Examine each of the other PTEs in the specified PTP. Abort if this * PTE maps an unexpected 4KB physical page or does not have identical * characteristics to the first PTE. 
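	/*
	 * (The scan below runs backwards: "pa" starts at the frame of the
	 * last 4KB page of the prospective superpage, with the required
	 * PG_A and PG_V bits folded in, and drops by PAGE_SIZE as "pte"
	 * walks down toward firstpte, so each PTE must match its expected
	 * frame and attributes exactly.)
	 */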
*/ pa = (newpde & (PG_PS_FRAME | PG_A | PG_V)) + NBPDR - PAGE_SIZE; for (pte = firstpte + NPTEPG - 1; pte > firstpte; pte--) { setpte: oldpte = *pte; if ((oldpte & (PG_FRAME | PG_A | PG_V)) != pa) { pmap_pde_p_failures++; CTR2(KTR_PMAP, "pmap_promote_pde: failure for va %#x" " in pmap %p", va, pmap); return; } if ((oldpte & (PG_M | PG_RW)) == PG_RW) { /* * When PG_M is already clear, PG_RW can be cleared * without a TLB invalidation. */ if (!atomic_cmpset_int((u_int *)pte, oldpte, oldpte & ~PG_RW)) goto setpte; oldpte &= ~PG_RW; oldpteva = (oldpte & PG_FRAME & PDRMASK) | (va & ~PDRMASK); CTR2(KTR_PMAP, "pmap_promote_pde: protect for va %#x" " in pmap %p", oldpteva, pmap); } if ((oldpte & PG_PTE_PROMOTE) != (newpde & PG_PTE_PROMOTE)) { pmap_pde_p_failures++; CTR2(KTR_PMAP, "pmap_promote_pde: failure for va %#x" " in pmap %p", va, pmap); return; } pa -= PAGE_SIZE; } /* * Save the page table page in its current state until the PDE * mapping the superpage is demoted by pmap_demote_pde() or * destroyed by pmap_remove_pde(). */ mpte = PHYS_TO_VM_PAGE(*pde & PG_FRAME); KASSERT(mpte >= vm_page_array && mpte < &vm_page_array[vm_page_array_size], ("pmap_promote_pde: page table page is out of range")); KASSERT(mpte->pindex == va >> PDRSHIFT, ("pmap_promote_pde: page table page's pindex is wrong")); if (pmap_insert_pt_page(pmap, mpte)) { pmap_pde_p_failures++; CTR2(KTR_PMAP, "pmap_promote_pde: failure for va %#x in pmap %p", va, pmap); return; } /* * Promote the pv entries. */ if ((newpde & PG_MANAGED) != 0) pmap_pv_promote_pde(pmap, va, newpde & PG_PS_FRAME); /* * Propagate the PAT index to its proper position. */ if ((newpde & PG_PTE_PAT) != 0) newpde ^= PG_PDE_PAT | PG_PTE_PAT; /* * Map the superpage. */ if (workaround_erratum383) pmap_update_pde(pmap, va, pde, PG_PS | newpde); else if (pmap == kernel_pmap) pmap_kenter_pde(va, PG_PS | newpde); else pde_store(pde, PG_PS | newpde); pmap_pde_promotions++; CTR2(KTR_PMAP, "pmap_promote_pde: success for va %#x" " in pmap %p", va, pmap); } /* * Insert the given physical page (p) at * the specified virtual address (v) in the * target physical map with the protection requested. * * If specified, the page will be wired down, meaning * that the related pte can not be reclaimed. * * NB: This is the only routine which MAY NOT lazy-evaluate * or lose information. That is, this routine must actually * insert this page into the given map NOW. */ int pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, u_int flags, int8_t psind) { pd_entry_t *pde; pt_entry_t *pte; pt_entry_t newpte, origpte; pv_entry_t pv; vm_paddr_t opa, pa; vm_page_t mpte, om; boolean_t invlva, wired; va = trunc_page(va); mpte = NULL; wired = (flags & PMAP_ENTER_WIRED) != 0; KASSERT(va <= VM_MAX_KERNEL_ADDRESS, ("pmap_enter: toobig")); KASSERT(va < UPT_MIN_ADDRESS || va >= UPT_MAX_ADDRESS, ("pmap_enter: invalid to pmap_enter page table pages (va: 0x%x)", va)); if ((m->oflags & VPO_UNMANAGED) == 0 && !vm_page_xbusied(m)) VM_OBJECT_ASSERT_LOCKED(m->object); rw_wlock(&pvh_global_lock); PMAP_LOCK(pmap); sched_pin(); /* * In the case that a page table page is not * resident, we are creating it here. 
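	 * (Only user addresses take this path: kernel page table pages
	 * are allocated up front by pmap_growkernel() and are shared by
	 * every pmap, so they are always resident.)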
*/ if (va < VM_MAXUSER_ADDRESS) { mpte = pmap_allocpte(pmap, va, flags); if (mpte == NULL) { KASSERT((flags & PMAP_ENTER_NOSLEEP) != 0, ("pmap_allocpte failed with sleep allowed")); sched_unpin(); rw_wunlock(&pvh_global_lock); PMAP_UNLOCK(pmap); return (KERN_RESOURCE_SHORTAGE); } } pde = pmap_pde(pmap, va); if ((*pde & PG_PS) != 0) panic("pmap_enter: attempted pmap_enter on 4MB page"); pte = pmap_pte_quick(pmap, va); /* * Page Directory table entry not valid, we need a new PT page */ if (pte == NULL) { panic("pmap_enter: invalid page directory pdir=%#jx, va=%#x", (uintmax_t)pmap->pm_pdir[PTDPTDI], va); } pa = VM_PAGE_TO_PHYS(m); om = NULL; origpte = *pte; opa = origpte & PG_FRAME; /* * Mapping has not changed, must be protection or wiring change. */ if (origpte && (opa == pa)) { /* * Wiring change, just update stats. We don't worry about * wiring PT pages as they remain resident as long as there * are valid mappings in them. Hence, if a user page is wired, * the PT page will be also. */ if (wired && ((origpte & PG_W) == 0)) pmap->pm_stats.wired_count++; else if (!wired && (origpte & PG_W)) pmap->pm_stats.wired_count--; /* * Remove extra pte reference */ if (mpte) mpte->wire_count--; if (origpte & PG_MANAGED) { om = m; pa |= PG_MANAGED; } goto validate; } pv = NULL; /* * Mapping has changed, invalidate old range and fall through to * handle validating new mapping. */ if (opa) { if (origpte & PG_W) pmap->pm_stats.wired_count--; if (origpte & PG_MANAGED) { om = PHYS_TO_VM_PAGE(opa); pv = pmap_pvh_remove(&om->md, pmap, va); } if (mpte != NULL) { mpte->wire_count--; KASSERT(mpte->wire_count > 0, ("pmap_enter: missing reference to page table page," " va: 0x%x", va)); } } else pmap->pm_stats.resident_count++; /* * Enter on the PV list if part of our managed memory. */ if ((m->oflags & VPO_UNMANAGED) == 0) { KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva, ("pmap_enter: managed mapping within the clean submap")); if (pv == NULL) pv = get_pv_entry(pmap, FALSE); pv->pv_va = va; TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); pa |= PG_MANAGED; } else if (pv != NULL) free_pv_entry(pmap, pv); /* * Increment counters */ if (wired) pmap->pm_stats.wired_count++; validate: /* * Now validate mapping with desired protection/wiring. */ newpte = (pt_entry_t)(pa | pmap_cache_bits(m->md.pat_mode, 0) | PG_V); if ((prot & VM_PROT_WRITE) != 0) { newpte |= PG_RW; if ((newpte & PG_MANAGED) != 0) vm_page_aflag_set(m, PGA_WRITEABLE); } #if defined(PAE) || defined(PAE_TABLES) if ((prot & VM_PROT_EXECUTE) == 0) newpte |= pg_nx; #endif if (wired) newpte |= PG_W; if (va < VM_MAXUSER_ADDRESS) newpte |= PG_U; if (pmap == kernel_pmap) newpte |= pgeflag; /* * if the mapping or permission bits are different, we need * to update the pte. 
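	 * (PG_M and PG_A are masked out of the comparison because the
	 * hardware may set them asynchronously; a difference only in
	 * those bits does not require rewriting the PTE.)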
*/ if ((origpte & ~(PG_M|PG_A)) != newpte) { newpte |= PG_A; if ((flags & VM_PROT_WRITE) != 0) newpte |= PG_M; if (origpte & PG_V) { invlva = FALSE; origpte = pte_load_store(pte, newpte); if (origpte & PG_A) { if (origpte & PG_MANAGED) vm_page_aflag_set(om, PGA_REFERENCED); if (opa != VM_PAGE_TO_PHYS(m)) invlva = TRUE; #if defined(PAE) || defined(PAE_TABLES) if ((origpte & PG_NX) == 0 && (newpte & PG_NX) != 0) invlva = TRUE; #endif } if ((origpte & (PG_M | PG_RW)) == (PG_M | PG_RW)) { if ((origpte & PG_MANAGED) != 0) vm_page_dirty(om); if ((prot & VM_PROT_WRITE) == 0) invlva = TRUE; } if ((origpte & PG_MANAGED) != 0 && TAILQ_EMPTY(&om->md.pv_list) && ((om->flags & PG_FICTITIOUS) != 0 || TAILQ_EMPTY(&pa_to_pvh(opa)->pv_list))) vm_page_aflag_clear(om, PGA_WRITEABLE); if (invlva) pmap_invalidate_page(pmap, va); } else pte_store(pte, newpte); } /* * If both the page table page and the reservation are fully * populated, then attempt promotion. */ if ((mpte == NULL || mpte->wire_count == NPTEPG) && pg_ps_enabled && (m->flags & PG_FICTITIOUS) == 0 && vm_reserv_level_iffullpop(m) == 0) pmap_promote_pde(pmap, pde, va); sched_unpin(); rw_wunlock(&pvh_global_lock); PMAP_UNLOCK(pmap); return (KERN_SUCCESS); } /* * Tries to create a 2- or 4MB page mapping. Returns TRUE if successful and * FALSE otherwise. Fails if (1) a page table page cannot be allocated without * blocking, (2) a mapping already exists at the specified virtual address, or * (3) a pv entry cannot be allocated without reclaiming another pv entry. */ static boolean_t pmap_enter_pde(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot) { pd_entry_t *pde, newpde; rw_assert(&pvh_global_lock, RA_WLOCKED); PMAP_LOCK_ASSERT(pmap, MA_OWNED); pde = pmap_pde(pmap, va); if (*pde != 0) { CTR2(KTR_PMAP, "pmap_enter_pde: failure for va %#lx" " in pmap %p", va, pmap); return (FALSE); } newpde = VM_PAGE_TO_PHYS(m) | pmap_cache_bits(m->md.pat_mode, 1) | PG_PS | PG_V; if ((m->oflags & VPO_UNMANAGED) == 0) { newpde |= PG_MANAGED; /* * Abort this mapping if its PV entry could not be created. */ if (!pmap_pv_insert_pde(pmap, va, VM_PAGE_TO_PHYS(m))) { CTR2(KTR_PMAP, "pmap_enter_pde: failure for va %#lx" " in pmap %p", va, pmap); return (FALSE); } } #if defined(PAE) || defined(PAE_TABLES) if ((prot & VM_PROT_EXECUTE) == 0) newpde |= pg_nx; #endif if (va < VM_MAXUSER_ADDRESS) newpde |= PG_U; /* * Increment counters. */ pmap->pm_stats.resident_count += NBPDR / PAGE_SIZE; /* * Map the superpage. */ pde_store(pde, newpde); pmap_pde_mappings++; CTR2(KTR_PMAP, "pmap_enter_pde: success for va %#lx" " in pmap %p", va, pmap); return (TRUE); } /* * Maps a sequence of resident pages belonging to the same object. * The sequence begins with the given page m_start. This page is * mapped at the given virtual address start. Each subsequent page is * mapped at a virtual address that is offset from start by the same * amount as the page is offset from m_start within the object. The * last page in the sequence is the page with the largest offset from * m_start that can be mapped at a virtual address less than the given * virtual address end. Not every virtual page between start and end * is mapped; only those for which a resident page exists with the * corresponding offset from m_start are mapped. 
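 */

/*
 * Hypothetical caller sketch (illustrative only; the names, locking, and
 * the premise that the first resident page sits exactly at "pindex" are
 * assumptions, not part of this file): pre-map whatever pages of
 * "object" are already resident so later faults on the range are cheap.
 */
static void
example_premap(pmap_t pmap, vm_offset_t start, vm_offset_t end,
    vm_object_t object, vm_pindex_t pindex)
{
	vm_page_t m;

	VM_OBJECT_RLOCK(object);
	m = vm_page_find_least(object, pindex);
	if (m != NULL && m->pindex == pindex)
		pmap_enter_object(pmap, start, end, m, VM_PROT_READ);
	VM_OBJECT_RUNLOCK(object);
}

/*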
*/ void pmap_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end, vm_page_t m_start, vm_prot_t prot) { vm_offset_t va; vm_page_t m, mpte; vm_pindex_t diff, psize; VM_OBJECT_ASSERT_LOCKED(m_start->object); psize = atop(end - start); mpte = NULL; m = m_start; rw_wlock(&pvh_global_lock); PMAP_LOCK(pmap); while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) { va = start + ptoa(diff); if ((va & PDRMASK) == 0 && va + NBPDR <= end && m->psind == 1 && pg_ps_enabled && pmap_enter_pde(pmap, va, m, prot)) m = &m[NBPDR / PAGE_SIZE - 1]; else mpte = pmap_enter_quick_locked(pmap, va, m, prot, mpte); m = TAILQ_NEXT(m, listq); } rw_wunlock(&pvh_global_lock); PMAP_UNLOCK(pmap); } /* * this code makes some *MAJOR* assumptions: * 1. Current pmap & pmap exists. * 2. Not wired. * 3. Read access. * 4. No page table pages. * but is *MUCH* faster than pmap_enter... */ void pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot) { rw_wlock(&pvh_global_lock); PMAP_LOCK(pmap); (void)pmap_enter_quick_locked(pmap, va, m, prot, NULL); rw_wunlock(&pvh_global_lock); PMAP_UNLOCK(pmap); } static vm_page_t pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, vm_page_t mpte) { pt_entry_t *pte; vm_paddr_t pa; struct spglist free; KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva || (m->oflags & VPO_UNMANAGED) != 0, ("pmap_enter_quick_locked: managed mapping within the clean submap")); rw_assert(&pvh_global_lock, RA_WLOCKED); PMAP_LOCK_ASSERT(pmap, MA_OWNED); /* * In the case that a page table page is not * resident, we are creating it here. */ if (va < VM_MAXUSER_ADDRESS) { u_int ptepindex; pd_entry_t ptepa; /* * Calculate pagetable page index */ ptepindex = va >> PDRSHIFT; if (mpte && (mpte->pindex == ptepindex)) { mpte->wire_count++; } else { /* * Get the page directory entry */ ptepa = pmap->pm_pdir[ptepindex]; /* * If the page table page is mapped, we just increment * the hold count, and activate it. */ if (ptepa) { if (ptepa & PG_PS) return (NULL); mpte = PHYS_TO_VM_PAGE(ptepa & PG_FRAME); mpte->wire_count++; } else { mpte = _pmap_allocpte(pmap, ptepindex, PMAP_ENTER_NOSLEEP); if (mpte == NULL) return (mpte); } } } else { mpte = NULL; } /* * This call to vtopte makes the assumption that we are * entering the page into the current pmap. In order to support * quick entry into any pmap, one would likely use pmap_pte_quick. * But that isn't as quick as vtopte. */ pte = vtopte(va); if (*pte) { if (mpte != NULL) { mpte->wire_count--; mpte = NULL; } return (mpte); } /* * Enter on the PV list if part of our managed memory. */ if ((m->oflags & VPO_UNMANAGED) == 0 && !pmap_try_insert_pv_entry(pmap, va, m)) { if (mpte != NULL) { SLIST_INIT(&free); if (pmap_unwire_ptp(pmap, mpte, &free)) { pmap_invalidate_page(pmap, va); pmap_free_zero_pages(&free); } mpte = NULL; } return (mpte); } /* * Increment counters */ pmap->pm_stats.resident_count++; pa = VM_PAGE_TO_PHYS(m) | pmap_cache_bits(m->md.pat_mode, 0); #if defined(PAE) || defined(PAE_TABLES) if ((prot & VM_PROT_EXECUTE) == 0) pa |= pg_nx; #endif /* * Now validate mapping with RO protection */ if ((m->oflags & VPO_UNMANAGED) != 0) pte_store(pte, pa | PG_V | PG_U); else pte_store(pte, pa | PG_V | PG_U | PG_MANAGED); return (mpte); } /* * Make a temporary mapping for a physical address. This is only intended * to be used for panic dumps. 
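 */

/*
 * Hypothetical use (illustrative, not from this file): a crash dump
 * routine that copies one physical page at a time through the
 * crashdumpmap window.
 */
static void
example_dump_copy(vm_paddr_t pa, void *buf)
{
	void *va;

	va = pmap_kenter_temporary(pa, 0);
	bcopy(va, buf, PAGE_SIZE);
}

/*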
*/ void * pmap_kenter_temporary(vm_paddr_t pa, int i) { vm_offset_t va; va = (vm_offset_t)crashdumpmap + (i * PAGE_SIZE); pmap_kenter(va, pa); invlpg(va); return ((void *)crashdumpmap); } /* * This code maps large physical mmap regions into the * processor address space. Note that some shortcuts * are taken, but the code works. */ void pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, vm_object_t object, vm_pindex_t pindex, vm_size_t size) { pd_entry_t *pde; vm_paddr_t pa, ptepa; vm_page_t p; int pat_mode; VM_OBJECT_ASSERT_WLOCKED(object); KASSERT(object->type == OBJT_DEVICE || object->type == OBJT_SG, ("pmap_object_init_pt: non-device object")); if (pseflag && (addr & (NBPDR - 1)) == 0 && (size & (NBPDR - 1)) == 0) { if (!vm_object_populate(object, pindex, pindex + atop(size))) return; p = vm_page_lookup(object, pindex); KASSERT(p->valid == VM_PAGE_BITS_ALL, ("pmap_object_init_pt: invalid page %p", p)); pat_mode = p->md.pat_mode; /* * Abort the mapping if the first page is not physically * aligned to a 2/4MB page boundary. */ ptepa = VM_PAGE_TO_PHYS(p); if (ptepa & (NBPDR - 1)) return; /* * Skip the first page. Abort the mapping if the rest of * the pages are not physically contiguous or have differing * memory attributes. */ p = TAILQ_NEXT(p, listq); for (pa = ptepa + PAGE_SIZE; pa < ptepa + size; pa += PAGE_SIZE) { KASSERT(p->valid == VM_PAGE_BITS_ALL, ("pmap_object_init_pt: invalid page %p", p)); if (pa != VM_PAGE_TO_PHYS(p) || pat_mode != p->md.pat_mode) return; p = TAILQ_NEXT(p, listq); } /* * Map using 2/4MB pages. Since "ptepa" is 2/4M aligned and * "size" is a multiple of 2/4M, adding the PAT setting to * "pa" will not affect the termination of this loop. */ PMAP_LOCK(pmap); for (pa = ptepa | pmap_cache_bits(pat_mode, 1); pa < ptepa + size; pa += NBPDR) { pde = pmap_pde(pmap, addr); if (*pde == 0) { pde_store(pde, pa | PG_PS | PG_M | PG_A | PG_U | PG_RW | PG_V); pmap->pm_stats.resident_count += NBPDR / PAGE_SIZE; pmap_pde_mappings++; } /* Else continue on if the PDE is already valid. */ addr += NBPDR; } PMAP_UNLOCK(pmap); } } /* * Clear the wired attribute from the mappings for the specified range of * addresses in the given pmap. Every valid mapping within that range * must have the wired attribute set. In contrast, invalid mappings * cannot have the wired attribute set, so they are ignored. * * The wired attribute of the page table entry is not a hardware feature, * so there is no need to invalidate any TLB entries. */ void pmap_unwire(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) { vm_offset_t pdnxt; pd_entry_t *pde; pt_entry_t *pte; boolean_t pv_lists_locked; if (pmap_is_current(pmap)) pv_lists_locked = FALSE; else { pv_lists_locked = TRUE; resume: rw_wlock(&pvh_global_lock); sched_pin(); } PMAP_LOCK(pmap); for (; sva < eva; sva = pdnxt) { pdnxt = (sva + NBPDR) & ~PDRMASK; if (pdnxt < sva) pdnxt = eva; pde = pmap_pde(pmap, sva); if ((*pde & PG_V) == 0) continue; if ((*pde & PG_PS) != 0) { if ((*pde & PG_W) == 0) panic("pmap_unwire: pde %#jx is missing PG_W", (uintmax_t)*pde); /* * Are we unwiring the entire large page? If not, * demote the mapping and fall through. */ if (sva + NBPDR == pdnxt && eva >= pdnxt) { /* * Regardless of whether a pde (or pte) is 32 * or 64 bits in size, PG_W is among the least * significant 32 bits. */ atomic_clear_int((u_int *)pde, PG_W); pmap->pm_stats.wired_count -= NBPDR / PAGE_SIZE; continue; } else { if (!pv_lists_locked) { pv_lists_locked = TRUE; if (!rw_try_wlock(&pvh_global_lock)) { PMAP_UNLOCK(pmap); /* Repeat sva. 
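The try-lock on the pvh_global_lock failed while the pmap lock was held; the pmap lock is dropped, the locks are reacquired in the safe order at "resume", and this PDE is re-examined starting from sva.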
*/ goto resume; } sched_pin(); } if (!pmap_demote_pde(pmap, pde, sva)) panic("pmap_unwire: demotion failed"); } } if (pdnxt > eva) pdnxt = eva; for (pte = pmap_pte_quick(pmap, sva); sva != pdnxt; pte++, sva += PAGE_SIZE) { if ((*pte & PG_V) == 0) continue; if ((*pte & PG_W) == 0) panic("pmap_unwire: pte %#jx is missing PG_W", (uintmax_t)*pte); /* * PG_W must be cleared atomically. Although the pmap * lock synchronizes access to PG_W, another processor * could be setting PG_M and/or PG_A concurrently. * * PG_W is among the least significant 32 bits. */ atomic_clear_int((u_int *)pte, PG_W); pmap->pm_stats.wired_count--; } } if (pv_lists_locked) { sched_unpin(); rw_wunlock(&pvh_global_lock); } PMAP_UNLOCK(pmap); } /* * Copy the range specified by src_addr/len * from the source map to the range dst_addr/len * in the destination map. * * This routine is only advisory and need not do anything. */ void pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len, vm_offset_t src_addr) { struct spglist free; vm_offset_t addr; vm_offset_t end_addr = src_addr + len; vm_offset_t pdnxt; if (dst_addr != src_addr) return; if (!pmap_is_current(src_pmap)) return; rw_wlock(&pvh_global_lock); if (dst_pmap < src_pmap) { PMAP_LOCK(dst_pmap); PMAP_LOCK(src_pmap); } else { PMAP_LOCK(src_pmap); PMAP_LOCK(dst_pmap); } sched_pin(); for (addr = src_addr; addr < end_addr; addr = pdnxt) { pt_entry_t *src_pte, *dst_pte; vm_page_t dstmpte, srcmpte; pd_entry_t srcptepaddr; u_int ptepindex; KASSERT(addr < UPT_MIN_ADDRESS, ("pmap_copy: invalid to pmap_copy page tables")); pdnxt = (addr + NBPDR) & ~PDRMASK; if (pdnxt < addr) pdnxt = end_addr; ptepindex = addr >> PDRSHIFT; srcptepaddr = src_pmap->pm_pdir[ptepindex]; if (srcptepaddr == 0) continue; if (srcptepaddr & PG_PS) { if ((addr & PDRMASK) != 0 || addr + NBPDR > end_addr) continue; if (dst_pmap->pm_pdir[ptepindex] == 0 && ((srcptepaddr & PG_MANAGED) == 0 || pmap_pv_insert_pde(dst_pmap, addr, srcptepaddr & PG_PS_FRAME))) { dst_pmap->pm_pdir[ptepindex] = srcptepaddr & ~PG_W; dst_pmap->pm_stats.resident_count += NBPDR / PAGE_SIZE; pmap_pde_mappings++; } continue; } srcmpte = PHYS_TO_VM_PAGE(srcptepaddr & PG_FRAME); KASSERT(srcmpte->wire_count > 0, ("pmap_copy: source page table page is unused")); if (pdnxt > end_addr) pdnxt = end_addr; src_pte = vtopte(addr); while (addr < pdnxt) { pt_entry_t ptetemp; ptetemp = *src_pte; /* * we only virtual copy managed pages */ if ((ptetemp & PG_MANAGED) != 0) { dstmpte = pmap_allocpte(dst_pmap, addr, PMAP_ENTER_NOSLEEP); if (dstmpte == NULL) goto out; dst_pte = pmap_pte_quick(dst_pmap, addr); if (*dst_pte == 0 && pmap_try_insert_pv_entry(dst_pmap, addr, PHYS_TO_VM_PAGE(ptetemp & PG_FRAME))) { /* * Clear the wired, modified, and * accessed (referenced) bits * during the copy. */ *dst_pte = ptetemp & ~(PG_W | PG_M | PG_A); dst_pmap->pm_stats.resident_count++; } else { SLIST_INIT(&free); if (pmap_unwire_ptp(dst_pmap, dstmpte, &free)) { pmap_invalidate_page(dst_pmap, addr); pmap_free_zero_pages(&free); } goto out; } if (dstmpte->wire_count >= srcmpte->wire_count) break; } addr += PAGE_SIZE; src_pte++; } } out: sched_unpin(); rw_wunlock(&pvh_global_lock); PMAP_UNLOCK(src_pmap); PMAP_UNLOCK(dst_pmap); } +/* + * Zero 1 page of virtual memory mapped from a hardware page by the caller. 
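+ * The fastest method available is chosen at run time: on SSE2-capable
+ * CPUs sse2_pagezero() uses non-temporal stores that bypass the caches,
+ * other 686-class CPUs use i686_pagezero(), which avoids rewriting words
+ * that are already zero, and everything else falls back to bzero().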
+ */ static __inline void pagezero(void *page) { #if defined(I686_CPU) if (cpu_class == CPUCLASS_686) { #if defined(CPU_ENABLE_SSE) if (cpu_feature & CPUID_SSE2) sse2_pagezero(page); else #endif i686_pagezero(page); } else #endif bzero(page, PAGE_SIZE); } /* - * pmap_zero_page zeros the specified hardware page by mapping - * the page into KVM and using bzero to clear its contents. + * Zero the specified hardware page. */ void pmap_zero_page(vm_page_t m) { struct sysmaps *sysmaps; sysmaps = &sysmaps_pcpu[PCPU_GET(cpuid)]; mtx_lock(&sysmaps->lock); if (*sysmaps->CMAP2) panic("pmap_zero_page: CMAP2 busy"); sched_pin(); *sysmaps->CMAP2 = PG_V | PG_RW | VM_PAGE_TO_PHYS(m) | PG_A | PG_M | pmap_cache_bits(m->md.pat_mode, 0); invlcaddr(sysmaps->CADDR2); pagezero(sysmaps->CADDR2); *sysmaps->CMAP2 = 0; sched_unpin(); mtx_unlock(&sysmaps->lock); } /* - * pmap_zero_page_area zeros the specified hardware page by mapping - * the page into KVM and using bzero to clear its contents. - * - * off and size may not cover an area beyond a single hardware page. + * Zero an area within a single hardware page. off and size must not + * cover an area beyond a single hardware page. */ void pmap_zero_page_area(vm_page_t m, int off, int size) { struct sysmaps *sysmaps; sysmaps = &sysmaps_pcpu[PCPU_GET(cpuid)]; mtx_lock(&sysmaps->lock); if (*sysmaps->CMAP2) panic("pmap_zero_page_area: CMAP2 busy"); sched_pin(); *sysmaps->CMAP2 = PG_V | PG_RW | VM_PAGE_TO_PHYS(m) | PG_A | PG_M | pmap_cache_bits(m->md.pat_mode, 0); invlcaddr(sysmaps->CADDR2); if (off == 0 && size == PAGE_SIZE) pagezero(sysmaps->CADDR2); else bzero((char *)sysmaps->CADDR2 + off, size); *sysmaps->CMAP2 = 0; sched_unpin(); mtx_unlock(&sysmaps->lock); } /* - * pmap_zero_page_idle zeros the specified hardware page by mapping - * the page into KVM and using bzero to clear its contents. This - * is intended to be called from the vm_pagezero process only and - * outside of Giant. + * Zero the specified hardware page in a way that minimizes cache thrashing. + * This is intended to be called from the vm_pagezero process only and + * outside of Giant. */ void pmap_zero_page_idle(vm_page_t m) { if (*CMAP3) panic("pmap_zero_page_idle: CMAP3 busy"); sched_pin(); *CMAP3 = PG_V | PG_RW | VM_PAGE_TO_PHYS(m) | PG_A | PG_M | pmap_cache_bits(m->md.pat_mode, 0); invlcaddr(CADDR3); pagezero(CADDR3); *CMAP3 = 0; sched_unpin(); } /* - * pmap_copy_page copies the specified (machine independent) - * page by mapping the page into virtual memory and using - * bcopy to copy the page, one machine dependent page at a - * time. + * Copy 1 specified hardware page to another. 
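+ * The source page is mapped read-only at the per-CPU CADDR1 address and
+ * the destination read-write at CADDR2; sched_pin() keeps the thread on
+ * one CPU so that those per-CPU mappings remain valid across the bcopy().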
*/ void pmap_copy_page(vm_page_t src, vm_page_t dst) { struct sysmaps *sysmaps; sysmaps = &sysmaps_pcpu[PCPU_GET(cpuid)]; mtx_lock(&sysmaps->lock); if (*sysmaps->CMAP1) panic("pmap_copy_page: CMAP1 busy"); if (*sysmaps->CMAP2) panic("pmap_copy_page: CMAP2 busy"); sched_pin(); *sysmaps->CMAP1 = PG_V | VM_PAGE_TO_PHYS(src) | PG_A | pmap_cache_bits(src->md.pat_mode, 0); invlcaddr(sysmaps->CADDR1); *sysmaps->CMAP2 = PG_V | PG_RW | VM_PAGE_TO_PHYS(dst) | PG_A | PG_M | pmap_cache_bits(dst->md.pat_mode, 0); invlcaddr(sysmaps->CADDR2); bcopy(sysmaps->CADDR1, sysmaps->CADDR2, PAGE_SIZE); *sysmaps->CMAP1 = 0; *sysmaps->CMAP2 = 0; sched_unpin(); mtx_unlock(&sysmaps->lock); } int unmapped_buf_allowed = 1; void pmap_copy_pages(vm_page_t ma[], vm_offset_t a_offset, vm_page_t mb[], vm_offset_t b_offset, int xfersize) { struct sysmaps *sysmaps; vm_page_t a_pg, b_pg; char *a_cp, *b_cp; vm_offset_t a_pg_offset, b_pg_offset; int cnt; sysmaps = &sysmaps_pcpu[PCPU_GET(cpuid)]; mtx_lock(&sysmaps->lock); if (*sysmaps->CMAP1 != 0) panic("pmap_copy_pages: CMAP1 busy"); if (*sysmaps->CMAP2 != 0) panic("pmap_copy_pages: CMAP2 busy"); sched_pin(); while (xfersize > 0) { a_pg = ma[a_offset >> PAGE_SHIFT]; a_pg_offset = a_offset & PAGE_MASK; cnt = min(xfersize, PAGE_SIZE - a_pg_offset); b_pg = mb[b_offset >> PAGE_SHIFT]; b_pg_offset = b_offset & PAGE_MASK; cnt = min(cnt, PAGE_SIZE - b_pg_offset); *sysmaps->CMAP1 = PG_V | VM_PAGE_TO_PHYS(a_pg) | PG_A | pmap_cache_bits(a_pg->md.pat_mode, 0); invlcaddr(sysmaps->CADDR1); *sysmaps->CMAP2 = PG_V | PG_RW | VM_PAGE_TO_PHYS(b_pg) | PG_A | PG_M | pmap_cache_bits(b_pg->md.pat_mode, 0); invlcaddr(sysmaps->CADDR2); a_cp = sysmaps->CADDR1 + a_pg_offset; b_cp = sysmaps->CADDR2 + b_pg_offset; bcopy(a_cp, b_cp, cnt); a_offset += cnt; b_offset += cnt; xfersize -= cnt; } *sysmaps->CMAP1 = 0; *sysmaps->CMAP2 = 0; sched_unpin(); mtx_unlock(&sysmaps->lock); } /* * Returns true if the pmap's pv is one of the first * 16 pvs linked to from this page. This count may * be changed upwards or downwards in the future; it * is only necessary that true be returned for a small * subset of pmaps for proper page aging. */ boolean_t pmap_page_exists_quick(pmap_t pmap, vm_page_t m) { struct md_page *pvh; pv_entry_t pv; int loops = 0; boolean_t rv; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_page_exists_quick: page %p is not managed", m)); rv = FALSE; rw_wlock(&pvh_global_lock); TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { if (PV_PMAP(pv) == pmap) { rv = TRUE; break; } loops++; if (loops >= 16) break; } if (!rv && loops < 16 && (m->flags & PG_FICTITIOUS) == 0) { pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) { if (PV_PMAP(pv) == pmap) { rv = TRUE; break; } loops++; if (loops >= 16) break; } } rw_wunlock(&pvh_global_lock); return (rv); } /* * pmap_page_wired_mappings: * * Return the number of managed mappings to the given physical page * that are wired. */ int pmap_page_wired_mappings(vm_page_t m) { int count; count = 0; if ((m->oflags & VPO_UNMANAGED) != 0) return (count); rw_wlock(&pvh_global_lock); count = pmap_pvh_wired_mappings(&m->md, count); if ((m->flags & PG_FICTITIOUS) == 0) { count = pmap_pvh_wired_mappings(pa_to_pvh(VM_PAGE_TO_PHYS(m)), count); } rw_wunlock(&pvh_global_lock); return (count); } /* * pmap_pvh_wired_mappings: * * Return the updated number "count" of managed mappings that are wired. 
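 * That is, the caller passes in a running total and gets it back
 * incremented by the number of wired mappings found on the given pv list.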
*/ static int pmap_pvh_wired_mappings(struct md_page *pvh, int count) { pmap_t pmap; pt_entry_t *pte; pv_entry_t pv; rw_assert(&pvh_global_lock, RA_WLOCKED); sched_pin(); TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) { pmap = PV_PMAP(pv); PMAP_LOCK(pmap); pte = pmap_pte_quick(pmap, pv->pv_va); if ((*pte & PG_W) != 0) count++; PMAP_UNLOCK(pmap); } sched_unpin(); return (count); } /* * Returns TRUE if the given page is mapped individually or as part of * a 4mpage. Otherwise, returns FALSE. */ boolean_t pmap_page_is_mapped(vm_page_t m) { boolean_t rv; if ((m->oflags & VPO_UNMANAGED) != 0) return (FALSE); rw_wlock(&pvh_global_lock); rv = !TAILQ_EMPTY(&m->md.pv_list) || ((m->flags & PG_FICTITIOUS) == 0 && !TAILQ_EMPTY(&pa_to_pvh(VM_PAGE_TO_PHYS(m))->pv_list)); rw_wunlock(&pvh_global_lock); return (rv); } /* * Remove all pages from specified address space * this aids process exit speeds. Also, this code * is special cased for current process only, but * can have the more generic (and slightly slower) * mode enabled. This is much faster than pmap_remove * in the case of running down an entire address space. */ void pmap_remove_pages(pmap_t pmap) { pt_entry_t *pte, tpte; vm_page_t m, mpte, mt; pv_entry_t pv; struct md_page *pvh; struct pv_chunk *pc, *npc; struct spglist free; int field, idx; int32_t bit; uint32_t inuse, bitmask; int allfree; if (pmap != PCPU_GET(curpmap)) { printf("warning: pmap_remove_pages called with non-current pmap\n"); return; } SLIST_INIT(&free); rw_wlock(&pvh_global_lock); PMAP_LOCK(pmap); sched_pin(); TAILQ_FOREACH_SAFE(pc, &pmap->pm_pvchunk, pc_list, npc) { KASSERT(pc->pc_pmap == pmap, ("Wrong pmap %p %p", pmap, pc->pc_pmap)); allfree = 1; for (field = 0; field < _NPCM; field++) { inuse = ~pc->pc_map[field] & pc_freemask[field]; while (inuse != 0) { bit = bsfl(inuse); bitmask = 1UL << bit; idx = field * 32 + bit; pv = &pc->pc_pventry[idx]; inuse &= ~bitmask; pte = pmap_pde(pmap, pv->pv_va); tpte = *pte; if ((tpte & PG_PS) == 0) { pte = vtopte(pv->pv_va); tpte = *pte & ~PG_PTE_PAT; } if (tpte == 0) { printf( "TPTE at %p IS ZERO @ VA %08x\n", pte, pv->pv_va); panic("bad pte"); } /* * We cannot remove wired pages from a process' mapping at this time */ if (tpte & PG_W) { allfree = 0; continue; } m = PHYS_TO_VM_PAGE(tpte & PG_FRAME); KASSERT(m->phys_addr == (tpte & PG_FRAME), ("vm_page_t %p phys_addr mismatch %016jx %016jx", m, (uintmax_t)m->phys_addr, (uintmax_t)tpte)); KASSERT((m->flags & PG_FICTITIOUS) != 0 || m < &vm_page_array[vm_page_array_size], ("pmap_remove_pages: bad tpte %#jx", (uintmax_t)tpte)); pte_clear(pte); /* * Update the vm_page_t clean/reference bits. 
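 * A mapping is considered to have dirtied its page only when both PG_M
 * and PG_RW are set; a PG_M bit observed on a write-protected mapping is
 * not trusted and is ignored.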
*/ if ((tpte & (PG_M | PG_RW)) == (PG_M | PG_RW)) { if ((tpte & PG_PS) != 0) { for (mt = m; mt < &m[NBPDR / PAGE_SIZE]; mt++) vm_page_dirty(mt); } else vm_page_dirty(m); } /* Mark free */ PV_STAT(pv_entry_frees++); PV_STAT(pv_entry_spare++); pv_entry_count--; pc->pc_map[field] |= bitmask; if ((tpte & PG_PS) != 0) { pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE; pvh = pa_to_pvh(tpte & PG_PS_FRAME); TAILQ_REMOVE(&pvh->pv_list, pv, pv_next); if (TAILQ_EMPTY(&pvh->pv_list)) { for (mt = m; mt < &m[NBPDR / PAGE_SIZE]; mt++) if (TAILQ_EMPTY(&mt->md.pv_list)) vm_page_aflag_clear(mt, PGA_WRITEABLE); } mpte = pmap_lookup_pt_page(pmap, pv->pv_va); if (mpte != NULL) { pmap_remove_pt_page(pmap, mpte); pmap->pm_stats.resident_count--; KASSERT(mpte->wire_count == NPTEPG, ("pmap_remove_pages: pte page wire count error")); mpte->wire_count = 0; pmap_add_delayed_free_list(mpte, &free, FALSE); atomic_subtract_int(&vm_cnt.v_wire_count, 1); } } else { pmap->pm_stats.resident_count--; TAILQ_REMOVE(&m->md.pv_list, pv, pv_next); if (TAILQ_EMPTY(&m->md.pv_list) && (m->flags & PG_FICTITIOUS) == 0) { pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); if (TAILQ_EMPTY(&pvh->pv_list)) vm_page_aflag_clear(m, PGA_WRITEABLE); } pmap_unuse_pt(pmap, pv->pv_va, &free); } } } if (allfree) { TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); free_pv_chunk(pc); } } sched_unpin(); pmap_invalidate_all(pmap); rw_wunlock(&pvh_global_lock); PMAP_UNLOCK(pmap); pmap_free_zero_pages(&free); } /* * pmap_is_modified: * * Return whether or not the specified physical page was modified * in any physical maps. */ boolean_t pmap_is_modified(vm_page_t m) { boolean_t rv; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_is_modified: page %p is not managed", m)); /* * If the page is not exclusive busied, then PGA_WRITEABLE cannot be * concurrently set while the object is locked. Thus, if PGA_WRITEABLE * is clear, no PTEs can have PG_M set. */ VM_OBJECT_ASSERT_WLOCKED(m->object); if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0) return (FALSE); rw_wlock(&pvh_global_lock); rv = pmap_is_modified_pvh(&m->md) || ((m->flags & PG_FICTITIOUS) == 0 && pmap_is_modified_pvh(pa_to_pvh(VM_PAGE_TO_PHYS(m)))); rw_wunlock(&pvh_global_lock); return (rv); } /* * Returns TRUE if any of the given mappings were used to modify * physical memory. Otherwise, returns FALSE. Both page and 4mpage * mappings are supported. */ static boolean_t pmap_is_modified_pvh(struct md_page *pvh) { pv_entry_t pv; pt_entry_t *pte; pmap_t pmap; boolean_t rv; rw_assert(&pvh_global_lock, RA_WLOCKED); rv = FALSE; sched_pin(); TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) { pmap = PV_PMAP(pv); PMAP_LOCK(pmap); pte = pmap_pte_quick(pmap, pv->pv_va); rv = (*pte & (PG_M | PG_RW)) == (PG_M | PG_RW); PMAP_UNLOCK(pmap); if (rv) break; } sched_unpin(); return (rv); } /* * pmap_is_prefaultable: * * Return whether or not the specified virtual address is eligible * for prefault. */ boolean_t pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr) { pd_entry_t *pde; pt_entry_t *pte; boolean_t rv; rv = FALSE; PMAP_LOCK(pmap); pde = pmap_pde(pmap, addr); if (*pde != 0 && (*pde & PG_PS) == 0) { pte = vtopte(addr); rv = *pte == 0; } PMAP_UNLOCK(pmap); return (rv); } /* * pmap_is_referenced: * * Return whether or not the specified physical page was referenced * in any physical maps. 
*/ boolean_t pmap_is_referenced(vm_page_t m) { boolean_t rv; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_is_referenced: page %p is not managed", m)); rw_wlock(&pvh_global_lock); rv = pmap_is_referenced_pvh(&m->md) || ((m->flags & PG_FICTITIOUS) == 0 && pmap_is_referenced_pvh(pa_to_pvh(VM_PAGE_TO_PHYS(m)))); rw_wunlock(&pvh_global_lock); return (rv); } /* * Returns TRUE if any of the given mappings were referenced and FALSE * otherwise. Both page and 4mpage mappings are supported. */ static boolean_t pmap_is_referenced_pvh(struct md_page *pvh) { pv_entry_t pv; pt_entry_t *pte; pmap_t pmap; boolean_t rv; rw_assert(&pvh_global_lock, RA_WLOCKED); rv = FALSE; sched_pin(); TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) { pmap = PV_PMAP(pv); PMAP_LOCK(pmap); pte = pmap_pte_quick(pmap, pv->pv_va); rv = (*pte & (PG_A | PG_V)) == (PG_A | PG_V); PMAP_UNLOCK(pmap); if (rv) break; } sched_unpin(); return (rv); } /* * Clear the write and modified bits in each of the given page's mappings. */ void pmap_remove_write(vm_page_t m) { struct md_page *pvh; pv_entry_t next_pv, pv; pmap_t pmap; pd_entry_t *pde; pt_entry_t oldpte, *pte; vm_offset_t va; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_remove_write: page %p is not managed", m)); /* * If the page is not exclusive busied, then PGA_WRITEABLE cannot be * set by another thread while the object is locked. Thus, * if PGA_WRITEABLE is clear, no page table entries need updating. */ VM_OBJECT_ASSERT_WLOCKED(m->object); if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0) return; rw_wlock(&pvh_global_lock); sched_pin(); if ((m->flags & PG_FICTITIOUS) != 0) goto small_mappings; pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_next, next_pv) { va = pv->pv_va; pmap = PV_PMAP(pv); PMAP_LOCK(pmap); pde = pmap_pde(pmap, va); if ((*pde & PG_RW) != 0) (void)pmap_demote_pde(pmap, pde, va); PMAP_UNLOCK(pmap); } small_mappings: TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { pmap = PV_PMAP(pv); PMAP_LOCK(pmap); pde = pmap_pde(pmap, pv->pv_va); KASSERT((*pde & PG_PS) == 0, ("pmap_clear_write: found" " a 4mpage in page %p's pv list", m)); pte = pmap_pte_quick(pmap, pv->pv_va); retry: oldpte = *pte; if ((oldpte & PG_RW) != 0) { /* * Regardless of whether a pte is 32 or 64 bits * in size, PG_RW and PG_M are among the least * significant 32 bits. */ if (!atomic_cmpset_int((u_int *)pte, oldpte, oldpte & ~(PG_RW | PG_M))) goto retry; if ((oldpte & PG_M) != 0) vm_page_dirty(m); pmap_invalidate_page(pmap, pv->pv_va); } PMAP_UNLOCK(pmap); } vm_page_aflag_clear(m, PGA_WRITEABLE); sched_unpin(); rw_wunlock(&pvh_global_lock); } #define PMAP_TS_REFERENCED_MAX 5 /* * pmap_ts_referenced: * * Return a count of reference bits for a page, clearing those bits. * It is not necessary for every reference bit to be cleared, but it * is necessary that 0 only be returned when there are truly no * reference bits set. * * XXX: The exact number of bits to check and clear is a matter that * should be tested and standardized at some point in the future for * optimal aging of shared pages. 
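 *
 * As an illustration, if a page is mapped by three pmaps and PG_A is set
 * in two of those mappings, a return value of 2 is expected, subject to
 * the superpage policy described in the function body and to the
 * PMAP_TS_REFERENCED_MAX cutoff.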
*/ int pmap_ts_referenced(vm_page_t m) { struct md_page *pvh; pv_entry_t pv, pvf; pmap_t pmap; pd_entry_t *pde; pt_entry_t *pte; vm_paddr_t pa; int rtval = 0; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_ts_referenced: page %p is not managed", m)); pa = VM_PAGE_TO_PHYS(m); pvh = pa_to_pvh(pa); rw_wlock(&pvh_global_lock); sched_pin(); if ((m->flags & PG_FICTITIOUS) != 0 || (pvf = TAILQ_FIRST(&pvh->pv_list)) == NULL) goto small_mappings; pv = pvf; do { pmap = PV_PMAP(pv); PMAP_LOCK(pmap); pde = pmap_pde(pmap, pv->pv_va); if ((*pde & PG_A) != 0) { /* * Since this reference bit is shared by either 1024 * or 512 4KB pages, it should not be cleared every * time it is tested. Apply a simple "hash" function * on the physical page number, the virtual superpage * number, and the pmap address to select one 4KB page * out of the 1024 or 512 on which testing the * reference bit will result in clearing that bit. * This function is designed to avoid the selection of * the same 4KB page for every 2- or 4MB page mapping. * * On demotion, a mapping that hasn't been referenced * is simply destroyed. To avoid the possibility of a * subsequent page fault on a demoted wired mapping, * always leave its reference bit set. Moreover, * since the superpage is wired, the current state of * its reference bit won't affect page replacement. */ if ((((pa >> PAGE_SHIFT) ^ (pv->pv_va >> PDRSHIFT) ^ (uintptr_t)pmap) & (NPTEPG - 1)) == 0 && (*pde & PG_W) == 0) { atomic_clear_int((u_int *)pde, PG_A); pmap_invalidate_page(pmap, pv->pv_va); } rtval++; } PMAP_UNLOCK(pmap); /* Rotate the PV list if it has more than one entry. */ if (TAILQ_NEXT(pv, pv_next) != NULL) { TAILQ_REMOVE(&pvh->pv_list, pv, pv_next); TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_next); } if (rtval >= PMAP_TS_REFERENCED_MAX) goto out; } while ((pv = TAILQ_FIRST(&pvh->pv_list)) != pvf); small_mappings: if ((pvf = TAILQ_FIRST(&m->md.pv_list)) == NULL) goto out; pv = pvf; do { pmap = PV_PMAP(pv); PMAP_LOCK(pmap); pde = pmap_pde(pmap, pv->pv_va); KASSERT((*pde & PG_PS) == 0, ("pmap_ts_referenced: found a 4mpage in page %p's pv list", m)); pte = pmap_pte_quick(pmap, pv->pv_va); if ((*pte & PG_A) != 0) { atomic_clear_int((u_int *)pte, PG_A); pmap_invalidate_page(pmap, pv->pv_va); rtval++; } PMAP_UNLOCK(pmap); /* Rotate the PV list if it has more than one entry. */ if (TAILQ_NEXT(pv, pv_next) != NULL) { TAILQ_REMOVE(&m->md.pv_list, pv, pv_next); TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); } } while ((pv = TAILQ_FIRST(&m->md.pv_list)) != pvf && rtval < PMAP_TS_REFERENCED_MAX); out: sched_unpin(); rw_wunlock(&pvh_global_lock); return (rtval); } /* * Apply the given advice to the specified range of addresses within the * given pmap. Depending on the advice, clear the referenced and/or * modified flags in each mapping and set the mapped page's dirty field. 
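 *
 * In outline: only MADV_DONTNEED and MADV_FREE are acted upon.  For a
 * mapping that is both writeable and modified, PG_M and PG_A are cleared,
 * and MADV_DONTNEED first transfers the modification to the page's dirty
 * field so that later pmap_is_modified() calls can be avoided; for a
 * mapping that is merely referenced, only PG_A is cleared.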
*/ void pmap_advise(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, int advice) { pd_entry_t oldpde, *pde; pt_entry_t *pte; vm_offset_t pdnxt; vm_page_t m; boolean_t anychanged, pv_lists_locked; if (advice != MADV_DONTNEED && advice != MADV_FREE) return; if (pmap_is_current(pmap)) pv_lists_locked = FALSE; else { pv_lists_locked = TRUE; resume: rw_wlock(&pvh_global_lock); sched_pin(); } anychanged = FALSE; PMAP_LOCK(pmap); for (; sva < eva; sva = pdnxt) { pdnxt = (sva + NBPDR) & ~PDRMASK; if (pdnxt < sva) pdnxt = eva; pde = pmap_pde(pmap, sva); oldpde = *pde; if ((oldpde & PG_V) == 0) continue; else if ((oldpde & PG_PS) != 0) { if ((oldpde & PG_MANAGED) == 0) continue; if (!pv_lists_locked) { pv_lists_locked = TRUE; if (!rw_try_wlock(&pvh_global_lock)) { if (anychanged) pmap_invalidate_all(pmap); PMAP_UNLOCK(pmap); goto resume; } sched_pin(); } if (!pmap_demote_pde(pmap, pde, sva)) { /* * The large page mapping was destroyed. */ continue; } /* * Unless the page mappings are wired, remove the * mapping to a single page so that a subsequent * access may repromote. Since the underlying page * table page is fully populated, this removal never * frees a page table page. */ if ((oldpde & PG_W) == 0) { pte = pmap_pte_quick(pmap, sva); KASSERT((*pte & PG_V) != 0, ("pmap_advise: invalid PTE")); pmap_remove_pte(pmap, pte, sva, NULL); anychanged = TRUE; } } if (pdnxt > eva) pdnxt = eva; for (pte = pmap_pte_quick(pmap, sva); sva != pdnxt; pte++, sva += PAGE_SIZE) { if ((*pte & (PG_MANAGED | PG_V)) != (PG_MANAGED | PG_V)) continue; else if ((*pte & (PG_M | PG_RW)) == (PG_M | PG_RW)) { if (advice == MADV_DONTNEED) { /* * Future calls to pmap_is_modified() * can be avoided by making the page * dirty now. */ m = PHYS_TO_VM_PAGE(*pte & PG_FRAME); vm_page_dirty(m); } atomic_clear_int((u_int *)pte, PG_M | PG_A); } else if ((*pte & PG_A) != 0) atomic_clear_int((u_int *)pte, PG_A); else continue; if ((*pte & PG_G) != 0) pmap_invalidate_page(pmap, sva); else anychanged = TRUE; } } if (anychanged) pmap_invalidate_all(pmap); if (pv_lists_locked) { sched_unpin(); rw_wunlock(&pvh_global_lock); } PMAP_UNLOCK(pmap); } /* * Clear the modify bits on the specified physical page. */ void pmap_clear_modify(vm_page_t m) { struct md_page *pvh; pv_entry_t next_pv, pv; pmap_t pmap; pd_entry_t oldpde, *pde; pt_entry_t oldpte, *pte; vm_offset_t va; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_clear_modify: page %p is not managed", m)); VM_OBJECT_ASSERT_WLOCKED(m->object); KASSERT(!vm_page_xbusied(m), ("pmap_clear_modify: page %p is exclusive busied", m)); /* * If the page is not PGA_WRITEABLE, then no PTEs can have PG_M set. * If the object containing the page is locked and the page is not * exclusive busied, then PGA_WRITEABLE cannot be concurrently set. */ if ((m->aflags & PGA_WRITEABLE) == 0) return; rw_wlock(&pvh_global_lock); sched_pin(); if ((m->flags & PG_FICTITIOUS) != 0) goto small_mappings; pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_next, next_pv) { va = pv->pv_va; pmap = PV_PMAP(pv); PMAP_LOCK(pmap); pde = pmap_pde(pmap, va); oldpde = *pde; if ((oldpde & PG_RW) != 0) { if (pmap_demote_pde(pmap, pde, va)) { if ((oldpde & PG_W) == 0) { /* * Write protect the mapping to a * single page so that a subsequent * write access may repromote. */ va += VM_PAGE_TO_PHYS(m) - (oldpde & PG_PS_FRAME); pte = pmap_pte_quick(pmap, va); oldpte = *pte; if ((oldpte & PG_V) != 0) { /* * Regardless of whether a pte is 32 or 64 bits * in size, PG_RW and PG_M are among the least * significant 32 bits. 
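 * The loop below is a standard compare-and-set retry: it re-reads the
 * PTE and retries until PG_RW and PG_M are cleared without losing a
 * concurrent hardware update of the entry.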
*/ while (!atomic_cmpset_int((u_int *)pte, oldpte, oldpte & ~(PG_M | PG_RW))) oldpte = *pte; vm_page_dirty(m); pmap_invalidate_page(pmap, va); } } } } PMAP_UNLOCK(pmap); } small_mappings: TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { pmap = PV_PMAP(pv); PMAP_LOCK(pmap); pde = pmap_pde(pmap, pv->pv_va); KASSERT((*pde & PG_PS) == 0, ("pmap_clear_modify: found" " a 4mpage in page %p's pv list", m)); pte = pmap_pte_quick(pmap, pv->pv_va); if ((*pte & (PG_M | PG_RW)) == (PG_M | PG_RW)) { /* * Regardless of whether a pte is 32 or 64 bits * in size, PG_M is among the least significant * 32 bits. */ atomic_clear_int((u_int *)pte, PG_M); pmap_invalidate_page(pmap, pv->pv_va); } PMAP_UNLOCK(pmap); } sched_unpin(); rw_wunlock(&pvh_global_lock); } /* * Miscellaneous support routines follow */ /* Adjust the cache mode for a 4KB page mapped via a PTE. */ static __inline void pmap_pte_attr(pt_entry_t *pte, int cache_bits) { u_int opte, npte; /* * The cache mode bits are all in the low 32-bits of the * PTE, so we can just spin on updating the low 32-bits. */ do { opte = *(u_int *)pte; npte = opte & ~PG_PTE_CACHE; npte |= cache_bits; } while (npte != opte && !atomic_cmpset_int((u_int *)pte, opte, npte)); } /* Adjust the cache mode for a 2/4MB page mapped via a PDE. */ static __inline void pmap_pde_attr(pd_entry_t *pde, int cache_bits) { u_int opde, npde; /* * The cache mode bits are all in the low 32-bits of the * PDE, so we can just spin on updating the low 32-bits. */ do { opde = *(u_int *)pde; npde = opde & ~PG_PDE_CACHE; npde |= cache_bits; } while (npde != opde && !atomic_cmpset_int((u_int *)pde, opde, npde)); } /* * Map a set of physical memory pages into the kernel virtual * address space. Return a pointer to where it is mapped. This * routine is intended to be used for mapping device memory, * NOT real memory. */ void * pmap_mapdev_attr(vm_paddr_t pa, vm_size_t size, int mode) { struct pmap_preinit_mapping *ppim; vm_offset_t va, offset; vm_size_t tmpsize; int i; offset = pa & PAGE_MASK; size = round_page(offset + size); pa = pa & PG_FRAME; if (pa < KERNLOAD && pa + size <= KERNLOAD) va = KERNBASE + pa; else if (!pmap_initialized) { va = 0; for (i = 0; i < PMAP_PREINIT_MAPPING_COUNT; i++) { ppim = pmap_preinit_mapping + i; if (ppim->va == 0) { ppim->pa = pa; ppim->sz = size; ppim->mode = mode; ppim->va = virtual_avail; virtual_avail += size; va = ppim->va; break; } } if (va == 0) panic("%s: too many preinit mappings", __func__); } else { /* * If we have a preinit mapping, re-use it. 
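 * A preinit mapping is reused only on an exact match of physical
 * address, size, and cache mode; otherwise fresh KVA is allocated.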
*/ for (i = 0; i < PMAP_PREINIT_MAPPING_COUNT; i++) { ppim = pmap_preinit_mapping + i; if (ppim->pa == pa && ppim->sz == size && ppim->mode == mode) return ((void *)(ppim->va + offset)); } va = kva_alloc(size); if (va == 0) panic("%s: Couldn't allocate KVA", __func__); } for (tmpsize = 0; tmpsize < size; tmpsize += PAGE_SIZE) pmap_kenter_attr(va + tmpsize, pa + tmpsize, mode); pmap_invalidate_range(kernel_pmap, va, va + tmpsize); pmap_invalidate_cache_range(va, va + size, FALSE); return ((void *)(va + offset)); } void * pmap_mapdev(vm_paddr_t pa, vm_size_t size) { return (pmap_mapdev_attr(pa, size, PAT_UNCACHEABLE)); } void * pmap_mapbios(vm_paddr_t pa, vm_size_t size) { return (pmap_mapdev_attr(pa, size, PAT_WRITE_BACK)); } void pmap_unmapdev(vm_offset_t va, vm_size_t size) { struct pmap_preinit_mapping *ppim; vm_offset_t offset; int i; if (va >= KERNBASE && va + size <= KERNBASE + KERNLOAD) return; offset = va & PAGE_MASK; size = round_page(offset + size); va = trunc_page(va); for (i = 0; i < PMAP_PREINIT_MAPPING_COUNT; i++) { ppim = pmap_preinit_mapping + i; if (ppim->va == va && ppim->sz == size) { if (pmap_initialized) return; ppim->pa = 0; ppim->va = 0; ppim->sz = 0; ppim->mode = 0; if (va + size == virtual_avail) virtual_avail = va; return; } } if (pmap_initialized) kva_free(va, size); } /* * Sets the memory attribute for the specified page. */ void pmap_page_set_memattr(vm_page_t m, vm_memattr_t ma) { m->md.pat_mode = ma; if ((m->flags & PG_FICTITIOUS) != 0) return; /* * If "m" is a normal page, flush it from the cache. * See pmap_invalidate_cache_range(). * * First, try to find an existing mapping of the page by sf * buffer. sf_buf_invalidate_cache() modifies mapping and * flushes the cache. */ if (sf_buf_invalidate_cache(m)) return; /* * If page is not mapped by sf buffer, but CPU does not * support self snoop, map the page transient and do * invalidation. In the worst case, whole cache is flushed by * pmap_invalidate_cache_range(). */ if ((cpu_feature & CPUID_SS) == 0) pmap_flush_page(m); } static void pmap_flush_page(vm_page_t m) { struct sysmaps *sysmaps; vm_offset_t sva, eva; bool useclflushopt; useclflushopt = (cpu_stdext_feature & CPUID_STDEXT_CLFLUSHOPT) != 0; if (useclflushopt || (cpu_feature & CPUID_CLFSH) != 0) { sysmaps = &sysmaps_pcpu[PCPU_GET(cpuid)]; mtx_lock(&sysmaps->lock); if (*sysmaps->CMAP2) panic("pmap_flush_page: CMAP2 busy"); sched_pin(); *sysmaps->CMAP2 = PG_V | PG_RW | VM_PAGE_TO_PHYS(m) | PG_A | PG_M | pmap_cache_bits(m->md.pat_mode, 0); invlcaddr(sysmaps->CADDR2); sva = (vm_offset_t)sysmaps->CADDR2; eva = sva + PAGE_SIZE; /* * Use mfence despite the ordering implied by * mtx_{un,}lock() because clflush on non-Intel CPUs * and clflushopt are not guaranteed to be ordered by * any other instruction. */ if (useclflushopt || cpu_vendor_id != CPU_VENDOR_INTEL) mfence(); for (; sva < eva; sva += cpu_clflush_line_size) { if (useclflushopt) clflushopt(sva); else clflush(sva); } if (useclflushopt || cpu_vendor_id != CPU_VENDOR_INTEL) mfence(); *sysmaps->CMAP2 = 0; sched_unpin(); mtx_unlock(&sysmaps->lock); } else pmap_invalidate_cache(); } /* * Changes the specified virtual address range's memory type to that given by * the parameter "mode". The specified virtual address range must be * completely contained within either the kernel map. * * Returns zero if the change completed successfully, and either EINVAL or * ENOMEM if the change failed. 
Specifically, EINVAL is returned if some part * of the virtual address range was not mapped, and ENOMEM is returned if * there was insufficient memory available to complete the change. */ int pmap_change_attr(vm_offset_t va, vm_size_t size, int mode) { vm_offset_t base, offset, tmpva; pd_entry_t *pde; pt_entry_t *pte; int cache_bits_pte, cache_bits_pde; boolean_t changed; base = trunc_page(va); offset = va & PAGE_MASK; size = round_page(offset + size); /* * Only supported on kernel virtual addresses above the recursive map. */ if (base < VM_MIN_KERNEL_ADDRESS) return (EINVAL); cache_bits_pde = pmap_cache_bits(mode, 1); cache_bits_pte = pmap_cache_bits(mode, 0); changed = FALSE; /* * Pages that aren't mapped aren't supported. Also break down * 2/4MB pages into 4KB pages if required. */ PMAP_LOCK(kernel_pmap); for (tmpva = base; tmpva < base + size; ) { pde = pmap_pde(kernel_pmap, tmpva); if (*pde == 0) { PMAP_UNLOCK(kernel_pmap); return (EINVAL); } if (*pde & PG_PS) { /* * If the current 2/4MB page already has * the required memory type, then we need not * demote this page. Just increment tmpva to * the next 2/4MB page frame. */ if ((*pde & PG_PDE_CACHE) == cache_bits_pde) { tmpva = trunc_4mpage(tmpva) + NBPDR; continue; } /* * If the current offset aligns with a 2/4MB * page frame and there is at least 2/4MB left * within the range, then we need not break * down this page into 4KB pages. */ if ((tmpva & PDRMASK) == 0 && tmpva + PDRMASK < base + size) { tmpva += NBPDR; continue; } if (!pmap_demote_pde(kernel_pmap, pde, tmpva)) { PMAP_UNLOCK(kernel_pmap); return (ENOMEM); } } pte = vtopte(tmpva); if (*pte == 0) { PMAP_UNLOCK(kernel_pmap); return (EINVAL); } tmpva += PAGE_SIZE; } PMAP_UNLOCK(kernel_pmap); /* * Ok, all the pages exist, so run through them updating their * cache mode if required. */ for (tmpva = base; tmpva < base + size; ) { pde = pmap_pde(kernel_pmap, tmpva); if (*pde & PG_PS) { if ((*pde & PG_PDE_CACHE) != cache_bits_pde) { pmap_pde_attr(pde, cache_bits_pde); changed = TRUE; } tmpva = trunc_4mpage(tmpva) + NBPDR; } else { pte = vtopte(tmpva); if ((*pte & PG_PTE_CACHE) != cache_bits_pte) { pmap_pte_attr(pte, cache_bits_pte); changed = TRUE; } tmpva += PAGE_SIZE; } } /* * Flush CPU caches to make sure any data isn't cached that * shouldn't be, etc. */ if (changed) { pmap_invalidate_range(kernel_pmap, base, tmpva); pmap_invalidate_cache_range(base, tmpva, FALSE); } return (0); } /* * perform the pmap work for mincore */ int pmap_mincore(pmap_t pmap, vm_offset_t addr, vm_paddr_t *locked_pa) { pd_entry_t *pdep; pt_entry_t *ptep, pte; vm_paddr_t pa; int val; PMAP_LOCK(pmap); retry: pdep = pmap_pde(pmap, addr); if (*pdep != 0) { if (*pdep & PG_PS) { pte = *pdep; /* Compute the physical address of the 4KB page. */ pa = ((*pdep & PG_PS_FRAME) | (addr & PDRMASK)) & PG_FRAME; val = MINCORE_SUPER; } else { ptep = pmap_pte(pmap, addr); pte = *ptep; pmap_pte_release(ptep); pa = pte & PG_FRAME; val = 0; } } else { pte = 0; pa = 0; val = 0; } if ((pte & PG_V) != 0) { val |= MINCORE_INCORE; if ((pte & (PG_M | PG_RW)) == (PG_M | PG_RW)) val |= MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER; if ((pte & PG_A) != 0) val |= MINCORE_REFERENCED | MINCORE_REFERENCED_OTHER; } if ((val & (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER)) != (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER) && (pte & (PG_MANAGED | PG_V)) == (PG_MANAGED | PG_V)) { /* Ensure that "PHYS_TO_VM_PAGE(pa)->object" doesn't change. 
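 * If vm_page_pa_tryrelock() had to drop the pmap lock in order to switch
 * page locks, the PDE/PTE may have changed underneath us, so the lookup
 * is redone from "retry".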
*/ if (vm_page_pa_tryrelock(pmap, pa, locked_pa)) goto retry; } else PA_UNLOCK_COND(*locked_pa); PMAP_UNLOCK(pmap); return (val); } void pmap_activate(struct thread *td) { pmap_t pmap, oldpmap; u_int cpuid; u_int32_t cr3; critical_enter(); pmap = vmspace_pmap(td->td_proc->p_vmspace); oldpmap = PCPU_GET(curpmap); cpuid = PCPU_GET(cpuid); #if defined(SMP) CPU_CLR_ATOMIC(cpuid, &oldpmap->pm_active); CPU_SET_ATOMIC(cpuid, &pmap->pm_active); #else CPU_CLR(cpuid, &oldpmap->pm_active); CPU_SET(cpuid, &pmap->pm_active); #endif #if defined(PAE) || defined(PAE_TABLES) cr3 = vtophys(pmap->pm_pdpt); #else cr3 = vtophys(pmap->pm_pdir); #endif /* * pmap_activate is for the current thread on the current cpu */ td->td_pcb->pcb_cr3 = cr3; load_cr3(cr3); PCPU_SET(curpmap, pmap); critical_exit(); } void pmap_sync_icache(pmap_t pm, vm_offset_t va, vm_size_t sz) { } /* * Increase the starting virtual address of the given mapping if a * different alignment might result in more superpage mappings. */ void pmap_align_superpage(vm_object_t object, vm_ooffset_t offset, vm_offset_t *addr, vm_size_t size) { vm_offset_t superpage_offset; if (size < NBPDR) return; if (object != NULL && (object->flags & OBJ_COLORED) != 0) offset += ptoa(object->pg_color); superpage_offset = offset & PDRMASK; if (size - ((NBPDR - superpage_offset) & PDRMASK) < NBPDR || (*addr & PDRMASK) == superpage_offset) return; if ((*addr & PDRMASK) < superpage_offset) *addr = (*addr & ~PDRMASK) + superpage_offset; else *addr = ((*addr + PDRMASK) & ~PDRMASK) + superpage_offset; } vm_offset_t pmap_quick_enter_page(vm_page_t m) { vm_offset_t qaddr; pt_entry_t *pte; critical_enter(); qaddr = PCPU_GET(qmap_addr); pte = vtopte(qaddr); KASSERT(*pte == 0, ("pmap_quick_enter_page: PTE busy")); *pte = PG_V | PG_RW | VM_PAGE_TO_PHYS(m) | PG_A | PG_M | pmap_cache_bits(pmap_page_get_memattr(m), 0); invlpg(qaddr); return (qaddr); } void pmap_quick_remove_page(vm_offset_t addr) { vm_offset_t qaddr; pt_entry_t *pte; qaddr = PCPU_GET(qmap_addr); pte = vtopte(qaddr); KASSERT(*pte != 0, ("pmap_quick_remove_page: PTE not in use")); KASSERT(addr == qaddr, ("pmap_quick_remove_page: invalid address")); *pte = 0; critical_exit(); } #if defined(PMAP_DEBUG) pmap_pid_dump(int pid) { pmap_t pmap; struct proc *p; int npte = 0; int index; sx_slock(&allproc_lock); FOREACH_PROC_IN_SYSTEM(p) { if (p->p_pid != pid) continue; if (p->p_vmspace) { int i,j; index = 0; pmap = vmspace_pmap(p->p_vmspace); for (i = 0; i < NPDEPTD; i++) { pd_entry_t *pde; pt_entry_t *pte; vm_offset_t base = i << PDRSHIFT; pde = &pmap->pm_pdir[i]; if (pde && pmap_pde_v(pde)) { for (j = 0; j < NPTEPG; j++) { vm_offset_t va = base + (j << PAGE_SHIFT); if (va >= (vm_offset_t) VM_MIN_KERNEL_ADDRESS) { if (index) { index = 0; printf("\n"); } sx_sunlock(&allproc_lock); return (npte); } pte = pmap_pte(pmap, va); if (pte && pmap_pte_v(pte)) { pt_entry_t pa; vm_page_t m; pa = *pte; m = PHYS_TO_VM_PAGE(pa & PG_FRAME); printf("va: 0x%x, pt: 0x%x, h: %d, w: %d, f: 0x%x", va, pa, m->hold_count, m->wire_count, m->flags); npte++; index++; if (index >= 2) { index = 0; printf("\n"); } else { printf(" "); } } } } } } } sx_sunlock(&allproc_lock); return (npte); } #endif Index: projects/clang390-import/sys/i386/i386/support.s =================================================================== --- projects/clang390-import/sys/i386/i386/support.s (revision 305016) +++ projects/clang390-import/sys/i386/i386/support.s (revision 305017) @@ -1,834 +1,841 @@ /*- * Copyright (c) 1993 The Regents of the University of California. 
* All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include "opt_npx.h" #include #include #include #include #include "assym.s" #define IDXSHIFT 10 .text /* * bcopy family * void bzero(void *buf, u_int len) */ ENTRY(bzero) pushl %edi movl 8(%esp),%edi movl 12(%esp),%ecx xorl %eax,%eax shrl $2,%ecx cld rep stosl movl 12(%esp),%ecx andl $3,%ecx rep stosb popl %edi ret END(bzero) ENTRY(sse2_pagezero) pushl %ebx movl 8(%esp),%ecx movl %ecx,%eax addl $4096,%eax xor %ebx,%ebx + jmp 1f + /* + * The loop takes 14 bytes. Ensure that it doesn't cross a 16-byte + * cache line. + */ + .p2align 4,0x90 1: movnti %ebx,(%ecx) - addl $4,%ecx + movnti %ebx,4(%ecx) + addl $8,%ecx cmpl %ecx,%eax jne 1b sfence popl %ebx ret END(sse2_pagezero) ENTRY(i686_pagezero) pushl %edi pushl %ebx movl 12(%esp),%edi movl $1024,%ecx cld ALIGN_TEXT 1: xorl %eax,%eax repe scasl jnz 2f popl %ebx popl %edi ret ALIGN_TEXT 2: incl %ecx subl $4,%edi movl %ecx,%edx cmpl $16,%ecx jge 3f movl %edi,%ebx andl $0x3f,%ebx shrl %ebx shrl %ebx movl $16,%ecx subl %ebx,%ecx 3: subl %ecx,%edx rep stosl movl %edx,%ecx testl %edx,%edx jnz 1b popl %ebx popl %edi ret END(i686_pagezero) /* fillw(pat, base, cnt) */ ENTRY(fillw) pushl %edi movl 8(%esp),%eax movl 12(%esp),%edi movl 16(%esp),%ecx cld rep stosw popl %edi ret END(fillw) ENTRY(bcopyb) pushl %esi pushl %edi movl 12(%esp),%esi movl 16(%esp),%edi movl 20(%esp),%ecx movl %edi,%eax subl %esi,%eax cmpl %ecx,%eax /* overlapping && src < dst? */ jb 1f cld /* nope, copy forwards */ rep movsb popl %edi popl %esi ret ALIGN_TEXT 1: addl %ecx,%edi /* copy backwards. */ addl %ecx,%esi decl %edi decl %esi std rep movsb popl %edi popl %esi cld ret END(bcopyb) /* * bcopy(src, dst, cnt) * ws@tools.de (Wolfgang Solfrank, TooLs GmbH) +49-228-985800 */ ENTRY(bcopy) pushl %ebp movl %esp,%ebp pushl %esi pushl %edi movl 8(%ebp),%esi movl 12(%ebp),%edi movl 16(%ebp),%ecx movl %edi,%eax subl %esi,%eax cmpl %ecx,%eax /* overlapping && src < dst? */ jb 1f shrl $2,%ecx /* copy by 32-bit words */ cld /* nope, copy forwards */ rep movsl movl 16(%ebp),%ecx andl $3,%ecx /* any bytes left? 
*/ rep movsb popl %edi popl %esi popl %ebp ret ALIGN_TEXT 1: addl %ecx,%edi /* copy backwards */ addl %ecx,%esi decl %edi decl %esi andl $3,%ecx /* any fractional bytes? */ std rep movsb movl 16(%ebp),%ecx /* copy remainder by 32-bit words */ shrl $2,%ecx subl $3,%esi subl $3,%edi rep movsl popl %edi popl %esi cld popl %ebp ret END(bcopy) /* * Note: memcpy does not support overlapping copies */ ENTRY(memcpy) pushl %edi pushl %esi movl 12(%esp),%edi movl 16(%esp),%esi movl 20(%esp),%ecx movl %edi,%eax shrl $2,%ecx /* copy by 32-bit words */ cld /* nope, copy forwards */ rep movsl movl 20(%esp),%ecx andl $3,%ecx /* any bytes left? */ rep movsb popl %esi popl %edi ret END(memcpy) /*****************************************************************************/ /* copyout and fubyte family */ /*****************************************************************************/ /* * Access user memory from inside the kernel. These routines and possibly * the math- and DOS emulators should be the only places that do this. * * We have to access the memory with user's permissions, so use a segment * selector with RPL 3. For writes to user space we have to additionally * check the PTE for write permission, because the 386 does not check * write permissions when we are executing with EPL 0. The 486 does check * this if the WP bit is set in CR0, so we can use a simpler version here. * * These routines set curpcb->pcb_onfault for the time they execute. When a * protection violation occurs inside the functions, the trap handler * returns to *curpcb->pcb_onfault instead of the function. */ /* * copyout(from_kernel, to_user, len) - MP SAFE */ ENTRY(copyout) movl PCPU(CURPCB),%eax movl $copyout_fault,PCB_ONFAULT(%eax) pushl %esi pushl %edi pushl %ebx movl 16(%esp),%esi movl 20(%esp),%edi movl 24(%esp),%ebx testl %ebx,%ebx /* anything to do? */ jz done_copyout /* * Check explicitly for non-user addresses. If 486 write protection * is being used, this check is essential because we are in kernel * mode so the h/w does not provide any protection against writing * kernel addresses. */ /* * First, prevent address wrapping. */ movl %edi,%eax addl %ebx,%eax jc copyout_fault /* * XXX STOP USING VM_MAXUSER_ADDRESS. * It is an end address, not a max, so every time it is used correctly it * looks like there is an off by one error, and of course it caused an off * by one error in several places. 
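 * In the check below %eax holds "to + len", and the strictly-above test
 * (ja) therefore accepts a copy that ends exactly at VM_MAXUSER_ADDRESS,
 * which is consistent with it being an end address.
 */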
*/ cmpl $VM_MAXUSER_ADDRESS,%eax ja copyout_fault /* bcopy(%esi, %edi, %ebx) */ movl %ebx,%ecx shrl $2,%ecx cld rep movsl movb %bl,%cl andb $3,%cl rep movsb done_copyout: popl %ebx popl %edi popl %esi xorl %eax,%eax movl PCPU(CURPCB),%edx movl %eax,PCB_ONFAULT(%edx) ret END(copyout) ALIGN_TEXT copyout_fault: popl %ebx popl %edi popl %esi movl PCPU(CURPCB),%edx movl $0,PCB_ONFAULT(%edx) movl $EFAULT,%eax ret /* * copyin(from_user, to_kernel, len) - MP SAFE */ ENTRY(copyin) movl PCPU(CURPCB),%eax movl $copyin_fault,PCB_ONFAULT(%eax) pushl %esi pushl %edi movl 12(%esp),%esi /* caddr_t from */ movl 16(%esp),%edi /* caddr_t to */ movl 20(%esp),%ecx /* size_t len */ /* * make sure address is valid */ movl %esi,%edx addl %ecx,%edx jc copyin_fault cmpl $VM_MAXUSER_ADDRESS,%edx ja copyin_fault movb %cl,%al shrl $2,%ecx /* copy longword-wise */ cld rep movsl movb %al,%cl andb $3,%cl /* copy remaining bytes */ rep movsb popl %edi popl %esi xorl %eax,%eax movl PCPU(CURPCB),%edx movl %eax,PCB_ONFAULT(%edx) ret END(copyin) ALIGN_TEXT copyin_fault: popl %edi popl %esi movl PCPU(CURPCB),%edx movl $0,PCB_ONFAULT(%edx) movl $EFAULT,%eax ret /* * casueword. Compare and set user word. Returns -1 on fault, * 0 on non-faulting access. The current value is in *oldp. */ ALTENTRY(casueword32) ENTRY(casueword) movl PCPU(CURPCB),%ecx movl $fusufault,PCB_ONFAULT(%ecx) movl 4(%esp),%edx /* dst */ movl 8(%esp),%eax /* old */ movl 16(%esp),%ecx /* new */ cmpl $VM_MAXUSER_ADDRESS-4,%edx /* verify address is valid */ ja fusufault #ifdef SMP lock #endif cmpxchgl %ecx,(%edx) /* Compare and set. */ /* * The old value is in %eax. If the store succeeded it will be the * value we expected (old) from before the store, otherwise it will * be the current value. */ movl PCPU(CURPCB),%ecx movl $0,PCB_ONFAULT(%ecx) movl 12(%esp),%edx /* oldp */ movl %eax,(%edx) xorl %eax,%eax ret END(casueword32) END(casueword) /* * Fetch (load) a 32-bit word, a 16-bit word, or an 8-bit byte from user * memory. */ ALTENTRY(fueword32) ENTRY(fueword) movl PCPU(CURPCB),%ecx movl $fusufault,PCB_ONFAULT(%ecx) movl 4(%esp),%edx /* from */ cmpl $VM_MAXUSER_ADDRESS-4,%edx /* verify address is valid */ ja fusufault movl (%edx),%eax movl $0,PCB_ONFAULT(%ecx) movl 8(%esp),%edx movl %eax,(%edx) xorl %eax,%eax ret END(fueword32) END(fueword) /* * fuswintr() and suswintr() are specialized variants of fuword16() and * suword16(), respectively. They are called from the profiling code, * potentially at interrupt time. If they fail, that's okay; good things * will happen later. They always fail for now, until the trap code is * able to deal with this. */ ALTENTRY(suswintr) ENTRY(fuswintr) movl $-1,%eax ret END(suswintr) END(fuswintr) ENTRY(fuword16) movl PCPU(CURPCB),%ecx movl $fusufault,PCB_ONFAULT(%ecx) movl 4(%esp),%edx cmpl $VM_MAXUSER_ADDRESS-2,%edx ja fusufault movzwl (%edx),%eax movl $0,PCB_ONFAULT(%ecx) ret END(fuword16) ENTRY(fubyte) movl PCPU(CURPCB),%ecx movl $fusufault,PCB_ONFAULT(%ecx) movl 4(%esp),%edx cmpl $VM_MAXUSER_ADDRESS-1,%edx ja fusufault movzbl (%edx),%eax movl $0,PCB_ONFAULT(%ecx) ret END(fubyte) ALIGN_TEXT fusufault: movl PCPU(CURPCB),%ecx xorl %eax,%eax movl %eax,PCB_ONFAULT(%ecx) decl %eax ret /* * Store a 32-bit word, a 16-bit word, or an 8-bit byte to user memory. * All these functions are MPSAFE. 
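 *
 * On this 32-bit architecture a "word" is 32 bits, so suword() and
 * suword32() (like fueword()/fueword32() above) share one implementation,
 * entered via ALTENTRY().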
*/ ALTENTRY(suword32) ENTRY(suword) movl PCPU(CURPCB),%ecx movl $fusufault,PCB_ONFAULT(%ecx) movl 4(%esp),%edx cmpl $VM_MAXUSER_ADDRESS-4,%edx /* verify address validity */ ja fusufault movl 8(%esp),%eax movl %eax,(%edx) xorl %eax,%eax movl PCPU(CURPCB),%ecx movl %eax,PCB_ONFAULT(%ecx) ret END(suword32) END(suword) ENTRY(suword16) movl PCPU(CURPCB),%ecx movl $fusufault,PCB_ONFAULT(%ecx) movl 4(%esp),%edx cmpl $VM_MAXUSER_ADDRESS-2,%edx /* verify address validity */ ja fusufault movw 8(%esp),%ax movw %ax,(%edx) xorl %eax,%eax movl PCPU(CURPCB),%ecx /* restore trashed register */ movl %eax,PCB_ONFAULT(%ecx) ret END(suword16) ENTRY(subyte) movl PCPU(CURPCB),%ecx movl $fusufault,PCB_ONFAULT(%ecx) movl 4(%esp),%edx cmpl $VM_MAXUSER_ADDRESS-1,%edx /* verify address validity */ ja fusufault movb 8(%esp),%al movb %al,(%edx) xorl %eax,%eax movl PCPU(CURPCB),%ecx /* restore trashed register */ movl %eax,PCB_ONFAULT(%ecx) ret END(subyte) /* * copyinstr(from, to, maxlen, int *lencopied) - MP SAFE * * copy a string from from to to, stop when a 0 character is reached. * return ENAMETOOLONG if string is longer than maxlen, and * EFAULT on protection violations. If lencopied is non-zero, * return the actual length in *lencopied. */ ENTRY(copyinstr) pushl %esi pushl %edi movl PCPU(CURPCB),%ecx movl $cpystrflt,PCB_ONFAULT(%ecx) movl 12(%esp),%esi /* %esi = from */ movl 16(%esp),%edi /* %edi = to */ movl 20(%esp),%edx /* %edx = maxlen */ movl $VM_MAXUSER_ADDRESS,%eax /* make sure 'from' is within bounds */ subl %esi,%eax jbe cpystrflt /* restrict maxlen to <= VM_MAXUSER_ADDRESS-from */ cmpl %edx,%eax jae 1f movl %eax,%edx movl %eax,20(%esp) 1: incl %edx cld 2: decl %edx jz 3f lodsb stosb orb %al,%al jnz 2b /* Success -- 0 byte reached */ decl %edx xorl %eax,%eax jmp cpystrflt_x 3: /* edx is zero - return ENAMETOOLONG or EFAULT */ cmpl $VM_MAXUSER_ADDRESS,%esi jae cpystrflt 4: movl $ENAMETOOLONG,%eax jmp cpystrflt_x cpystrflt: movl $EFAULT,%eax cpystrflt_x: /* set *lencopied and return %eax */ movl PCPU(CURPCB),%ecx movl $0,PCB_ONFAULT(%ecx) movl 20(%esp),%ecx subl %edx,%ecx movl 24(%esp),%edx testl %edx,%edx jz 1f movl %ecx,(%edx) 1: popl %edi popl %esi ret END(copyinstr) /* * copystr(from, to, maxlen, int *lencopied) - MP SAFE */ ENTRY(copystr) pushl %esi pushl %edi movl 12(%esp),%esi /* %esi = from */ movl 16(%esp),%edi /* %edi = to */ movl 20(%esp),%edx /* %edx = maxlen */ incl %edx cld 1: decl %edx jz 4f lodsb stosb orb %al,%al jnz 1b /* Success -- 0 byte reached */ decl %edx xorl %eax,%eax jmp 6f 4: /* edx is zero -- return ENAMETOOLONG */ movl $ENAMETOOLONG,%eax 6: /* set *lencopied and return %eax */ movl 20(%esp),%ecx subl %edx,%ecx movl 24(%esp),%edx testl %edx,%edx jz 7f movl %ecx,(%edx) 7: popl %edi popl %esi ret END(copystr) ENTRY(bcmp) pushl %edi pushl %esi movl 12(%esp),%edi movl 16(%esp),%esi movl 20(%esp),%edx movl %edx,%ecx shrl $2,%ecx cld /* compare forwards */ repe cmpsl jne 1f movl %edx,%ecx andl $3,%ecx repe cmpsb 1: setne %al movsbl %al,%eax popl %esi popl %edi ret END(bcmp) /* * Handling of special 386 registers and descriptor tables etc */ /* void lgdt(struct region_descriptor *rdp); */ ENTRY(lgdt) /* reload the descriptor table */ movl 4(%esp),%eax lgdt (%eax) /* flush the prefetch q */ jmp 1f nop 1: /* reload "stale" selectors */ movl $KDSEL,%eax movl %eax,%ds movl %eax,%es movl %eax,%gs movl %eax,%ss movl $KPSEL,%eax movl %eax,%fs /* reload code selector by turning return into intersegmental return */ movl (%esp),%eax pushl %eax movl $KCSEL,4(%esp) MEXITCOUNT lret END(lgdt) /* 
ssdtosd(*ssdp,*sdp) */ ENTRY(ssdtosd) pushl %ebx movl 8(%esp),%ecx movl 8(%ecx),%ebx shll $16,%ebx movl (%ecx),%edx roll $16,%edx movb %dh,%bl movb %dl,%bh rorl $8,%ebx movl 4(%ecx),%eax movw %ax,%dx andl $0xf0000,%eax orl %eax,%ebx movl 12(%esp),%ecx movl %edx,(%ecx) movl %ebx,4(%ecx) popl %ebx ret END(ssdtosd) /* void reset_dbregs() */ ENTRY(reset_dbregs) movl $0,%eax movl %eax,%dr7 /* disable all breakpoints first */ movl %eax,%dr0 movl %eax,%dr1 movl %eax,%dr2 movl %eax,%dr3 movl %eax,%dr6 ret END(reset_dbregs) /*****************************************************************************/ /* setjump, longjump */ /*****************************************************************************/ ENTRY(setjmp) movl 4(%esp),%eax movl %ebx,(%eax) /* save ebx */ movl %esp,4(%eax) /* save esp */ movl %ebp,8(%eax) /* save ebp */ movl %esi,12(%eax) /* save esi */ movl %edi,16(%eax) /* save edi */ movl (%esp),%edx /* get rta */ movl %edx,20(%eax) /* save eip */ xorl %eax,%eax /* return(0); */ ret END(setjmp) ENTRY(longjmp) movl 4(%esp),%eax movl (%eax),%ebx /* restore ebx */ movl 4(%eax),%esp /* restore esp */ movl 8(%eax),%ebp /* restore ebp */ movl 12(%eax),%esi /* restore esi */ movl 16(%eax),%edi /* restore edi */ movl 20(%eax),%edx /* get rta */ movl %edx,(%esp) /* put in return frame */ xorl %eax,%eax /* return(1); */ incl %eax ret END(longjmp) /* * Support for reading MSRs in the safe manner. */ ENTRY(rdmsr_safe) /* int rdmsr_safe(u_int msr, uint64_t *data) */ movl PCPU(CURPCB),%ecx movl $msr_onfault,PCB_ONFAULT(%ecx) movl 4(%esp),%ecx rdmsr movl 8(%esp),%ecx movl %eax,(%ecx) movl %edx,4(%ecx) xorl %eax,%eax movl PCPU(CURPCB),%ecx movl %eax,PCB_ONFAULT(%ecx) ret /* * Support for writing MSRs in the safe manner. */ ENTRY(wrmsr_safe) /* int wrmsr_safe(u_int msr, uint64_t data) */ movl PCPU(CURPCB),%ecx movl $msr_onfault,PCB_ONFAULT(%ecx) movl 4(%esp),%ecx movl 8(%esp),%eax movl 12(%esp),%edx wrmsr xorl %eax,%eax movl PCPU(CURPCB),%ecx movl %eax,PCB_ONFAULT(%ecx) ret /* * MSR operations fault handler */ ALIGN_TEXT msr_onfault: movl PCPU(CURPCB),%ecx movl $0,PCB_ONFAULT(%ecx) movl $EFAULT,%eax ret Index: projects/clang390-import/sys/modules/cloudabi32/Makefile =================================================================== --- projects/clang390-import/sys/modules/cloudabi32/Makefile (revision 305016) +++ projects/clang390-import/sys/modules/cloudabi32/Makefile (revision 305017) @@ -1,37 +1,38 @@ # $FreeBSD$ SYSDIR?=${.CURDIR}/../.. 
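# Note: per-architecture source directories are named sometimes after
# MACHINE_CPUARCH and sometimes after MACHINE (the two can differ, e.g.
# arm64 vs. aarch64), so both are added to the search path below.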
.PATH: ${SYSDIR}/compat/cloudabi32 .PATH: ${SYSDIR}/${MACHINE_CPUARCH}/cloudabi32 +.PATH: ${SYSDIR}/${MACHINE}/cloudabi32 KMOD= cloudabi32 SRCS= cloudabi32_fd.c cloudabi32_module.c cloudabi32_poll.c \ cloudabi32_sock.c cloudabi32_syscalls.c cloudabi32_sysent.c \ cloudabi32_sysvec.c cloudabi32_thread.c OBJS= cloudabi32_vdso_blob.o CLEANFILES=cloudabi32_vdso.o .if ${MACHINE_CPUARCH} == "i386" VDSO_SRCS=${SYSDIR}/contrib/cloudabi/cloudabi_vdso_i686.S OUTPUT_TARGET=elf32-i386-freebsd BINARY_ARCHITECTURE=aarch32 .elif ${MACHINE_CPUARCH} == "amd64" VDSO_SRCS=${SYSDIR}/contrib/cloudabi/cloudabi_vdso_i686_on_64bit.S OUTPUT_TARGET=elf64-x86-64-freebsd BINARY_ARCHITECTURE=i386 .endif cloudabi32_vdso.o: ${VDSO_SRCS} ${CC} -x assembler-with-cpp -m32 -shared -nostdinc -nostdlib \ -Wl,-T${SYSDIR}/compat/cloudabi/cloudabi_vdso.lds \ ${VDSO_SRCS} -o ${.TARGET} cloudabi32_vdso_blob.o: cloudabi32_vdso.o ${OBJCOPY} --input-target binary \ --output-target ${OUTPUT_TARGET} \ --binary-architecture ${BINARY_ARCHITECTURE} \ cloudabi32_vdso.o ${.TARGET} .include Index: projects/clang390-import/sys/modules/cloudabi64/Makefile =================================================================== --- projects/clang390-import/sys/modules/cloudabi64/Makefile (revision 305016) +++ projects/clang390-import/sys/modules/cloudabi64/Makefile (revision 305017) @@ -1,37 +1,38 @@ # $FreeBSD$ SYSDIR?=${.CURDIR}/../.. .PATH: ${SYSDIR}/compat/cloudabi64 .PATH: ${SYSDIR}/${MACHINE_CPUARCH}/cloudabi64 +.PATH: ${SYSDIR}/${MACHINE}/cloudabi64 KMOD= cloudabi64 SRCS= cloudabi64_fd.c cloudabi64_module.c cloudabi64_poll.c \ cloudabi64_sock.c cloudabi64_syscalls.c cloudabi64_sysent.c \ cloudabi64_sysvec.c cloudabi64_thread.c OBJS= cloudabi64_vdso_blob.o CLEANFILES=cloudabi64_vdso.o .if ${MACHINE_CPUARCH} == "aarch64" VDSO_SRCS=${SYSDIR}/contrib/cloudabi/cloudabi_vdso_aarch64.S OUTPUT_TARGET=elf64-littleaarch64 BINARY_ARCHITECTURE=aarch64 .elif ${MACHINE_CPUARCH} == "amd64" VDSO_SRCS=${SYSDIR}/contrib/cloudabi/cloudabi_vdso_x86_64.S OUTPUT_TARGET=elf64-x86-64-freebsd BINARY_ARCHITECTURE=i386 .endif cloudabi64_vdso.o: ${VDSO_SRCS} ${CC} -x assembler-with-cpp -shared -nostdinc -nostdlib \ -Wl,-T${SYSDIR}/compat/cloudabi/cloudabi_vdso.lds \ ${VDSO_SRCS} -o ${.TARGET} cloudabi64_vdso_blob.o: cloudabi64_vdso.o ${OBJCOPY} --input-target binary \ --output-target ${OUTPUT_TARGET} \ --binary-architecture ${BINARY_ARCHITECTURE} \ cloudabi64_vdso.o ${.TARGET} .include Index: projects/clang390-import/usr.sbin/newsyslog/tests/legacy_test.sh =================================================================== --- projects/clang390-import/usr.sbin/newsyslog/tests/legacy_test.sh (revision 305016) +++ projects/clang390-import/usr.sbin/newsyslog/tests/legacy_test.sh (revision 305017) @@ -1,444 +1,457 @@ #!/bin/sh # $FreeBSD$ COUNT=0 TMPDIR=$(pwd)/work if [ $? -ne 0 ]; then echo "$0: Can't create temp dir, exiting..." 
Index: projects/clang390-import/usr.sbin/newsyslog/tests/legacy_test.sh
===================================================================
--- projects/clang390-import/usr.sbin/newsyslog/tests/legacy_test.sh	(revision 305016)
+++ projects/clang390-import/usr.sbin/newsyslog/tests/legacy_test.sh	(revision 305017)
@@ -1,444 +1,457 @@
#!/bin/sh
# $FreeBSD$

COUNT=0
TMPDIR=$(pwd)/work
if [ $? -ne 0 ]; then
	echo "$0: Can't create temp dir, exiting..."
	exit 1
fi

# Begin an individual test
begin()
{
	COUNT=`expr $COUNT + 1`
	OK=1
	NAME="$1"
}

# End an individual test
end()
{
+	local message
+
	if [ $OK = 1 ]
	then
-		printf 'ok '
+		message='ok '
	else
-		printf 'not ok '
+		message='not ok '
	fi
-	echo "$COUNT - $NAME"
+
+	message="$message$COUNT - $NAME"
+	if [ -n "$TODO" ]
+	then
+		message="$message # TODO $TODO"
+	fi
+
+	echo "$message"
}

# Make a file that can later be verified
mkf()
{
	CN=`basename $1`
	echo "$CN-$CN" >$1
}

# Verify that the file specified is correct
ckf()
{
	if [ -f $2 ] && echo "$1-$1" | diff - $2 >/dev/null
	then
		ok
	else
		notok
	fi
}

# Check that a file exists
ckfe()
{
	if [ -f $1 ]
	then
		ok
	else
		notok
	fi
}

# Verify that the specified file does not exist
# (is not there)
cknt()
{
	if [ -r $1 ]
	then
		notok
	else
		ok
	fi
}

# Check whether a file should be present or not, depending on how many
# log files we are supposed to keep vs. how many we actually keep.
ckntfe()
{
	curcnt=$1
	keepcnt=$2
	f=$3

	if [ $curcnt -le $keepcnt ]
	then
		#echo Assuming file there
		ckfe $f
	else
		#echo Assuming file NOT there
		cknt $f
	fi
}

# A part of a test succeeds
ok()
{
	:
}

# A part of a test fails
notok()
{
	OK=0
}

# Verify that the exit code passed is for unsuccessful termination
ckfail()
{
	if [ $1 -gt 0 ]
	then
		ok
	else
		notok
	fi
}

# Verify that the exit code passed is for successful termination
ckok()
{
	if [ $1 -eq 0 ]
	then
		ok
	else
		notok
	fi
}

# Check that there are X files which match expr
chkfcnt()
{
	cnt=$1; shift
	if [ $cnt -eq `echo "$@" | wc -w` ]
	then
		ok
	else
		notok
	fi
}

# Check that two strings are alike
ckstr()
{
	if [ "$1" = "$2" ]
	then
		ok
	else
		notok
	fi
}

tmpdir_create()
{
	mkdir -p ${TMPDIR}/log ${TMPDIR}/alog
	cd ${TMPDIR}/log
}

tmpdir_clean()
{
	cd ${TMPDIR}
	rm -rf "${TMPDIR}/log" "${TMPDIR}/alog" newsyslog.conf
}

run_newsyslog()
{
	newsyslog -f ../newsyslog.conf -F -r "$@"
}

tests_normal_rotate()
{
	ext="$1"
	dir="$2"

	if [ -n "$dir" ]; then
		newsyslog_args=" -a ${dir}"
		name_postfix="${ext} archive dir"
	else
		newsyslog_args=""
		name_postfix="${ext}"
	fi

	tmpdir_create

	begin "create file ${name_postfix}" -newdir
	run_newsyslog -C
	ckfe $LOGFNAME
	cknt ${dir}${LOGFNAME}.0${ext}
	end

	begin "rotate normal 1 ${name_postfix}"
	run_newsyslog $newsyslog_args
	ckfe ${LOGFNAME}
	ckfe ${dir}${LOGFNAME}.0${ext}
	cknt ${dir}${LOGFNAME}.1${ext}
	end

	begin "rotate normal 2 ${name_postfix}"
	run_newsyslog $newsyslog_args
	ckfe ${LOGFNAME}
	ckfe ${dir}${LOGFNAME}.0${ext}
	ckfe ${dir}${LOGFNAME}.1${ext}
	cknt ${dir}${LOGFNAME}.2${ext}
	end

	begin "rotate normal 3 ${name_postfix}"
	run_newsyslog $newsyslog_args
	ckfe ${LOGFNAME}
	ckfe ${dir}${LOGFNAME}.0${ext}
	ckfe ${dir}${LOGFNAME}.1${ext}
	ckfe ${dir}${LOGFNAME}.2${ext}
	cknt ${dir}${LOGFNAME}.3${ext}
	end

	begin "rotate normal 4 ${name_postfix}"
	run_newsyslog $newsyslog_args
	ckfe ${LOGFNAME}
	ckfe ${dir}${LOGFNAME}.0${ext}
	ckfe ${dir}${LOGFNAME}.1${ext}
	ckfe ${dir}${LOGFNAME}.2${ext}
	cknt ${dir}${LOGFNAME}.4${ext}
	end

	begin "rotate normal 5 ${name_postfix}"
	run_newsyslog $newsyslog_args
	ckfe ${LOGFNAME}
	ckfe ${dir}${LOGFNAME}.0${ext}
	ckfe ${dir}${LOGFNAME}.1${ext}
	ckfe ${dir}${LOGFNAME}.2${ext}
	cknt ${dir}${LOGFNAME}.4${ext}
	end

	# Wait a bit so we can see if the noaction test rotates files
	sleep 1.1

	begin "noaction ${name_postfix}"
	ofiles=`ls -Tl ${dir}${LOGFNAME}.*${ext} | tr -d '\n'`
	run_newsyslog ${newsyslog_args} -n >/dev/null
	ckfe ${LOGFNAME}
	ckstr "$ofiles" "`ls -lT ${dir}${LOGFNAME}.*${ext} | tr -d '\n'`"
	end

	tmpdir_clean
}
tests_normal_rotate_keepn()
{
	cnt="$1"
	ext="$2"
	dir="$3"

	if [ -n "$dir" ]; then
		newsyslog_args=" -a ${dir}"
		name_postfix="${ext} archive dir"
	else
		newsyslog_args=""
		name_postfix="${ext}"
	fi

	tmpdir_create

	begin "create file ${name_postfix}" -newdir
	run_newsyslog -C
	ckfe $LOGFNAME
	cknt ${dir}${LOGFNAME}.0${ext}
	end

	begin "rotate normal 1 cnt=$cnt ${name_postfix}"
	run_newsyslog $newsyslog_args
	ckfe ${LOGFNAME}
	ckntfe 1 $cnt ${dir}${LOGFNAME}.0${ext}
	cknt ${dir}${LOGFNAME}.1${ext}
	end

	begin "rotate normal 2 cnt=$cnt ${name_postfix}"
	run_newsyslog $newsyslog_args
	ckfe ${LOGFNAME}
	ckntfe 1 $cnt ${dir}${LOGFNAME}.0${ext}
	ckntfe 2 $cnt ${dir}${LOGFNAME}.1${ext}
	cknt ${dir}${LOGFNAME}.2${ext}
	end

	begin "rotate normal 3 cnt=$cnt ${name_postfix}"
	run_newsyslog $newsyslog_args
	ckfe ${LOGFNAME}
	ckntfe 1 $cnt ${dir}${LOGFNAME}.0${ext}
	ckntfe 2 $cnt ${dir}${LOGFNAME}.1${ext}
	ckntfe 3 $cnt ${dir}${LOGFNAME}.2${ext}
	cknt ${dir}${LOGFNAME}.3${ext}
	end

	begin "rotate normal 4 cnt=$cnt ${name_postfix}"
	run_newsyslog $newsyslog_args
	ckfe ${LOGFNAME}
	ckntfe 1 $cnt ${dir}${LOGFNAME}.0${ext}
	ckntfe 2 $cnt ${dir}${LOGFNAME}.1${ext}
	ckntfe 3 $cnt ${dir}${LOGFNAME}.2${ext}
	ckntfe 4 $cnt ${dir}${LOGFNAME}.3${ext}
	cknt ${dir}${LOGFNAME}.4${ext}
	end

	# Wait a bit so we can see if the noaction test rotates files
	sleep 1.1

	begin "noaction ${name_postfix}"
	osum=`md5 ${dir}${LOGFNAME} | tr -d '\n'`
	run_newsyslog ${newsyslog_args} -n >/dev/null
	ckfe ${LOGFNAME}
	ckstr "$osum" "`md5 ${dir}${LOGFNAME} | tr -d '\n'`"
	end

	tmpdir_clean
}

tests_time_rotate()
{
	ext="$1"
	dir="$2"

	if [ -n "$dir" ]; then
		newsyslog_args="-t DEFAULT -a ${dir}"
		name_postfix="${ext} archive dir"
	else
		newsyslog_args="-t DEFAULT"
		name_postfix="${ext}"
	fi

	tmpdir_create

	begin "create file ${name_postfix}" -newdir
	run_newsyslog -C ${newsyslog_args}
	ckfe ${LOGFNAME}
	end

	begin "rotate time 1 ${name_postfix}"
	run_newsyslog ${newsyslog_args}
	ckfe ${LOGFNAME}
	chkfcnt 1 ${dir}${LOGFNAME}.*${ext}
	end

	sleep 1.1
+	(
+	TODO="rotate time 2-4 fail today; bug 212160"
+
	begin "rotate time 2 ${name_postfix}"
	run_newsyslog ${newsyslog_args}
	ckfe ${LOGFNAME}
	chkfcnt 2 ${dir}${LOGFNAME}.*${ext}
	end

	sleep 1.1
	begin "rotate time 3 ${name_postfix}"
	run_newsyslog ${newsyslog_args}
	ckfe ${LOGFNAME}
	chkfcnt 3 ${dir}${LOGFNAME}.*${ext}
	end

	sleep 1.1
	begin "rotate time 4 ${name_postfix}"
	run_newsyslog ${newsyslog_args}
	ckfe ${LOGFNAME}
	chkfcnt 3 ${dir}${LOGFNAME}.*${ext}
	end
+	)

	begin "noaction ${name_postfix}"
	ofiles=`ls -1 ${dir}${LOGFNAME}.*${ext} | tr -d '\n'`
	run_newsyslog ${newsyslog_args} -n >/dev/null
	ckfe ${LOGFNAME}
	ckstr "$ofiles" "`ls -1 ${dir}${LOGFNAME}.*${ext} | tr -d '\n'`"
	end

	tmpdir_clean
}
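[Editor's illustration] The added ( ... ) subshell scopes TODO to the three time-based rotations that currently fail (bug 212160), so the trailing "noaction" check is still reported as a hard pass or fail. With the new end() plumbing, a TAP harness such as prove(1) would see lines like these (test numbers hypothetical):

	not ok 103 - rotate time 2 gz # TODO rotate time 2-4 fail today; bug 212160
	ok 106 - noaction gz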
echo "$LOGFPATH 640 3 * @T00 NCJ" > newsyslog.conf tests_normal_rotate ".bz2" # Normal, archive dir echo "$LOGFPATH 640 3 * @T00 NC" > newsyslog.conf tests_normal_rotate "" "${TMPDIR}/alog/" echo "$LOGFPATH 640 3 * @T00 NCZ" > newsyslog.conf tests_normal_rotate ".gz" "${TMPDIR}/alog/" echo "$LOGFPATH 640 3 * @T00 NCJ" > newsyslog.conf tests_normal_rotate ".bz2" "${TMPDIR}/alog/" # Time based, no archive dir echo "$LOGFPATH 640 3 * @T00 NC" > newsyslog.conf tests_time_rotate echo "$LOGFPATH 640 3 * @T00 NCZ" > newsyslog.conf tests_time_rotate "gz" "" echo "$LOGFPATH 640 3 * @T00 NCJ" > newsyslog.conf tests_time_rotate "bz2" "" # Time based, archive dir echo "$LOGFPATH 640 3 * @T00 NC" > newsyslog.conf tests_time_rotate "" "${TMPDIR}/alog/" echo "$LOGFPATH 640 3 * @T00 NCZ" > newsyslog.conf tests_time_rotate "gz" "${TMPDIR}/alog/" echo "$LOGFPATH 640 3 * @T00 NCJ" > newsyslog.conf tests_time_rotate "bz2" "${TMPDIR}/alog/" rm -rf "${TMPDIR}" Index: projects/clang390-import =================================================================== --- projects/clang390-import (revision 305016) +++ projects/clang390-import (revision 305017) Property changes on: projects/clang390-import ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head:r304965-305016